author	Dave Airlie <airlied@redhat.com>	2020-07-02 15:17:31 +1000
committer	Dave Airlie <airlied@redhat.com>	2020-07-02 15:17:31 +1000
commit	9555152beb1143c85c03f9b9de59863cbbe89f4b (patch)
tree	3d43b98bf373e72fe84562adafe3bcbb45d21054
parent	f75020fcb97a54c0d2ade1f4918db82f44d225ad (diff)
parent	7808363154d622f9446bf4db97ff0f041dafa30b (diff)
Merge tag 'amd-drm-next-5.9-2020-07-01' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.9-2020-07-01:

amdgpu:
- DC DMUB updates
- HDCP fixes
- Thermal interrupt fixes
- Add initial support for Sienna Cichlid GPU
- Add support for unique id on Arcturus
- Major swSMU code cleanup
- Skip BAR resizing if the bios already did it
- Fixes for DCN bandwidth calculations
- Runtime PM reference count fixes
- Add initial UVD support for SI
- Add support for ASSR on eDP links
- Lots of misc fixes and cleanups
- Enable runtime PM on vega10 boards that support BACO
- RAS fixes
- SR-IOV fixes
- Use IP discovery table on renoir
- DC stream synchronization fixes

amdkfd:
- Track SDMA usage per process
- Fix GCC10 compiler warnings
- Locking fix

radeon:
- Default to on chip GART for AGP boards on all arches
- Runtime PM reference count fixes

UAPI:
- Update comments to clarify MTYPE

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200701155041.1102829-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c834
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c106
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c134
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c300
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c115
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c178
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c527
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c411
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h (renamed from drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h)50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c613
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_api_def.h443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c664
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_common.h (renamed from drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h)42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c1757
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c793
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c148
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c1684
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h877
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm301
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c28
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h21
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c246
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c295
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c100
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c43
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c20
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c19
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c30
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/Makefile4
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c204
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c543
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c255
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h108
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c137
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_debug.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c163
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h65
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_bios_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/Makefile3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c153
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c92
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c (renamed from drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h)78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c134
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c93
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c121
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c89
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/Makefile54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c206
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.h230
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c640
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c100
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c205
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c851
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h269
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c1414
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h608
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c410
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c264
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h923
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c354
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c417
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h119
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c532
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h292
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c719
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c141
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h463
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c1409
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h665
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_opp.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c365
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h333
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c2691
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h82
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.c194
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h133
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c6865
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c1868
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c181
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h230
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c257
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.c387
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_status.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mcif_wb.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h108
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c384
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h10
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h27
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h500
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_gpint_cmd.h75
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h152
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/Makefile3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c49
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c10
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h6
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c184
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h50
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h2
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c91
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h4
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h5
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c115
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h18
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_table.c48
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_table.h47
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h8
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c96
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_1_0_offset.h523
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_1_0_sh_mask.h2378
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h17880
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h70929
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_offset.h573
-rwxr-xr-xdrivers/gpu/drm/amd/include/asic_reg/dcn/dpcs_3_0_0_sh_mask.h3565
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h36
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h7272
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h13473
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h47727
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h27
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_sh_mask.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_2_1_offset.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_2_1_sh_mask.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_3_1_d.h98
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_3_1_sh_mask.h804
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_offset.h1542
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h5496
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h283
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma2/irqsrcs_sdma2_5_0.h45
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma3/irqsrcs_sdma3_5_0.h45
-rw-r--r--drivers/gpu/drm/amd/include/sienna_cichlid_ip_offset.h1168
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/Makefile2
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c1466
-rw-r--r--drivers/gpu/drm/amd/powerplay/arcturus_ppt.c1152
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c10
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c6
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h84
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h1220
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_types.h4
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h35
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_7_ppsmc.h139
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_7_pptable.h196
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c747
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c31
-rw-r--r--drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c2640
-rw-r--r--drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.h (renamed from drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_vbios.h)26
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_internal.h274
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c633
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v12_0.c54
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c11
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c3288
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.h179
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c2
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c20
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c4
-rw-r--r--include/drm/amd_asic_type.h1
-rw-r--r--include/uapi/drm/amdgpu_drm.h10
358 files changed, 222297 insertions, 8549 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 210d57a4afc8..403ec3db29df 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -63,12 +63,13 @@ amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
-amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
+amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o \
+ uvd_v3_1.o
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
- arct_reg_init.o navi12_reg_init.o mxgpu_nv.o
+ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o
# add DF block
amdgpu-y += \
@@ -80,7 +81,7 @@ amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
- gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
+ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o
# add UMC block
amdgpu-y += \
@@ -129,7 +130,8 @@ amdgpu-y += \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o \
- sdma_v5_0.o
+ sdma_v5_0.o \
+ sdma_v5_2.o
# add MES block
amdgpu-y += \
@@ -154,15 +156,18 @@ amdgpu-y += \
vcn_v1_0.o \
vcn_v2_0.o \
vcn_v2_5.o \
+ vcn_v3_0.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
- jpeg_v2_5.o
+ jpeg_v2_5.o \
+ jpeg_v3_0.o
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
- athub_v2_0.o
+ athub_v2_0.o \
+ athub_v2_1.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
@@ -172,12 +177,13 @@ AMDKFD_PATH := ../amdkfd
include $(FULL_AMD_PATH)/amdkfd/Makefile
amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \
- amdgpu_amdkfd_fence.o \
- amdgpu_amdkfd_gpuvm.o \
- amdgpu_amdkfd_gfx_v8.o \
- amdgpu_amdkfd_gfx_v9.o \
- amdgpu_amdkfd_arcturus.o \
- amdgpu_amdkfd_gfx_v10.o
+ amdgpu_amdkfd_fence.o \
+ amdgpu_amdkfd_gpuvm.o \
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o \
+ amdgpu_amdkfd_arcturus.o \
+ amdgpu_amdkfd_gfx_v10.o \
+ amdgpu_amdkfd_gfx_v10_3.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cd913986863e..3d2625beacf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -186,8 +186,10 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
+extern bool debug_evictions;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool debug_evictions; /* = false */
#endif
extern int amdgpu_tmz;
@@ -652,10 +654,6 @@ struct amdgpu_fw_vram_usage {
u64 size;
struct amdgpu_bo *reserved_bo;
void *va;
-
- /* GDDR6 training support flag.
- */
- bool mem_train_support;
};
/*
@@ -990,9 +988,12 @@ struct amdgpu_device {
/* Chip product information */
char product_number[16];
char product_name[32];
- char serial[16];
+ char serial[20];
struct amdgpu_autodump autodump;
+
+ atomic_t throttling_logging_enabled;
+ struct ratelimit_state throttling_logging_rs;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 956cbbda4793..913c8f0513bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -64,7 +64,9 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
- struct amdgpu_encoder *encoder_for_bl;
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+ struct backlight_device *bd;
+#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
@@ -444,45 +446,21 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
- if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
- !amdgpu_device_has_dc_support(adev)) {
- struct amdgpu_encoder *enc = atif->encoder_for_bl;
-
- if (enc) {
- struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
-
- DRM_DEBUG_DRIVER("Changing brightness to %d\n",
- req.backlight_level);
-
- amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
-
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
- backlight_force_update(dig->bl_dev,
- BACKLIGHT_UPDATE_HOTKEY);
-#endif
- }
- }
-#if defined(CONFIG_DRM_AMD_DC)
+ if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
- if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
- amdgpu_device_has_dc_support(adev)) {
- struct amdgpu_display_manager *dm = &adev->dm;
- struct backlight_device *bd = dm->backlight_dev;
-
- if (bd) {
+ if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
-
/*
* XXX backlight_device_set_brightness() is
* hardwired to post BACKLIGHT_UPDATE_SYSFS.
* It probably should accept 'reason' parameter.
*/
- backlight_device_set_brightness(bd, req.backlight_level);
+ backlight_device_set_brightness(atif->bd, req.backlight_level);
}
- }
-#endif
#endif
+ }
+
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
if (adev->flags & AMD_IS_PX) {
pm_runtime_get_sync(adev->ddev->dev);
@@ -829,23 +807,32 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
adev->atif = atif;
if (atif->notifications.brightness_change) {
- struct drm_encoder *tmp;
-
- /* Find the encoder controlling the brightness */
- list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list,
- head) {
- struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
-
- if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
- enc->enc_priv) {
- struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
- if (dig->bl_dev) {
- atif->encoder_for_bl = enc;
- break;
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+ if (amdgpu_device_has_dc_support(adev)) {
+#if defined(CONFIG_DRM_AMD_DC)
+ struct amdgpu_display_manager *dm = &adev->dm;
+ atif->bd = dm->backlight_dev;
+#endif
+ } else {
+ struct drm_encoder *tmp;
+
+ /* Find the encoder controlling the brightness */
+ list_for_each_entry(tmp, &adev->ddev->mode_config.encoder_list,
+ head) {
+ struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
+
+ if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
+ enc->enc_priv) {
+ struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+ if (dig->bl_dev) {
+ atif->bd = dig->bl_dev;
+ break;
+ }
}
}
}
}
+#endif
if (atif->functions.sbios_requests && !atif->functions.system_params) {
/* XXX check this workraround, if sbios request function is
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
new file mode 100644
index 000000000000..7e59e473a190
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+#include "gfxhub_v2_1.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void program_sh_mem_settings_v10_3(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+/* ATC is defeatured on Sienna_Cichlid */
+static int set_pasid_vmid_mapping_v10_3(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v10_3(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ /* fall through */
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v10_3(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int hiq_mqd_load_v10_3(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
+static int hqd_dump_v10_3(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_load_v10_3(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v10_3(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v10_3(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+
+static int address_watch_disable_v10_3(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int address_watch_execute_v10_3(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int wave_control_execute_v10_3(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t address_watch_get_offset_v10_3(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /* SDMA is on gfxhub as well for Navi1* series */
+ gfxhub_v2_1_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+#if 0
+uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
+ uint32_t trap_debug_wave_launch_mode,
+ uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+ uint32_t orig_wave_cntl_value;
+ uint32_t orig_stall_vmid;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC,
+ 0,
+ mmSPI_GDBG_WAVE_CNTL));
+ orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value,
+ SPI_GDBG_WAVE_CNTL,
+ STALL_VMID);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ data = 0;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t disable_debug_trap_v10_3(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t set_wave_launch_trap_override_v10_3(struct kgd_dev *kgd,
+ uint32_t trap_override,
+ uint32_t trap_mask)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ data = 0;
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
+ EXCP_EN, trap_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK,
+ REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t set_wave_launch_mode_v10_3(struct kgd_dev *kgd,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+ bool is_stall_mode;
+ bool is_mode_set;
+
+ is_stall_mode = (wave_launch_mode == 4);
+ is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
+ STALL_VMID, is_stall_mode ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL,
+ STALL_RA, is_stall_mode ? 1 : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void get_iq_wait_times_v10_3(struct kgd_dev *kgd,
+ uint32_t *wait_times)
+
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void build_grace_period_packet_info_v10_3(struct kgd_dev *kgd,
+ uint32_t wait_times,
+ uint32_t grace_period,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ grace_period);
+
+ *reg_offset = mmCP_IQ_WAIT_TIME2;
+}
+#endif
+
+const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v10_3,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
+ .init_interrupts = init_interrupts_v10_3,
+ .hqd_load = hqd_load_v10_3,
+ .hiq_mqd_load = hiq_mqd_load_v10_3,
+ .hqd_sdma_load = hqd_sdma_load_v10_3,
+ .hqd_dump = hqd_dump_v10_3,
+ .hqd_sdma_dump = hqd_sdma_dump_v10_3,
+ .hqd_is_occupied = hqd_is_occupied_v10_3,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
+ .hqd_destroy = hqd_destroy_v10_3,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
+ .address_watch_disable = address_watch_disable_v10_3,
+ .address_watch_execute = address_watch_execute_v10_3,
+ .wave_control_execute = wave_control_execute_v10_3,
+ .address_watch_get_offset = address_watch_get_offset_v10_3,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
+#if 0
+ .enable_debug_trap = enable_debug_trap_v10_3,
+ .disable_debug_trap = disable_debug_trap_v10_3,
+ .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3,
+ .set_wave_launch_mode = set_wave_launch_mode_v10_3,
+ .get_iq_wait_times = get_iq_wait_times_v10_3,
+ .build_grace_period_packet_info = build_grace_period_packet_info_v10_3,
+#endif
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9015c7b76d60..e5a5ba869eb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update, false);
+ return amdgpu_sync_fence(sync, vm->last_update);
}
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update);
return 0;
}
@@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
- ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index fdd52d86a4d7..c687432da426 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -2022,11 +2022,6 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
if (adev->is_atom_fw) {
amdgpu_atomfirmware_scratch_regs_init(adev);
amdgpu_atomfirmware_allocate_fb_scratch(adev);
- ret = amdgpu_atomfirmware_get_mem_train_info(adev);
- if (ret) {
- DRM_ERROR("Failed to get mem train fb location.\n");
- return ret;
- }
} else {
amdgpu_atombios_scratch_regs_init(adev);
amdgpu_atombios_allocate_fb_scratch(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 58f9d8c3a17a..e249b22fef54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -120,11 +120,13 @@ union umc_info {
union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
+ struct atom_vram_info_header_v2_5 v25;
};
union vram_module {
struct atom_vram_module_v9 v9;
struct atom_vram_module_v10 v10;
+ struct atom_vram_module_v11 v11;
};
static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
@@ -260,6 +262,26 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
default:
return -EINVAL;
}
@@ -303,6 +325,9 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
union firmware_info {
struct atom_firmware_info_v3_1 v31;
+ struct atom_firmware_info_v3_2 v32;
+ struct atom_firmware_info_v3_3 v33;
+ struct atom_firmware_info_v3_4 v34;
};
/*
@@ -491,7 +516,7 @@ static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
return false;
}
-static int gddr6_mem_train_support(struct amdgpu_device *adev)
+int amdgpu_mem_train_support(struct amdgpu_device *adev)
{
int ret;
uint32_t major, minor, revision, hw_v;
@@ -507,6 +532,7 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev)
switch (hw_v) {
case HW_REV(11, 0, 0):
case HW_REV(11, 0, 5):
+ case HW_REV(11, 0, 7):
ret = 1;
break;
default:
@@ -525,46 +551,37 @@ static int gddr6_mem_train_support(struct amdgpu_device *adev)
return ret;
}
-int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev)
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
{
struct atom_context *ctx = adev->mode_info.atom_context;
+ union firmware_info *firmware_info;
int index;
- uint8_t frev, crev;
- uint16_t data_offset, size;
- int ret;
+ u16 data_offset, size;
+ u8 frev, crev;
+ int fw_reserved_fb_size;
- adev->fw_vram_usage.mem_train_support = false;
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
- if (adev->asic_type != CHIP_NAVI10 &&
- adev->asic_type != CHIP_NAVI14)
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size,
+ &frev, &crev, &data_offset))
+	/* failed to parse data_header */
return 0;
- if (amdgpu_sriov_vf(adev))
- return 0;
+ firmware_info = (union firmware_info *)(ctx->bios + data_offset);
- ret = gddr6_mem_train_support(adev);
- if (ret == -1)
+	if (frev != 3)
return -EINVAL;
- else if (ret == 0)
- return 0;
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_usagebyfirmware);
- ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev,
- &data_offset);
- if (ret == 0) {
- DRM_ERROR("parse data header failed.\n");
- return -EINVAL;
- }
-
- DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x,"
- " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset);
- /* only support 2.1+ */
- if (((uint16_t)frev << 8 | crev) < 0x0201) {
- DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev);
- return -EINVAL;
+ switch (crev) {
+ case 4:
+ fw_reserved_fb_size =
+ (firmware_info->v34.fw_reserved_size_in_kb << 10);
+ break;
+ default:
+ fw_reserved_fb_size = 0;
+ break;
}
- adev->fw_vram_usage.mem_train_support = true;
- return 0;
+ return fw_reserved_fb_size;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 434fe2fa0089..9f0d4356e8df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -31,10 +31,11 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type, int *vram_vendor);
-int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
+int amdgpu_mem_train_support(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index f355d9a752d2..a1aec205435d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -716,8 +716,10 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
+ }
}
if (encoder) {
@@ -854,8 +856,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
+ }
}
encoder = amdgpu_connector_best_single_encoder(connector);
@@ -977,8 +981,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
+ }
}
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
@@ -1328,8 +1334,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
if (!drm_kms_helper_is_poll_worker()) {
r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
return connector_status_disconnected;
+ }
}
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a25fb59c127c..a512ccbc4dea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
if (r)
return r;
@@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(&p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence);
dma_fence_put(fence);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index d33cb344be69..aeada7c9fbea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -223,12 +223,16 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
*pos &= (1UL << 22) - 1;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
@@ -332,12 +336,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -387,12 +395,16 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -443,12 +455,16 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -498,12 +514,16 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -554,12 +574,16 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -609,12 +633,16 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -764,12 +792,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
valuesize = sizeof(values);
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
@@ -842,12 +874,16 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_virt_enable_access_debugfs(adev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
@@ -937,11 +973,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
r = pm_runtime_get_sync(adev->ddev->dev);
if (r < 0)
- return r;
+ goto err;
r = amdgpu_virt_enable_access_debugfs(adev);
if (r < 0)
- return r;
+ goto err;
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
@@ -967,7 +1003,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
value = data[result >> 2];
r = put_user(value, (uint32_t *)buf);
if (r) {
- result = r;
+ amdgpu_virt_disable_access_debugfs(adev);
goto err;
}
@@ -976,10 +1012,14 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
size -= 4;
}
-err:
kfree(data);
amdgpu_virt_disable_access_debugfs(adev);
return result;
+
+err:
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+ kfree(data);
+ return r;
}
/**
@@ -1003,8 +1043,10 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
while (size) {
uint32_t value;
@@ -1140,8 +1182,10 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
int r = 0, i;
r = pm_runtime_get_sync(dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* Avoid accidently unparking the sched thread during GPU reset */
mutex_lock(&adev->lock_reset);
@@ -1197,8 +1241,10 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
@@ -1216,8 +1262,10 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
@@ -1407,16 +1455,16 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
return -EINVAL;
ret = pm_runtime_get_sync(adev->ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true);
if (ret || val > max_freq || val < min_freq)
return -EINVAL;
ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true);
- } else {
- return 0;
}
pm_runtime_mark_last_busy(adev->ddev->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a027a8f7b281..a649e40fd96f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -80,6 +80,7 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@@ -112,6 +113,7 @@ const char *amdgpu_asic_name[] = {
"NAVI10",
"NAVI14",
"NAVI12",
+ "SIENNA_CICHLID",
"LAST",
};
@@ -907,6 +909,11 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ /* skip if the bios has already enabled large BAR */
+ if (adev->gmc.real_vram_size &&
+ (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
+ return 0;
+
/* Check if the root BUS has 64bit memory resources */
root = adev->pdev->bus;
while (root->parent)
@@ -1159,6 +1166,16 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vm_fragment_size = -1;
}
+ if (amdgpu_sched_hw_submission < 2) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = 2;
+ } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
+ }
+
amdgpu_device_check_smu_prv_buffer_size(adev);
amdgpu_device_check_vm_size(adev);
@@ -1527,22 +1544,25 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
+ char fw_name[40];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
+ if (adev->discovery_bin) {
+ amdgpu_discovery_get_gfx_info(adev);
+
+ /*
+ * FIXME: The bounding box is still needed by Navi12, so
+	 * temporarily read it from gpu_info firmware. Should be dropped
+ * when DAL no longer needs it.
+ */
+ if (adev->asic_type != CHIP_NAVI12)
+ return 0;
+ }
+
switch (adev->asic_type) {
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
@@ -1557,6 +1577,15 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_KABINI:
case CHIP_MULLINS:
#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
case CHIP_VEGA20:
default:
return 0;
@@ -1589,6 +1618,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_NAVI12:
chip_name = "navi12";
break;
+ case CHIP_SIENNA_CICHLID:
+ chip_name = "sienna_cichlid";
+ break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@@ -1617,10 +1649,11 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
- amdgpu_discovery_get_gfx_info(adev);
+ /*
+	 * Should be dropped when DAL no longer needs it.
+ */
+ if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
- }
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
@@ -1653,7 +1686,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
parse_soc_bounding_box:
/*
* soc bounding box info is not integrated in discovery table,
- * we always need to parse it from gpu info firmware.
+ * we still need to parse it from gpu info firmware if needed.
*/
if (hdr->version_minor == 2) {
const struct gpu_info_firmware_v1_2 *gpu_info_fw =
@@ -1690,24 +1723,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
amdgpu_device_enable_virtual_display(adev);
switch (adev->asic_type) {
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
- adev->family = AMDGPU_FAMILY_CZ;
- else
- adev->family = AMDGPU_FAMILY_VI;
-
- r = vi_set_ip_blocks(adev);
- if (r)
- return r;
- break;
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_VERDE:
case CHIP_TAHITI:
@@ -1726,24 +1741,41 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
- if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
- adev->family = AMDGPU_FAMILY_CI;
- else
+ if (adev->flags & AMD_IS_APU)
adev->family = AMDGPU_FAMILY_KV;
+ else
+ adev->family = AMDGPU_FAMILY_CI;
r = cik_set_ip_blocks(adev);
if (r)
return r;
break;
#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (adev->flags & AMD_IS_APU)
+ adev->family = AMDGPU_FAMILY_CZ;
+ else
+ adev->family = AMDGPU_FAMILY_VI;
+
+ r = vi_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
case CHIP_ARCTURUS:
case CHIP_RENOIR:
- if (adev->asic_type == CHIP_RAVEN ||
- adev->asic_type == CHIP_RENOIR)
+ if (adev->flags & AMD_IS_APU)
adev->family = AMDGPU_FAMILY_RV;
else
adev->family = AMDGPU_FAMILY_AI;
@@ -1755,6 +1787,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
adev->family = AMDGPU_FAMILY_NV;
r = nv_set_ip_blocks(adev);
@@ -2308,6 +2341,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+ amdgpu_virt_release_ras_err_handler_data(adev);
+
amdgpu_ras_pre_fini(adev);
if (adev->gmc.xgmi.num_physical_nodes > 1)
@@ -2779,6 +2815,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_NAVI12:
case CHIP_RENOIR:
#endif
+#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
+ case CHIP_SIENNA_CICHLID:
+#endif
return amdgpu_dc != 0;
#endif
default:
@@ -3036,6 +3075,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->gfx.gfx_off_req_count = 1;
adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ /*
+ * If throttling continues, logging will be performed every minute
+ * to avoid log flooding. "-1" is subtracted since the thermal
+ * throttling interrupt comes every second. Thus, the total logging
+	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
+ * for throttling interrupt) = 60 seconds.
+ */
+ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+ ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
+
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
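As a sketch of how the throttling_logging ratelimit state initialized above is meant to be consumed (hypothetical consumer; the thermal-interrupt plumbing that would call this is assumed), a handler that fires every second would emit at most one line per 60-second window while logging is enabled:

	static void example_log_thermal_throttling(struct amdgpu_device *adev)
	{
		/* honour the sysfs toggle first */
		if (!atomic_read(&adev->throttling_logging_enabled))
			return;

		/* __ratelimit() passes once per configured interval */
		if (__ratelimit(&adev->throttling_logging_rs))
			dev_warn(adev->dev, "GPU is thermally throttled\n");
	}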
@@ -3330,10 +3380,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
- if (adev->firmware.gpu_info_fw) {
- release_firmware(adev->firmware.gpu_info_fw);
- adev->firmware.gpu_info_fw = NULL;
- }
+ release_firmware(adev->firmware.gpu_info_fw);
+ adev->firmware.gpu_info_fw = NULL;
adev->accel_working = false;
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
@@ -3365,7 +3413,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
- if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ if (adev->discovery_bin)
amdgpu_discovery_fini(adev);
}
@@ -3377,7 +3425,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
- * @suspend: suspend state
* @fbcon : notify the fbdev of suspend
*
* Puts the hw in the suspend state (all asics).
@@ -3474,7 +3521,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
- * @resume: resume state
* @fbcon : notify the fbdev of resume
*
* Bring the hw back to operating state (all asics).
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b5d6274952a5..4ef38c2411ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -32,7 +32,7 @@
#define mmMM_DATA 0x1
#define HW_ID_MAX 300
-const char *hw_id_names[HW_ID_MAX] = {
+static const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID] = "MP1",
[MP2_HWID] = "MP2",
[THM_HWID] = "THM",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c56438a6c9f2..d76172965199 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -282,7 +282,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
- return ret;
+ goto out;
ret = drm_crtc_helper_set_config(set, ctx);
@@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
take the current one */
if (active && !adev->have_disp_power_ref) {
adev->have_disp_power_ref = true;
- return ret;
+ goto out;
}
/* if we have no active crtcs, then drop the power ref
we got before */
@@ -306,6 +306,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
adev->have_disp_power_ref = false;
}
+out:
/* drop the power reference we got coming in here */
pm_runtime_put_autosuspend(dev->dev);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 3fa18003d4d6..89e6ad30396f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -53,6 +53,7 @@ struct amdgpu_doorbell_index {
uint32_t gfx_ring0;
uint32_t gfx_ring1;
uint32_t sdma_engine[8];
+ uint32_t mes_ring;
uint32_t ih;
union {
struct {
@@ -177,9 +178,12 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
+ AMDGPU_NAVI10_DOORBELL_MES_RING = 0x090,
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E,
/* IH: 376~391 */
AMDGPU_NAVI10_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
@@ -191,8 +195,13 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
+ AMDGPU_NAVI10_DOORBELL64_VCN8_9 = 0x18C,
+ AMDGPU_NAVI10_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
- AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index d2a105e3bf7c..6a39996487b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -1162,7 +1162,7 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
{
int ret = 0;
- if (is_support_sw_smu_xgmi(adev))
+ if (is_support_sw_smu(adev))
ret = smu_set_xgmi_pstate(&adev->smu, pstate);
else if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_xgmi_pstate)
@@ -1197,4 +1197,4 @@ int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
return smu_allow_xgmi_power_down(smu, en);
return 0;
-} \ No newline at end of file
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 126e74758a34..2f12f3ae3c7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -87,9 +87,10 @@
* - 3.36.0 - Allow reading more status registers on si/cik
* - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
+ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 38
+#define KMS_DRIVER_MINOR 39
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -705,6 +706,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false =
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
#endif
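With the 0644 permissions above, the flag follows standard module_param() behaviour: it can be set at load time (amdgpu.debug_evictions=1 on the kernel command line) or toggled at runtime via /sys/module/amdgpu/parameters/debug_evictions.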
/**
@@ -1111,7 +1120,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
- amdgpu_driver_load_kms(dev, ent->driver_data);
+ ret = amdgpu_driver_load_kms(dev, ent->driver_data);
+ if (ret)
+ goto err_pci;
retry_init:
ret = drm_dev_register(dev, ent->driver_data);
@@ -1373,11 +1384,12 @@ long amdgpu_drm_ioctl(struct file *filp,
dev = file_priv->minor->dev;
ret = pm_runtime_get_sync(dev->dev);
if (ret < 0)
- return ret;
+ goto out;
ret = drm_ioctl(filp, cmd, arg);
pm_runtime_mark_last_busy(dev->dev);
+out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d878fe7fee51..8d84975885cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -416,7 +416,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
}
amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
- amdgpu_irq_get(adev, irq_src, irq_type);
+
+ if (irq_src)
+ amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
ring->fence_drv.irq_type = irq_type;
@@ -448,8 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
if (!adev)
return -EINVAL;
- /* Check that num_hw_submission is a power of two */
- if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+ if (!is_power_of_2(num_hw_submission))
return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
@@ -467,8 +468,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
if (!ring->fence_drv.fences)
return -ENOMEM;
- /* No need to setup the GPU scheduler for KIQ ring */
- if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring->no_scheduler) {
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
@@ -537,9 +538,11 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(ring);
}
- amdgpu_irq_put(adev, ring->fence_drv.irq_src,
- ring->fence_drv.irq_type);
- drm_sched_fini(&ring->sched);
+ if (ring->fence_drv.irq_src)
+ amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+ if (!ring->no_scheduler)
+ drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]);
@@ -574,8 +577,9 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
}
/* disable the interrupt */
- amdgpu_irq_put(adev, ring->fence_drv.irq_src,
- ring->fence_drv.irq_type);
+ if (ring->fence_drv.irq_src)
+ amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
}
}
@@ -601,8 +605,9 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
continue;
/* enable the interrupt */
- amdgpu_irq_get(adev, ring->fence_drv.irq_src,
- ring->fence_drv.irq_type);
+ if (ring->fence_drv.irq_src)
+ amdgpu_irq_get(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
}
}
@@ -749,8 +754,10 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
int r;
r = pm_runtime_get_sync(dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
return 0;
+ }
seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 815c072ac4da..e811fecc540f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -26,12 +26,13 @@
#include "amdgpu_i2c.h"
#include "smu_v11_0_i2c.h"
#include "atom.h"
+#include "amdgpu_fru_eeprom.h"
#define I2C_PRODUCT_INFO_ADDR 0xAC
#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2
#define I2C_PRODUCT_INFO_OFFSET 0xC0
-bool is_fru_eeprom_supported(struct amdgpu_device *adev)
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
{
/* TODO: Gaming SKUs don't have the FRU EEPROM.
* Use this hack to address hangs on modprobe on gaming SKUs
@@ -47,7 +48,7 @@ bool is_fru_eeprom_supported(struct amdgpu_device *adev)
return false;
}
-int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
+static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
unsigned char *buff)
{
int ret, size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index 968115c97e33..f29a8611d69b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -21,8 +21,8 @@
*
*/
-#ifndef __AMDGPU_PRODINFO_H__
-#define __AMDGPU_PRODINFO_H__
+#ifndef __AMDGPU_FRU_EEPROM_H__
+#define __AMDGPU_FRU_EEPROM_H__
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index d43c11671a38..50be8e3a443b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -134,6 +134,7 @@ struct gb_addr_config {
uint8_t num_banks;
uint8_t num_se;
uint8_t num_rb_per_se;
+ uint8_t num_pkrs;
};
struct amdgpu_gfx_config {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index acabb57aa8af..34cbd6f6a56b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -357,6 +357,9 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
ring = adev->rings[i];
vmhub = ring->funcs->vmhub;
+ if (ring == &adev->mes.ring)
+ continue;
+
inv_eng = ffs(vm_inv_engs[vmhub]);
if (!inv_eng) {
dev_err(adev->dev, "no VM inv eng for ring %s\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 627104401e84..bc01a06546aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -268,8 +268,6 @@ err_out:
* amdgpu_gtt_mgr_del - free ranges
*
* @man: TTM memory type manager
- * @tbo: TTM BO we need this range for
- * @place: placement flags and restrictions
* @mem: TTM memory object
*
* Free the allocated GTT again.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b91853fd66d3..4ffc32b78745 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
- ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+ ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
(amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index fe92dcd94d4a..267fa45ddb66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(sync, ring->vmid_wait, false);
+ return amdgpu_sync_fence(sync, ring->vmid_wait);
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
return -ENOMEM;
}
- r = amdgpu_sync_fence(sync, &array->base, false);
+ r = amdgpu_sync_fence(sync, &array->base);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = &array->base;
return r;
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(sync, tmp, false);
+ r = amdgpu_sync_fence(sync, tmp);
return r;
}
needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
@@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence);
if (r)
return r;
@@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(&id->active, fence, false);
+ r = amdgpu_sync_fence(&id->active, fence);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 47207188c569..2975c4a6e581 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -183,16 +183,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
- bool explicit = false;
int r;
- fence = amdgpu_sync_get_fence(&job->sync, &explicit);
- if (fence && explicit) {
- if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(&job->sched_sync, fence, false);
- if (r)
- DRM_ERROR("Error adding fence (%d)\n", r);
- }
+ fence = amdgpu_sync_get_fence(&job->sync);
+ if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
+ r = amdgpu_sync_fence(&job->sched_sync, fence);
+ if (r)
+ DRM_ERROR("Error adding fence (%d)\n", r);
}
while (fence == NULL && vm && !job->vmid) {
@@ -202,7 +199,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
- fence = amdgpu_sync_get_fence(&job->sync, NULL);
+ fence = amdgpu_sync_get_fence(&job->sync);
}
return fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index d31d65e6b039..8996cb4ed57a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -37,6 +37,8 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+ mutex_init(&adev->jpeg.jpeg_pg_lock);
+ atomic_set(&adev->jpeg.total_submission_cnt, 0);
return 0;
}
@@ -54,6 +56,8 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
}
+ mutex_destroy(&adev->jpeg.jpeg_pg_lock);
+
return 0;
}
@@ -83,7 +87,7 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
}
- if (fences == 0)
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_GATE);
else
@@ -93,15 +97,19 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
- if (set_clocks)
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ atomic_inc(&adev->jpeg.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_UNGATE);
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
}
void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
{
+ atomic_dec(&ring->adev->jpeg.total_submission_cnt);
schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 5131a0a1bc8a..55fbff2be761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -46,6 +46,8 @@ struct amdgpu_jpeg {
unsigned harvest_config;
struct delayed_work idle_work;
enum amd_powergating_state cur_state;
+ struct mutex jpeg_pg_lock;
+ atomic_t total_submission_cnt;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d7e17e34fee1..a8c47aecd342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -167,18 +167,33 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
}
if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) /* enable runpm by default for boco */
- adev->runpm = true;
- else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm != 0) &&
- (adev->asic_type >= CHIP_TOPAZ) &&
- (adev->asic_type != CHIP_VEGA10) &&
- (adev->asic_type != CHIP_VEGA20) &&
- (adev->asic_type != CHIP_ARCTURUS)) /* enable runpm on VI+ */
- adev->runpm = true;
- else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 on CI */
+ (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
adev->runpm = true;
+ } else if (amdgpu_device_supports_baco(dev) &&
+ (amdgpu_runtime_pm != 0)) {
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+#endif
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_SIENNA_CICHLID:
+ /* enable runpm if runpm=1 */
+ if (amdgpu_runtime_pm > 0)
+ adev->runpm = true;
+ break;
+ case CHIP_VEGA10:
+ /* turn runpm on if noretry=0 */
+ if (!amdgpu_noretry)
+ adev->runpm = true;
+ break;
+ default:
+ /* enable runpm on VI+ */
+ adev->runpm = true;
+ break;
+ }
+ }
/* Call ACPI methods: require modeset init
* but failure is not fatal
@@ -991,7 +1006,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
r = pm_runtime_get_sync(dev->dev);
if (r < 0)
- return r;
+ goto pm_put;
fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
if (unlikely(!fpriv)) {
@@ -1042,6 +1057,7 @@ error_pasid:
out_suspend:
pm_runtime_mark_last_busy(dev->dev);
+pm_put:
pm_runtime_put_autosuspend(dev->dev);
return r;
@@ -1340,8 +1356,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
fw_info.feature, fw_info.ver);
/* MEC2 */
- if (adev->asic_type == CHIP_KAVERI ||
- (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
+ if (adev->gfx.mec2_fw) {
query_fw.index = 1;
ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 78fe49033543..7334982ea702 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -24,10 +24,32 @@
#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__
+#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
+#define AMDGPU_MES_MAX_GFX_PIPES 2
+#define AMDGPU_MES_MAX_SDMA_PIPES 2
+
+enum amdgpu_mes_priority_level {
+ AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
+ AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2,
+ AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3,
+ AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
+ AMDGPU_MES_PRIORITY_NUM_LEVELS
+};
+
struct amdgpu_mes_funcs;
struct amdgpu_mes {
- struct amdgpu_adev *adev;
+ struct amdgpu_device *adev;
+
+ uint32_t total_max_queue;
+ uint32_t doorbell_id_offset;
+ uint32_t max_doorbell_slices;
+
+ uint64_t default_process_quantum;
+ uint64_t default_gang_quantum;
+
+ struct amdgpu_ring ring;
const struct firmware *fw;
@@ -45,8 +67,27 @@ struct amdgpu_mes {
uint32_t data_fw_version;
uint64_t data_start_addr;
+ /* eop gpu obj */
+ struct amdgpu_bo *eop_gpu_obj;
+ uint64_t eop_gpu_addr;
+
+ void *mqd_backup;
+
+ uint32_t vmid_mask_gfxhub;
+ uint32_t vmid_mask_mmhub;
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
+ uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
+ uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t sch_ctx_offs;
+ uint64_t sch_ctx_gpu_addr;
+ uint64_t *sch_ctx_ptr;
+ uint32_t query_status_fence_offs;
+ uint64_t query_status_fence_gpu_addr;
+ uint64_t *query_status_fence_ptr;
+
/* ip specific functions */
- struct amdgpu_mes_funcs *funcs;
+ const struct amdgpu_mes_funcs *funcs;
};
struct mes_add_queue_input {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 16596a9ccabe..347b06d3c140 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -167,8 +167,10 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
@@ -212,8 +214,10 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
mutex_lock(&adev->pm.mutex);
@@ -307,8 +311,10 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
level = smu_get_performance_level(&adev->smu);
@@ -369,8 +375,10 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
}
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
current_level = smu_get_performance_level(&adev->smu);
@@ -449,8 +457,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
ret = smu_get_power_num_states(&adev->smu, &data);
@@ -491,8 +501,10 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
pm = smu_get_current_power_state(smu);
@@ -567,8 +579,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
state = data.states[idx];
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
/* only set user selected power states */
if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
@@ -608,8 +622,10 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
@@ -650,8 +666,10 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
@@ -790,8 +808,10 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
}
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
ret = smu_od_edit_dpm_table(&adev->smu, type,
@@ -847,8 +867,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
@@ -905,8 +927,10 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
@@ -942,8 +966,10 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
@@ -1001,8 +1027,10 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
@@ -1071,8 +1099,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
@@ -1101,8 +1131,10 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
@@ -1135,8 +1167,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
@@ -1165,8 +1199,10 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
@@ -1199,8 +1235,10 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
@@ -1231,8 +1269,10 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
@@ -1265,8 +1305,10 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
@@ -1297,8 +1339,10 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
@@ -1331,8 +1375,10 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
@@ -1363,8 +1409,10 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
@@ -1397,8 +1445,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
return ret;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
@@ -1429,8 +1479,10 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
@@ -1462,8 +1514,10 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@@ -1498,8 +1552,10 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
@@ -1531,8 +1587,10 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
return -EINVAL;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@@ -1587,8 +1645,10 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
size = smu_get_power_profile_mode(&adev->smu, buf);
@@ -1609,7 +1669,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
const char *buf,
size_t count)
{
- int ret = 0xff;
+ int ret;
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t parameter_size = 0;
@@ -1650,8 +1710,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
parameter[parameter_size] = profile_mode;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev))
ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
@@ -1687,8 +1749,10 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return r;
+ }
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
@@ -1723,8 +1787,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return r;
+ }
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
@@ -1770,8 +1836,10 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
return -ENODATA;
ret = pm_runtime_get_sync(ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
@@ -1808,9 +1876,76 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
return 0;
}
+/**
+ * DOC: thermal_throttling_logging
+ *
+ * Thermal throttling pulls down the clock frequency and thus the performance.
+ * It's a useful mechanism to protect the chip from overheating. Since it
+ * impacts performance, the user controls whether it is enabled and, if so,
+ * how often throttling events are logged.
+ *
+ * Reading back the file shows you the status (enabled or disabled) and
+ * the interval (in seconds) between thermal logging events.
+ *
+ * Writing an integer to the file sets a new logging interval in seconds.
+ * The value should be between 1 and 3600. If the value is less than 1,
+ * thermal throttling logging is disabled. Values greater than 3600 are rejected.
+ */
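As a usage sketch of the interface described in the comment above, a small user-space program could set and read the attribute roughly as follows; the sysfs path (card index) is an assumption and varies per system, so this is illustrative only:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical path; substitute the card index of the target GPU. */
#define THROTTLE_LOG_ATTR "/sys/class/drm/card0/device/thermal_throttling_logging"

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd;

	/* Request a 60 second logging interval (writing "0" would disable logging). */
	fd = open(THROTTLE_LOG_ATTR, O_WRONLY);
	if (fd < 0 || write(fd, "60", 2) != 2)
		perror("write thermal_throttling_logging");
	if (fd >= 0)
		close(fd);

	/* Read back the current status and interval. */
	fd = open(THROTTLE_LOG_ATTR, O_RDONLY);
	if (fd >= 0) {
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("%s", buf);
		}
		close(fd);
	}
	return 0;
}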
+static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n",
+ adev->ddev->unique,
+ atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled",
+ adev->throttling_logging_rs.interval / HZ + 1);
+}
+
+static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ long throttling_logging_interval;
+ unsigned long flags;
+ int ret = 0;
+
+ ret = kstrtol(buf, 0, &throttling_logging_interval);
+ if (ret)
+ return ret;
+
+ if (throttling_logging_interval > 3600)
+ return -EINVAL;
+
+ if (throttling_logging_interval > 0) {
+ raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags);
+ /*
+ * Reset the ratelimit timer internals.
+	 * This effectively restarts the timer.
+ */
+ adev->throttling_logging_rs.interval =
+ (throttling_logging_interval - 1) * HZ;
+ adev->throttling_logging_rs.begin = 0;
+ adev->throttling_logging_rs.printed = 0;
+ adev->throttling_logging_rs.missed = 0;
+ raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags);
+
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ } else {
+ atomic_set(&adev->throttling_logging_enabled, 0);
+ }
+
+ return count;
+}
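The interval arithmetic between the two handlers above round-trips: the store path keeps (interval - 1) * HZ jiffies in the ratelimit state, while the show path reports interval / HZ + 1, so a value of N seconds written by the user reads back as N. A minimal sketch of that round trip, with a hypothetical HZ value purely for illustration:

#include <stdio.h>

/* Illustrative only: HZ is a kernel configuration constant; 250 is an assumption. */
#define HZ 250

int main(void)
{
	int requested = 60;                   /* seconds written by the user */
	int stored = (requested - 1) * HZ;    /* what the store path keeps, in jiffies */
	int reported = stored / HZ + 1;       /* what the show path prints back */

	printf("requested=%d reported=%d\n", requested, reported);
	return 0;
}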
+
static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
- AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+ AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC),
@@ -1830,6 +1965,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC),
AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC),
+ AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC),
};
static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
@@ -1872,7 +2008,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
if (adev->flags & AMD_IS_APU)
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(unique_id)) {
- if (!adev->unique_id)
+ if (asic_type != CHIP_VEGA10 &&
+ asic_type != CHIP_VEGA20 &&
+ asic_type != CHIP_ARCTURUS)
*states = ATTR_STATE_UNSUPPORTED;
} else if (DEVICE_ATTR_IS(pp_features)) {
if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10)
@@ -2003,8 +2141,10 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
switch (channel) {
case PP_TEMP_JUNCTION:
@@ -2134,8 +2274,10 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(adev->ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@@ -2172,8 +2314,10 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
return err;
ret = pm_runtime_get_sync(adev->ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, value);
@@ -2220,8 +2364,10 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@@ -2272,8 +2418,10 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_percent(&adev->smu, &speed);
@@ -2305,8 +2453,10 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &speed);
@@ -2337,8 +2487,10 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
(void *)&min_rpm, &size);
@@ -2365,8 +2517,10 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
(void *)&max_rpm, &size);
@@ -2392,8 +2546,10 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
@@ -2424,8 +2580,10 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
return -EPERM;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@@ -2473,8 +2631,10 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
return -EPERM;
ret = pm_runtime_get_sync(adev->ddev->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return ret;
+ }
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
@@ -2519,8 +2679,10 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
return -EINVAL;
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, pwm_mode);
@@ -2551,8 +2713,10 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
@@ -2590,8 +2754,10 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
return -EINVAL;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
@@ -2626,8 +2792,10 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
@@ -2665,11 +2833,13 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, true, true);
+ smu_get_power_limit(&adev->smu, &limit, true);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
@@ -2697,11 +2867,13 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, false, true);
+ smu_get_power_limit(&adev->smu, &limit, false);
size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
@@ -2740,8 +2912,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
err = pm_runtime_get_sync(adev->ddev->dev);
- if (err < 0)
+ if (err < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
if (is_support_sw_smu(adev))
err = smu_set_power_limit(&adev->smu, value);
@@ -2771,8 +2945,10 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
@@ -2806,8 +2982,10 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
return -EPERM;
r = pm_runtime_get_sync(adev->ddev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
@@ -3669,8 +3847,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
return -EPERM;
r = pm_runtime_get_sync(dev->dev);
- if (r < 0)
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
return r;
+ }
amdgpu_device_ip_get_clockgating_state(adev, &flags);
seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 7301fdcfb8bc..9342a9e8cadf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -98,6 +98,7 @@ static int psp_early_init(void *handle)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
@@ -115,6 +116,44 @@ static int psp_early_init(void *handle)
return 0;
}
+static void psp_memory_training_fini(struct psp_context *psp)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ kfree(ctx->sys_cache);
+ ctx->sys_cache = NULL;
+}
+
+static int psp_memory_training_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+ DRM_DEBUG("memory training is not supported!\n");
+ return 0;
+ }
+
+ ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+ if (ctx->sys_cache == NULL) {
+ DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ ret = -ENOMEM;
+ goto Err_out;
+ }
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+ ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+ return 0;
+
+Err_out:
+ psp_memory_training_fini(psp);
+ return ret;
+}
+
static int psp_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -127,7 +166,7 @@ static int psp_sw_init(void *handle)
return ret;
}
- ret = psp_mem_training_init(psp);
+ ret = psp_memory_training_init(psp);
if (ret) {
DRM_ERROR("Failed to initialize memory training!\n");
return ret;
@@ -152,15 +191,13 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- psp_mem_training_fini(&adev->psp);
+ psp_memory_training_fini(&adev->psp);
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
adev->psp.asd_fw = NULL;
- if (adev->psp.ta_fw) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- }
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
if (adev->asic_type == CHIP_NAVI10)
psp_sysfs_fini(adev);
@@ -231,8 +268,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
- /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command in SRIOV */
- skip_unsupport = (psp->cmd_buf_mem->resp.status == 0xffff000a) && amdgpu_sriov_vf(psp->adev);
+ /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
+ skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
+ psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
/* In some cases, psp response status is not 0 even there is no
* problem while the command is submitted. Some version of PSP FW
@@ -350,6 +388,26 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
}
+static int psp_clear_vf_fw(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ kfree(cmd);
+
+ return ret;
+}
+
static int psp_tmr_load(struct psp_context *psp)
{
int ret;
@@ -394,7 +452,7 @@ static int psp_asd_load(struct psp_context *psp)
* add workaround to bypass it for sriov now.
* TODO: add version check to make it common
*/
- if (amdgpu_sriov_vf(psp->adev))
+ if (amdgpu_sriov_vf(psp->adev) || (psp->adev->asic_type == CHIP_SIENNA_CICHLID))
return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
@@ -523,7 +581,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
}
-int psp_ta_invoke(struct psp_context *psp,
+static int psp_ta_invoke(struct psp_context *psp,
uint32_t ta_cmd_id,
uint32_t session_id)
{
@@ -1316,6 +1374,14 @@ static int psp_hw_start(struct psp_context *psp)
}
}
+ if (psp->spl_bin_size) {
+ ret = psp_bootloader_load_spl(psp);
+ if (ret) {
+ DRM_ERROR("PSP load spl failed!\n");
+ return ret;
+ }
+ }
+
ret = psp_bootloader_load_sysdrv(psp);
if (ret) {
DRM_ERROR("PSP load sysdrv failed!\n");
@@ -1335,6 +1401,12 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
+ ret = psp_clear_vf_fw(psp);
+ if (ret) {
+ DRM_ERROR("PSP clear vf fw!\n");
+ return ret;
+ }
+
ret = psp_tmr_init(psp);
if (ret) {
DRM_ERROR("PSP tmr init failed!\n");
@@ -1389,6 +1461,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_SDMA7:
*type = GFX_FW_TYPE_SDMA7;
break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ *type = GFX_FW_TYPE_CP_MES;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ *type = GFX_FW_TYPE_MES_STACK;
+ break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
@@ -1638,6 +1716,15 @@ static int psp_np_fw_load(struct psp_context *psp)
if (fw_load_skip_check(psp, ucode))
continue;
+ if (psp->autoload_supported &&
+ adev->asic_type == CHIP_SIENNA_CICHLID &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
+			/* PSP only receives one SDMA firmware for sienna_cichlid,
+			 * as all four SDMA firmwares are the same */
+ continue;
+
psp_print_fw_hdr(psp, ucode);
ret = psp_execute_np_fw_load(psp, ucode);
@@ -1781,6 +1868,7 @@ static int psp_hw_fini(void *handle)
struct psp_context *psp = &adev->psp;
void *tmr_buf;
void **pptr;
+ int ret;
if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
@@ -1789,6 +1877,11 @@ static int psp_hw_fini(void *handle)
}
psp_asd_unload(psp);
+ ret = psp_clear_vf_fw(psp);
+ if (ret) {
+ DRM_ERROR("PSP clear vf fw!\n");
+ return ret;
+ }
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
@@ -2054,6 +2147,7 @@ int psp_init_sos_microcode(struct psp_context *psp,
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
int err = 0;
if (!chip_name) {
@@ -2098,6 +2192,18 @@ int psp_init_sos_microcode(struct psp_context *psp,
adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes);
}
+ if (sos_hdr->header.header_version_minor == 3) {
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+ adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc_size_bytes);
+ adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.toc_offset_bytes);
+ adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_size_bytes);
+ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.kdb_offset_bytes);
+ adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl_size_bytes);
+ adev->psp.spl_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->spl_offset_bytes);
+ }
break;
default:
dev_err(adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 2a56ad996d83..1513887e7a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -50,6 +50,7 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
+ PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
};
enum psp_ring_type
@@ -84,6 +85,7 @@ struct psp_funcs
{
int (*init_microcode)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
+ int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
@@ -95,8 +97,6 @@ struct psp_funcs
enum psp_ring_type ring_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
- int (*mem_training_init)(struct psp_context *psp);
- void (*mem_training_fini)(struct psp_context *psp);
int (*mem_training)(struct psp_context *psp, uint32_t ops);
uint32_t (*ring_get_wptr)(struct psp_context *psp);
void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
@@ -224,10 +224,12 @@ struct psp_context
uint32_t sos_bin_size;
uint32_t toc_bin_size;
uint32_t kdb_bin_size;
+ uint32_t spl_bin_size;
uint8_t *sys_start_addr;
uint8_t *sos_start_addr;
uint8_t *toc_start_addr;
uint8_t *kdb_start_addr;
+ uint8_t *spl_start_addr;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -298,6 +300,8 @@ struct amdgpu_psp_funcs {
((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
#define psp_bootloader_load_kdb(psp) \
((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0)
+#define psp_bootloader_load_spl(psp) \
+ ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0)
#define psp_bootloader_load_sysdrv(psp) \
((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
@@ -306,10 +310,6 @@ struct amdgpu_psp_funcs {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
-#define psp_mem_training_init(psp) \
- ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
-#define psp_mem_training_fini(psp) \
- ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0)
#define psp_mem_training(psp, ops) \
((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 50fe08bf2f72..ab8e7c91c645 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -86,7 +86,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
amdgpu_ras_get_context(adev)->error_query_ready = ready;
}
-bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
+static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
{
if (adev && amdgpu_ras_get_context(adev))
return amdgpu_ras_get_context(adev)->error_query_ready;
@@ -318,6 +318,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
case 2:
if ((data.inject.address >= adev->gmc.mc_vram_size) ||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ dev_warn(adev->dev, "RAS WARN: input address "
+ "0x%llx is invalid.",
+ data.inject.address);
ret = -EINVAL;
break;
}
@@ -502,7 +505,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
-void amdgpu_ras_parse_status_code(struct amdgpu_device* adev,
+static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
const char* invoke_type,
const char* block_name,
enum ta_ras_status ret)
@@ -812,7 +815,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
}
/* Trigger XGMI/WAFL error */
-int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
struct ta_ras_trigger_error_input *block_info)
{
int ret;
@@ -1914,9 +1917,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_check_supported(adev, &con->hw_supported,
&con->supported);
if (!con->hw_supported) {
- amdgpu_ras_set_context(adev, NULL);
- kfree(con);
- return 0;
+ r = 0;
+ goto err_out;
}
con->features = 0;
@@ -1927,29 +1929,29 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (adev->nbio.funcs->init_ras_controller_interrupt) {
r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
if (r)
- return r;
+ goto err_out;
}
if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
if (r)
- return r;
+ goto err_out;
}
- amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
-
- if (amdgpu_ras_fs_init(adev))
- goto fs_out;
+ if (amdgpu_ras_fs_init(adev)) {
+ r = -EINVAL;
+ goto err_out;
+ }
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
return 0;
-fs_out:
+err_out:
amdgpu_ras_set_context(adev, NULL);
kfree(con);
- return -EINVAL;
+ return r;
}
/* helper function to handle common stuff in ip late init phase */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index be218754629a..da871d84b742 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -62,7 +62,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
- AMDGPU_RING_TYPE_KIQ
+ AMDGPU_RING_TYPE_KIQ,
+ AMDGPU_RING_TYPE_MES
};
enum amdgpu_ib_pool_type {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index b87ca171986a..8ea6c49529e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -35,7 +35,6 @@
struct amdgpu_sync_entry {
struct hlist_node node;
struct dma_fence *fence;
- bool explicit;
};
static struct kmem_cache *amdgpu_sync_slab;
@@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
* Tries to add the fence to an existing hash entry. Returns true when an entry
* was found, false otherwise.
*/
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
@@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
continue;
amdgpu_sync_keep_later(&e->fence, f);
-
- /* Preserve eplicit flag to not loose pipe line sync */
- e->explicit |= explicit;
-
return true;
}
return false;
@@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
*
* @sync: sync object to add fence to
* @f: fence to sync to
- * @explicit: if this is an explicit dependency
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
struct amdgpu_sync_entry *e;
if (!f)
return 0;
- if (amdgpu_sync_add_later(sync, f, explicit))
+ if (amdgpu_sync_add_later(sync, f))
return 0;
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
if (!e)
return -ENOMEM;
- e->explicit = explicit;
-
hash_add(sync->fences, &e->node, f->context);
e->fence = dma_fence_get(f);
return 0;
@@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
return 0;
amdgpu_sync_keep_later(&sync->last_vm_update, fence);
- return amdgpu_sync_fence(sync, fence, false);
+ return amdgpu_sync_fence(sync, fence);
}
/**
@@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* always sync to the exclusive fence */
f = dma_resv_get_excl(resv);
- r = amdgpu_sync_fence(sync, f, false);
+ r = amdgpu_sync_fence(sync, f);
flist = dma_resv_get_list(resv);
if (!flist || r)
@@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* Always sync to moves, no matter what */
if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
- r = amdgpu_sync_fence(sync, f, false);
+ r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
@@ -275,7 +265,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
continue;
}
- r = amdgpu_sync_fence(sync, f, false);
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
@@ -330,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
* amdgpu_sync_get_fence - get the next fence from the sync object
*
* @sync: sync object to use
- * @explicit: true if the next fence is explicit
*
* Get and removes the next fence from the sync object not signaled yet.
*/
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
@@ -343,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
hash_for_each_safe(sync->fences, i, tmp, e, node) {
f = e->fence;
- if (explicit)
- *explicit = e->explicit;
hash_del(&e->node);
kmem_cache_free(amdgpu_sync_slab, e);
@@ -376,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f, e->explicit);
+ r = amdgpu_sync_fence(clone, f);
if (r)
return r;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index cfbe5788b8b9..7c0fe20c470d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,16 +47,14 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
- bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
- bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 38d2a7fb5698..bb95627ad2cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -58,6 +58,7 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_atomfirmware.h"
#include "bif/bif_4_1_d.h"
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
@@ -1110,7 +1111,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
#endif
}
-int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
{
@@ -1817,54 +1818,86 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
{
- if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
- vram_size -= SZ_1M;
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
- return ALIGN(vram_size, SZ_1M);
+ ctx->c2p_train_data_offset =
+ ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M);
+ ctx->p2c_train_data_offset =
+ (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size =
+ GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
}
-/**
- * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
- *
- * @adev: amdgpu_device pointer
- *
- * create bo vram reservation from memory training.
+/*
+ * reserve TMR memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
*/
-static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
+static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+ bool mem_train_support = false;
- memset(ctx, 0, sizeof(*ctx));
- if (!adev->fw_vram_usage.mem_train_support) {
- DRM_DEBUG("memory training does not support!\n");
- return 0;
+ if (!amdgpu_sriov_vf(adev)) {
+ ret = amdgpu_mem_train_support(adev);
+ if (ret == 1)
+ mem_train_support = true;
+ else if (ret == -1)
+ return -EINVAL;
+ else
+ DRM_DEBUG("memory training does not support!\n");
}
- ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
- ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
- ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
-
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
-
- ret = amdgpu_bo_create_kernel_at(adev,
+ /*
+	 * Query the reserved TMR size through atom firmwareinfo for Sienna_Cichlid and onwards for all
+	 * the use cases (IP discovery/G6 memory training/profiling/diagnostic data, etc.)
+	 *
+	 * Otherwise, fall back to the legacy approach of checking and reserving TMR blocks for IP
+	 * discovery data and G6 memory training data respectively
+ */
+ adev->discovery_tmr_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+ if (!adev->discovery_tmr_size)
+ adev->discovery_tmr_size = DISCOVERY_TMR_SIZE;
+
+ if (mem_train_support) {
+ /* reserve vram for mem train according to TMR location */
+ amdgpu_ttm_training_data_block_init(adev);
+ ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
NULL);
+ if (ret) {
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
+ }
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ }
+
+ ret = amdgpu_bo_create_kernel_at(adev,
+ adev->gmc.real_vram_size - adev->discovery_tmr_size,
+ adev->discovery_tmr_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->discovery_memory,
+ NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
- amdgpu_ttm_training_reserve_vram_fini(adev);
+ DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
return ret;
}
- ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
return 0;
}
@@ -1932,11 +1965,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/*
- *The reserved vram for memory training must be pinned to the specified
- *place on the VRAM, so reserve it early.
+	 * only NAVI10 and onward ASICs support IP discovery.
+	 * If IP discovery is enabled, a block of memory should be
+	 * reserved for IP discovery.
*/
- if (!amdgpu_sriov_vf(adev)) {
- r = amdgpu_ttm_training_reserve_vram_init(adev);
+ if (adev->discovery_bin) {
+ r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
}
@@ -1952,21 +1986,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
if (r)
return r;
- /*
- * reserve TMR memory at the top of VRAM which holds
- * IP Discovery data and is protected by PSP.
- */
- if (adev->discovery_tmr_size > 0) {
- r = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->discovery_tmr_size,
- adev->discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->discovery_memory,
- NULL);
- if (r)
- return r;
- }
-
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 65bb25e31d45..744404a05fee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -279,6 +279,30 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("kdb_size_bytes: %u\n",
le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes));
}
+ if (version_minor == 3) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 =
+ container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc_header_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc_offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc_size_bytes));
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_header_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb_size_bytes));
+ DRM_DEBUG("spl_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl_header_version));
+ DRM_DEBUG("spl_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl_offset_bytes));
+ DRM_DEBUG("spl_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl_size_bytes));
+ }
} else {
DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
version_major, version_minor);
@@ -365,6 +389,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@@ -448,6 +473,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
+ const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@@ -462,12 +488,15 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_CP_MES_DATA &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
@@ -527,6 +556,16 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES) {
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)),
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA) {
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ memcpy(ucode->kaddr, (void *)((uint8_t *)adev->mes.fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)),
+ ucode->ucode_size);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index b0e656409c03..df402c7b3233 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -99,6 +99,14 @@ struct psp_firmware_header_v1_2 {
uint32_t kdb_size_bytes;
};
+/* version_major=1, version_minor=3 */
+struct psp_firmware_header_v1_3 {
+ struct psp_firmware_header_v1_1 v1_1;
+ uint32_t spl_header_version;
+ uint32_t spl_offset_bytes;
+ uint32_t spl_size_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -266,6 +274,7 @@ union amdgpu_firmware_header {
struct smc_firmware_header_v2_0 smc_v2_0;
struct psp_firmware_header_v1_0 psp;
struct psp_firmware_header_v1_1 psp_v1_1;
+ struct psp_firmware_header_v1_3 psp_v1_3;
struct ta_firmware_header_v1_0 ta;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5100ebe8858d..f8bebf18ee36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -54,6 +54,12 @@
#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin"
+#define FIRMWARE_VERDE "amdgpu/verde_uvd.bin"
+#define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin"
+#define FIRMWARE_OLAND "amdgpu/oland_uvd.bin"
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
@@ -100,6 +106,12 @@ struct amdgpu_uvd_cs_ctx {
unsigned *buf_sizes;
};
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+MODULE_FIRMWARE(FIRMWARE_VERDE);
+MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
+MODULE_FIRMWARE(FIRMWARE_OLAND);
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
@@ -133,6 +145,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ fw_name = FIRMWARE_TAHITI;
+ break;
+ case CHIP_VERDE:
+ fw_name = FIRMWARE_VERDE;
+ break;
+ case CHIP_PITCAIRN:
+ fw_name = FIRMWARE_PITCAIRN;
+ break;
+ case CHIP_OLAND:
+ fw_name = FIRMWARE_OLAND;
+ break;
+#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE:
fw_name = FIRMWARE_BONAIRE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2badbc0355f2..15ff30c53e24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -42,6 +42,7 @@
#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -51,6 +52,7 @@ MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
+MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -107,6 +109,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
+ case CHIP_SIENNA_CICHLID:
+ fw_name = FIRMWARE_SIENNA_CICHLID;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 90aa12b22725..e125e8bfac54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -53,7 +53,9 @@
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@@ -65,7 +67,7 @@
/* 1 second timeout */
#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
-#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \
+#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \
({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
@@ -75,7 +77,7 @@
RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \
+#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \
do { \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
@@ -86,30 +88,40 @@
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
-#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \
+#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
- bool video_range, aon_range; \
+ bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \
aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \
if (video_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
(VCN_VID_IP_ADDRESS_2_0)); \
else if (aon_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
(VCN_AON_IP_ADDRESS_2_0)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
else \
internal_reg_offset = (0xFFFFF & addr); \
\
internal_reg_offset >>= 2; \
})
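Read as a plain function, the window selection in SOC15_DPG_MODE_OFFSET rebases the low 20 bits of a byte address onto the matching IP-internal base and converts the result to a dword offset. A standalone C sketch of that mapping, using only the constants defined earlier in this header (an illustration, not driver code):

#include <stdint.h>
#include <stdio.h>

#define VCN_VID_SOC_ADDRESS_2_0   0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0  0x48200
#define VCN_AON_SOC_ADDRESS_2_0   0x1f800
#define VCN1_AON_SOC_ADDRESS_3_0  0x48000
#define VCN_VID_IP_ADDRESS_2_0    0x0
#define VCN_AON_IP_ADDRESS_2_0    0x30000

/* Mirror of the macro's window test for a byte address. */
static uint32_t dpg_internal_offset(uint32_t byte_addr)
{
	uint32_t a = byte_addr & 0xFFFFF;
	uint32_t internal;

	if (a >= VCN_VID_SOC_ADDRESS_2_0 && a < VCN_VID_SOC_ADDRESS_2_0 + 0x2600)
		internal = a - VCN_VID_SOC_ADDRESS_2_0 + VCN_VID_IP_ADDRESS_2_0;
	else if (a >= VCN_AON_SOC_ADDRESS_2_0 && a < VCN_AON_SOC_ADDRESS_2_0 + 0x600)
		internal = a - VCN_AON_SOC_ADDRESS_2_0 + VCN_AON_IP_ADDRESS_2_0;
	else if (a >= VCN1_VID_SOC_ADDRESS_3_0 && a < VCN1_VID_SOC_ADDRESS_3_0 + 0x2600)
		internal = a - VCN1_VID_SOC_ADDRESS_3_0 + VCN_VID_IP_ADDRESS_2_0;
	else if (a >= VCN1_AON_SOC_ADDRESS_3_0 && a < VCN1_AON_SOC_ADDRESS_3_0 + 0x600)
		internal = a - VCN1_AON_SOC_ADDRESS_3_0 + VCN_AON_IP_ADDRESS_2_0;
	else
		internal = a;

	return internal >> 2;	/* register offsets are in dwords */
}

int main(void)
{
	/* Example: an address 0x100 bytes into the VCN1 video window maps to dword offset 0x40. */
	printf("0x%x\n", dpg_internal_offset(VCN1_VID_SOC_ADDRESS_3_0 + 0x100));
	return 0;
}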
-#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \
+#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \
({ \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
@@ -118,7 +130,7 @@
RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
do { \
if (!indirect) { \
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
@@ -142,6 +154,7 @@ enum fw_queue_mode {
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
UVD_STATUS__UVD_BUSY = 0x00000004,
GB_ADDR_CONFIG_DEFAULT = 0x26010011,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f3b38c9e04ca..da233a9e429d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -26,6 +26,7 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
+#include "amdgpu_ras.h"
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
@@ -255,12 +256,171 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj,
return ret;
}
+static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
+ /* GPU will be marked bad on host if bp count more then 10,
+	/* GPU will be marked bad on host if the bp count is more than 10,
+	 * so allocating 512 entries is enough.
+ unsigned int align_space = 512;
+ void *bps = NULL;
+ struct amdgpu_bo **bps_bo = NULL;
+
+ *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
+ if (!*data)
+ return -ENOMEM;
+
+ bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
+ bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL);
+
+ if (!bps || !bps_bo) {
+ kfree(bps);
+ kfree(bps_bo);
+ kfree(*data);
+ return -ENOMEM;
+ }
+
+ (*data)->bps = bps;
+ (*data)->bps_bo = bps_bo;
+ (*data)->count = 0;
+ (*data)->last_reserved = 0;
+
+ virt->ras_init_done = true;
+
+ return 0;
+}
+
+static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_bo *bo;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved - 1; i >= 0; i--) {
+ bo = data->bps_bo[i];
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ data->last_reserved = i;
+ }
+}
+
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ virt->ras_init_done = false;
+
+ if (!data)
+ return;
+
+ amdgpu_virt_ras_release_bp(adev);
+
+ kfree(data->bps);
+ kfree(data->bps_bo);
+ kfree(data);
+ virt->virt_eh_data = NULL;
+}
+
+static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ if (!data)
+ return;
+
+ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+ data->count += pages;
+}
+
+static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_bo *bo = NULL;
+ uint64_t bp;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved; i < data->count; i++) {
+ bp = data->bps[i].retired_page;
+
+		/* There are two cases in which a reserve error should be ignored:
+ * 1) a ras bad page has been allocated (used by someone);
+ * 2) a ras bad page has been reserved (duplicate error injection
+ * for one page);
+ */
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL))
+ DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+
+ data->bps_bo[i] = bo;
+ data->last_reserved = i + 1;
+ bo = NULL;
+ }
+}
+
+static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t retired_page)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ int i;
+
+ if (!data)
+ return true;
+
+ for (i = 0; i < data->count; i++)
+ if (retired_page == data->bps[i].retired_page)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
+ uint64_t bp_block_offset, uint32_t bp_block_size)
+{
+ struct eeprom_table_record bp;
+ uint64_t retired_page;
+ uint32_t bp_idx, bp_cnt;
+
+ if (bp_block_size) {
+ bp_cnt = bp_block_size / sizeof(uint64_t);
+ for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
+ retired_page = *(uint64_t *)(adev->fw_vram_usage.va +
+ bp_block_offset + bp_idx * sizeof(uint64_t));
+ bp.retired_page = retired_page;
+
+ if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
+ continue;
+
+ amdgpu_virt_ras_add_bps(adev, &bp, 1);
+
+ amdgpu_virt_ras_reserve_bps(adev);
+ }
+ }
+}
+
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
uint32_t pf2vf_size = 0;
uint32_t checksum = 0;
uint32_t checkval;
char *str;
+ uint64_t bp_block_offset = 0;
+ uint32_t bp_block_size = 0;
+ struct amdgim_pf2vf_info_v2 *pf2vf_v2 = NULL;
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
@@ -275,6 +435,19 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
/* pf2vf message must be in 4K */
if (pf2vf_size > 0 && pf2vf_size < 4096) {
+ if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
+ pf2vf_v2 = (struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf;
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_L & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_H) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
+
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
+
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
+
checkval = amdgpu_virt_fw_reserve_get_checksum(
adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
adev->virt.fw_reserve.checksum_key, checksum);
@@ -321,6 +494,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_VEGA20:
case CHIP_NAVI10:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
case CHIP_ARCTURUS:
reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
break;
@@ -341,12 +515,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
}
}
-bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
+static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
{
return amdgpu_sriov_is_debug(adev) ? true : false;
}
-bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
+static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
{
return amdgpu_sriov_is_normal(adev) ? true : false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b90e822cebd7..f826945989c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -143,19 +143,27 @@ struct amdgim_pf2vf_info_v2 {
uint32_t vce_enc_max_pixels_count;
/* 16x16 pixels/sec, codec independent */
uint32_t vce_enc_max_bandwidth;
+ /* Bad pages block position in BYTE */
+ uint32_t bp_block_offset_L;
+ uint32_t bp_block_offset_H;
+ /* Bad pages block size in BYTE */
+ uint32_t bp_block_size;
/* MEC FW position in kb from the start of VF visible frame buffer */
- uint64_t mecfw_kboffset;
+ uint32_t mecfw_kboffset_L;
+ uint32_t mecfw_kboffset_H;
/* MEC FW size in KB */
uint32_t mecfw_ksize;
/* UVD FW position in kb from the start of VF visible frame buffer */
- uint64_t uvdfw_kboffset;
+ uint32_t uvdfw_kboffset_L;
+ uint32_t uvdfw_kboffset_H;
/* UVD FW size in KB */
uint32_t uvdfw_ksize;
/* VCE FW position in kb from the start of VF visible frame buffer */
- uint64_t vcefw_kboffset;
+ uint32_t vcefw_kboffset_L;
+ uint32_t vcefw_kboffset_H;
/* VCE FW size in KB */
uint32_t vcefw_ksize;
- uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
+ uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (18 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 0)];
} __aligned(4);
@@ -254,6 +262,17 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
} \
} while (0)
+struct amdgpu_virt_ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* point to reserved bo array */
+ struct amdgpu_bo **bps_bo;
+ /* the count of entries */
+ int count;
+ /* last reserved entry's index + 1 */
+ int last_reserved;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -272,6 +291,8 @@ struct amdgpu_virt {
uint32_t reg_access_mode;
int req_init_data_ver;
bool tdr_debug;
+ struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
+ bool ras_init_done;
};
#define amdgpu_sriov_enabled(adev) \
@@ -323,6 +344,7 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_detect_virtualization(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7417754e9141..71e005cf2952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2208,7 +2208,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)))
+ (bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2273,7 +2274,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
/* make sure object fit at this offset */
eaddr = saddr + size - 1;
if (saddr >= eaddr ||
- (bo && offset + size > amdgpu_bo_size(bo)))
+ (bo && offset + size > amdgpu_bo_size(bo)) ||
+ (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
return -EINVAL;
/* Allocate all the needed memory */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index f138e6aa4c2b..c498804711d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
int r;
/* Wait for PD/PT moves to be completed */
- r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false);
+ r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index d399e5893170..97ad8ffe6c6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -407,8 +407,6 @@ error:
* amdgpu_vram_mgr_del - free ranges
*
* @man: TTM memory type manager
- * @tbo: TTM BO we need this range for
- * @place: placement flags and restrictions
* @mem: TTM memory object
*
* Free the allocated VRAM again.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 91837a991319..e3a3755cb999 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -144,11 +144,6 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
};
-void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
-{
- return &hive->device_list;
-}
-
/**
* DOC: AMDGPU XGMI Support
*
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
new file mode 100644
index 000000000000..0219bd6ce1b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_1.h"
+
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ athub_v2_1_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ athub_v2_1_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
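The two update helpers in the new athub_v2_1.c follow the same read-modify-write idiom: snapshot the register, flip the enable bit according to the cg_flags, and skip the write when nothing changed. A hardware-agnostic sketch of that idiom, with the register accessors stubbed out and the mask value invented for illustration:

#include <stdbool.h>
#include <stdint.h>

#define CG_ENABLE_MASK 0x00010000u              /* placeholder, not the real ATHUB mask */

static uint32_t misc_cntl;                      /* fake register backing store */
static uint32_t reg_read(void)        { return misc_cntl; }
static void     reg_write(uint32_t v) { misc_cntl = v; }

static void update_clock_gating(bool enable, bool asic_supports_cg)
{
	uint32_t def, data;

	def = data = reg_read();

	if (enable && asic_supports_cg)
		data |= CG_ENABLE_MASK;
	else
		data &= ~CG_ENABLE_MASK;

	if (def != data)        /* only touch the register when the value changed */
		reg_write(data);
}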
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
new file mode 100644
index 000000000000..5e6824c0f591
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_1_H__
+#define __ATHUB_V2_1_H__
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index ec61532e2f83..01ce52266966 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2342,7 +2342,7 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
/* XXX need to determine what plls are available on each DCE11 part */
pll_in_use = amdgpu_pll_get_use_mask(crtc);
- if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) {
+ if (adev->flags & AMD_IS_APU) {
if (!(pll_in_use & (1 << ATOM_PPLL1)))
return ATOM_PPLL1;
if (!(pll_in_use & (1 << ATOM_PPLL0)))
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index a7b8292cefee..1ab261836983 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -560,7 +560,7 @@ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
uint64_t config)
{
- uint32_t lo_base_addr, hi_base_addr;
+ uint32_t lo_base_addr = 0, hi_base_addr = 0;
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index bd5dd4f64311..323285eb1457 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -55,6 +55,7 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -62,6 +63,54 @@
#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA0_CLK_CTRL 0x507a
+#define mmCGTT_SPI_RA0_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA1_CLK_CTRL 0x507b
+#define mmCGTT_SPI_RA1_CLK_CTRL_BASE_IDX 1
+
+#define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8
+#define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L
+
+#define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55
+#define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid_BASE_IDX 1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid 0x11ec
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid 0x0fc1
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid 0x0fc2
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid 0x0fc3
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid 0x0fc4
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid 0x0fc5
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid 0x0fc6
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid_BASE_IDX 0
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid__SHIFT 0x1a
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid_MASK 0x04000000L
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid_MASK 0x00000FFCL
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid__SHIFT 0x2
+#define CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK 0x00000FFCL
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmCP_HYP_PFP_UCODE_ADDR 0x5814
+#define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_PFP_UCODE_DATA 0x5815
+#define mmCP_HYP_PFP_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_ADDR 0x5818
+#define mmCP_HYP_CE_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_DATA 0x5819
+#define mmCP_HYP_CE_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_ADDR 0x5816
+#define mmCP_HYP_ME_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_DATA 0x5817
+#define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1
MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
@@ -89,6 +138,13 @@ MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
+
static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
@@ -3013,6 +3069,51 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
};
+static const struct soc15_reg_golden golden_settings_gc_10_3[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =
+{
+ /* Pending on emulation bring up */
+};
+
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@@ -3193,6 +3294,14 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_rlc_spm_10_1_2_nv12,
(const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
break;
+ case CHIP_SIENNA_CICHLID:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_sienna_cichlid,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
+ break;
default:
break;
}
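Each SOC15_REG_GOLDEN_VALUE entry in the tables above names a register, an AND mask of the bits the entry owns, and the value to program into them. A generic sketch of how such a table could be applied read-modify-write style; this is only an illustration with stubbed register I/O, not the driver's soc15_program_register_sequence():

#include <stdint.h>

struct golden_reg {
	uint32_t reg;       /* register offset */
	uint32_t and_mask;  /* bits this entry owns */
	uint32_t or_value;  /* value to program into those bits */
};

static uint32_t reg_read(uint32_t reg)              { (void)reg; return 0; }
static void     reg_write(uint32_t reg, uint32_t v) { (void)reg; (void)v; }

/* Apply a table of golden settings: clear the owned bits, OR in the new
 * value, leave every other bit of the register untouched. */
static void apply_golden(const struct golden_reg *tbl, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		uint32_t tmp = reg_read(tbl[i].reg);

		tmp &= ~tbl[i].and_mask;
		tmp |= tbl[i].or_value & tbl[i].and_mask;
		reg_write(tbl[i].reg, tmp);
	}
}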
@@ -3373,6 +3482,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version >= 27))
adev->gfx.cp_fw_write_wait = true;
break;
+ case CHIP_SIENNA_CICHLID:
+ adev->gfx.cp_fw_write_wait = true;
+ break;
default:
break;
}
@@ -3463,6 +3575,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
case CHIP_NAVI12:
chip_name = "navi12";
break;
+ case CHIP_SIENNA_CICHLID:
+ chip_name = "sienna_cichlid";
+ break;
default:
BUG();
}
@@ -3992,6 +4107,16 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
break;
+ case CHIP_SIENNA_CICHLID:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+ break;
default:
BUG();
break;
@@ -4104,6 +4229,14 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
break;
+ case CHIP_SIENNA_CICHLID:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
default:
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
@@ -4273,12 +4406,6 @@ static int gfx_v10_0_sw_fini(void *handle)
return 0;
}
-
-static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
-{
- /* TODO */
-}
-
static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance)
{
@@ -4355,6 +4482,11 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
+ /* for ASICs that integrate GFX v10.3,
+ * pa_sc_tile_steering_override should be set to 0 */
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ return 0;
+
/* init num_sc */
num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
adev->gfx.config.num_sc_per_sh;
@@ -4523,8 +4655,6 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
- gfx_v10_0_tiling_mode_table_init(adev);
-
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
gfx_v10_0_get_tcc_info(adev);
@@ -4558,7 +4688,12 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
+ u32 tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
enable ? 1 : 0);
@@ -5271,6 +5406,14 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
return 0;
}
@@ -5340,6 +5483,14 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, 0);
+
+ for (i = 0; i < ce_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_DATA,
+ le32_to_cpup(fw_data + ce_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
+
return 0;
}
@@ -5409,6 +5560,14 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
return 0;
}
@@ -5549,12 +5708,24 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
DOORBELL_EN, 0);
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
- tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
- DOORBELL_RANGE_LOWER, ring->doorbell_index);
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK);
+ break;
+ default:
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
- WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
- CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+ break;
+ }
}
static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
@@ -5669,11 +5840,27 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ break;
+ }
} else {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
- CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ }
adev->gfx.kiq.ring.sched.ready = false;
}
udelay(50);
@@ -5755,12 +5942,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which is KIQ queue */
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
- tmp &= 0xffffff00;
- tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ break;
+ default:
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ break;
+ }
}
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
@@ -6446,18 +6645,33 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
/* check if mmVGT_ESGS_RING_SIZE_UMD
* has been remapped to mmVGT_ESGS_RING_SIZE */
- data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
-
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ } else {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
+ return false;
+ }
+ break;
+ default:
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
- if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
- return true;
- } else {
- WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
- return false;
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ } else {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
+ return false;
+ }
+ break;
}
}
@@ -6469,59 +6683,119 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
* index will auto-inc after each data writing */
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
- /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
-
- /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
-
- /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
-
- /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
-
- /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
-
- /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
- data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
- WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ default:
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ }
- /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
- data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
- GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
- (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
- GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
}
@@ -6602,6 +6876,7 @@ static int gfx_v10_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
+ uint32_t tmp;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -6616,6 +6891,11 @@ static int gfx_v10_0_hw_fini(void *handle)
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
gfx_v10_0_cp_gfx_enable(adev, false);
+ /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+
return 0;
}
gfx_v10_0_cp_enable(adev, false);
@@ -6693,10 +6973,22 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
- if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET, SOFT_RESET_RLC,
- 1);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ default:
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ }
if (grbm_soft_reset) {
/* stop the rlc */
@@ -6774,7 +7066,18 @@ static int gfx_v10_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+ break;
+ case CHIP_SIENNA_CICHLID:
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
+ break;
+ default:
+ break;
+ }
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
@@ -6819,13 +7122,30 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
- /* wait for RLC_SAFE_MODE */
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
- break;
- udelay(1);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
}
}
@@ -6834,7 +7154,14 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+ break;
+ }
}
static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -7118,6 +7445,8 @@ static int gfx_v10_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
@@ -7138,6 +7467,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
gfx_v10_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -8236,6 +8566,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
break;
case CHIP_NAVI12:
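Most of the gfx_v10_0.c changes follow one pattern: Sienna Cichlid relocated several CP/RLC registers, so each access site switches on adev->asic_type between the legacy offset and the new *_Sienna_Cichlid one. A compressed sketch of that selection, using the offset defined earlier in the patch for Sienna Cichlid and a placeholder for the legacy chips:

#include <stdint.h>

enum asic_type { CHIP_NAVI10, CHIP_NAVI14, CHIP_NAVI12, CHIP_SIENNA_CICHLID };

#define RLC_SAFE_MODE_LEGACY          0x1000u  /* placeholder offset for NV1x */
#define RLC_SAFE_MODE_SIENNA_CICHLID  0x4ca0u  /* matches mmRLC_SAFE_MODE_Sienna_Cichlid above */

/* gfx_v10_0.c open-codes this choice with a switch at each call site;
 * a helper like this captures the same per-ASIC offset selection. */
static uint32_t rlc_safe_mode_offset(enum asic_type type)
{
	switch (type) {
	case CHIP_SIENNA_CICHLID:
		return RLC_SAFE_MODE_SIENNA_CICHLID;
	default:
		return RLC_SAFE_MODE_LEGACY;
	}
}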
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0cc011f9190d..4aec76049a60 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3039,7 +3039,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_active = 1;
}
-int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
uint32_t tmp;
uint32_t mqd_reg;
@@ -5209,7 +5209,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->lds_size = 64;
}
-const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
+static const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 7,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1d4128227ffd..efb759b62d21 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4589,7 +4589,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
return 0;
}
-int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
struct vi_mqd *mqd)
{
uint32_t mqd_reg;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 711e9dd19705..99ffc3e1fddc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -722,7 +722,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
-void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
static void *scratch_reg0;
static void *scratch_reg1;
@@ -1890,7 +1890,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return r;
}
- if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
+ if (adev->flags & AMD_IS_APU) {
/* TODO: double check the cp_table_size for RV */
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
r = amdgpu_gfx_rlc_init_cpt(adev);
@@ -1960,7 +1960,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
fw_data = (const __le32 *)
(adev->gfx.mec_fw->data +
le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
@@ -2384,7 +2384,7 @@ static int gfx_v9_0_sw_fini(void *handle)
gfx_v9_0_mec_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
- if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
+ if (adev->flags & AMD_IS_APU) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
@@ -2856,8 +2856,8 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
-
- pwr_10_0_gfxip_control_over_cgpg(adev, true);
+ if (adev->asic_type != CHIP_RENOIR)
+ pwr_10_0_gfxip_control_over_cgpg(adev, true);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index cc866c367939..6939edfc5232 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -181,6 +181,10 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
+
+ tmp = mmGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
}
static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
new file mode 100644
index 000000000000..fcc4c1912513
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v2_1.h"
+
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "gc/gc_10_3_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
+}
+
+void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ /* two-register distance between mmGCVM_CONTEXT0_* and mmGCVM_CONTEXT1_* */
+ int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ offset * vmid, lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ offset * vmid, upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v2_1_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Disable AGP. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 18);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v2_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, refer to closed source code. */
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
+
+ tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
+
+ tmp = mmGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
+
+ tmp = mmGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v2_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ 2 * i, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ 2 * i, 0x1f);
+ }
+}
+
+int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
+ * VF copy registers so the vbios post doesn't program them; for
+ * SRIOV the driver needs to program them
+ */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v2_1_init_gart_aperture_regs(adev);
+ gfxhub_v2_1_init_system_aperture_regs(adev);
+ gfxhub_v2_1_init_tlb_regs(adev);
+ gfxhub_v2_1_init_cache_regs(adev);
+
+ gfxhub_v2_1_enable_system_domain(adev);
+ gfxhub_v2_1_disable_identity_aperture(adev);
+ gfxhub_v2_1_setup_vmid_config(adev);
+ gfxhub_v2_1_program_invalidation(adev);
+
+ return 0;
+}
+
+void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v2_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+void gfxhub_v2_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+}
+
+int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
+ u32 max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ u32 max_num_physical_nodes = 0;
+ u32 max_physical_node_id = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+ return -EINVAL;
+
+ adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
+ GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ }
+
+ return 0;
+}
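gfxhub_v2_1_get_xgmi_info() above derives the XGMI topology from GCMC_VM_XGMI_LFB_CNTL: a PF_MAX_REGION of zero means XGMI is disabled, otherwise the node count and node id are validated against per-ASIC limits (4 nodes, id up to 3 for Sienna Cichlid). A simplified, field-level sketch of that logic with the register reads replaced by plain parameters:

#include <stdint.h>
#include <errno.h>

struct xgmi_info {
	uint32_t num_physical_nodes;
	uint32_t physical_node_id;
};

/* pf_max_region / pf_lfb_region stand in for the fields read from
 * GCMC_VM_XGMI_LFB_CNTL in the function above. */
static int decode_xgmi(uint32_t pf_max_region, uint32_t pf_lfb_region,
		       struct xgmi_info *out)
{
	const uint32_t max_nodes = 4, max_node_id = 3;   /* Sienna Cichlid limits */

	if (!pf_max_region) {                            /* XGMI disabled */
		out->num_physical_nodes = 0;
		out->physical_node_id = 0;
		return 0;
	}

	out->num_physical_nodes = pf_max_region + 1;
	if (out->num_physical_nodes > max_nodes)
		return -EINVAL;

	out->physical_node_id = pf_lfb_region;
	if (out->physical_node_id > max_node_id)
		return -EINVAL;

	return 0;
}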
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h
index 242ec257998c..3452a4e9a3da 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_fw_meta.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h
@@ -19,45 +19,21 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: AMD
- *
*/
-#ifndef _DMUB_META_H_
-#define _DMUB_META_H_
-
-#include "dmub_types.h"
-
-#pragma pack(push, 1)
-
-/* Magic value for identifying dmub_fw_meta_info */
-#define DMUB_FW_META_MAGIC 0x444D5542
-/* Offset from the end of the file to the dmub_fw_meta_info */
-#define DMUB_FW_META_OFFSET 0x24
-
-/**
- * struct dmub_fw_meta_info - metadata associated with fw binary
- *
- * NOTE: This should be considered a stable API. Fields should
- * not be repurposed or reordered. New fields should be
- * added instead to extend the structure.
- *
- * @magic_value: magic value identifying DMUB firmware meta info
- * @fw_region_size: size of the firmware state region
- * @trace_buffer_size: size of the tracebuffer region
- */
-struct dmub_fw_meta_info {
- uint32_t magic_value;
- uint32_t fw_region_size;
- uint32_t trace_buffer_size;
-};
+#ifndef __GFXHUB_V2_1_H__
+#define __GFXHUB_V2_1_H__
-/* Ensure that the structure remains 64 bytes. */
-union dmub_fw_meta {
- struct dmub_fw_meta_info info;
- uint8_t reserved[64];
-};
+u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev);
+int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev);
+void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev);
+void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value);
+void gfxhub_v2_1_init(struct amdgpu_device *adev);
+u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev);
+void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
-#pragma pack(pop)
+int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev);
-#endif /* _DMUB_META_H_ */
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ba2b7ac0c02d..f7e66bf0f647 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -45,8 +45,10 @@
#include "nbio_v2_3.h"
#include "gfxhub_v2_0.h"
+#include "gfxhub_v2_1.h"
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"
+#include "athub_v2_1.h"
/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
#define AMDGPU_NUM_OF_VMIDS 8
@@ -348,6 +350,24 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* flush hdp cache */
adev->nbio.funcs->hdp_flush(adev, NULL);
+ /* For SRIOV runtime, the driver shouldn't access the register through MMIO.
+ * Use KIQ to do the VM invalidation instead.
+ */
+ if (adev->gfx.kiq.ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ !adev->in_gpu_reset) {
+
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned eng = 17;
+ u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + eng;
+ u32 ack = hub->vm_inv_eng0_ack + eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+ return;
+ }
+
mutex_lock(&adev->mman.gtt_window_lock);
if (vmhub == AMDGPU_MMHUB_0) {
@@ -666,13 +686,26 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = 0;
- base = gfxhub_v2_0_get_fb_location(adev);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ base = gfxhub_v2_1_get_fb_location(adev);
+ else
+ base = gfxhub_v2_0_get_fb_location(adev);
+
+ /* add the xgmi offset of the physical node */
+ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
/* base offset of vram pages */
- adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev);
+ else
+ adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
+
+ /* add the xgmi offset of the physical node */
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/**
@@ -712,6 +745,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -780,24 +814,32 @@ static int gmc_v10_0_sw_init(void *handle)
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gfxhub_v2_0_init(adev);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ gfxhub_v2_1_init(adev);
+ else
+ gfxhub_v2_0_init(adev);
+
mmhub_v2_0_init(adev);
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
- if (!amdgpu_emu_mode)
- adev->gmc.vram_width = vram_width;
- else
+ if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+ }
- adev->gmc.vram_type = vram_type;
- adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
adev->num_vmhubs = 2;
/*
* To fulfill 4-level page support,
@@ -836,6 +878,12 @@ static int gmc_v10_0_sw_init(void *handle)
return r;
}
+ if (adev->gmc.xgmi.supported) {
+ r = gfxhub_v2_1_get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
r = gmc_v10_0_mc_init(adev);
if (r)
return r;
@@ -896,6 +944,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
break;
default:
break;
@@ -922,7 +971,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
- r = gfxhub_v2_0_gart_enable(adev);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ r = gfxhub_v2_1_gart_enable(adev);
+ else
+ r = gfxhub_v2_0_gart_enable(adev);
if (r)
return r;
@@ -943,7 +995,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
- gfxhub_v2_0_set_fault_enable_default(adev, value);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ gfxhub_v2_1_set_fault_enable_default(adev, value);
+ else
+ gfxhub_v2_0_set_fault_enable_default(adev, value);
mmhub_v2_0_set_fault_enable_default(adev, value);
gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
@@ -981,7 +1036,10 @@ static int gmc_v10_0_hw_init(void *handle)
*/
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
- gfxhub_v2_0_gart_disable(adev);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ gfxhub_v2_1_gart_disable(adev);
+ else
+ gfxhub_v2_0_gart_disable(adev);
mmhub_v2_0_gart_disable(adev);
amdgpu_gart_table_vram_unpin(adev);
}
@@ -1052,7 +1110,10 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
if (r)
return r;
- return athub_v2_0_set_clockgating(adev, state);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ return athub_v2_1_set_clockgating(adev, state);
+ else
+ return athub_v2_0_set_clockgating(adev, state);
}
static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
@@ -1061,7 +1122,10 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
mmhub_v2_0_get_clockgating(adev, flags);
- athub_v2_0_get_clockgating(adev, flags);
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ athub_v2_1_get_clockgating(adev, flags);
+ else
+ athub_v2_0_get_clockgating(adev, flags);
}
static int gmc_v10_0_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index b10c95cad9a2..4c6c7544b400 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
+#include "jpeg_v1_0.h"
#include "vcn/vcn_1_0_offset.h"
#include "vcn/vcn_1_0_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index e67d09cb1b03..c5f49129a300 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
+#include "jpeg_v2_0.h"
#include "vcn/vcn_2_0_0_offset.h"
#include "vcn/vcn_2_0_0_sh_mask.h"
@@ -229,9 +230,9 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
- SOC15_WAIT_ON_RREG(JPEG, 0,
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
@@ -260,9 +261,9 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)
data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
- SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
(2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
@@ -676,10 +677,10 @@ static bool jpeg_v2_0_is_idle(void *handle)
static int jpeg_v2_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret = 0;
+ int ret;
- SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
- UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+ ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 713c32560445..7a51c615d22d 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -449,20 +449,20 @@ static bool jpeg_v2_5_is_idle(void *handle)
static int jpeg_v2_5_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i, ret = 0;
+ int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
- SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
+ ret = SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
- UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
if (ret)
return ret;
}
- return ret;
+ return 0;
}
static int jpeg_v2_5_set_clockgating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
new file mode 100644
index 000000000000..42f1a516005e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_3_0_0_offset.h"
+#include "vcn/vcn_3_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v3_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_SIENNA_CICHLID) {
+ u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING);
+
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ return -ENOENT;
+ }
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v3_0_set_dec_ring_funcs(adev);
+ jpeg_v3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v3_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v3_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v3_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v3_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+ jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v3_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v3_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v3_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v3_0_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device* adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data = 0;
+ int r = 0;
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ r = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v3_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v3_0_disable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v3_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, 0, mmJPEG_ENC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v3_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v3_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v3_0_enable_static_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v3_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v3_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v3_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 1;
+
+ ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+
+ return ret;
+}
+
+static int jpeg_v3_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
+static int jpeg_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+ if (enable) {
+ if (!jpeg_v3_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v3_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v3_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if(state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v3_0_stop(adev);
+ else
+ ret = jpeg_v3_0_start(adev);
+
+ if(!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
+ .name = "jpeg_v3_0",
+ .early_init = jpeg_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v3_0_sw_init,
+ .sw_fini = jpeg_v3_0_sw_fini,
+ .hw_init = jpeg_v3_0_hw_init,
+ .hw_fini = jpeg_v3_0_hw_fini,
+ .suspend = jpeg_v3_0_suspend,
+ .resume = jpeg_v3_0_resume,
+ .is_idle = jpeg_v3_0_is_idle,
+ .wait_for_idle = jpeg_v3_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v3_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v3_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = jpeg_v3_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v3_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v3_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v3_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v3_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
+ .set = jpeg_v3_0_set_interrupt_state,
+ .process = jpeg_v3_0_process_interrupt,
+};
+
+static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v3_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v3_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h
new file mode 100644
index 000000000000..ce775a0c742f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V3_0_H__
+#define __JPEG_V3_0_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v3_0_ip_block;
+
+#endif /* __JPEG_V3_0_H__ */
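Editor's illustrative sketch (not part of the patch): the exported jpeg_v3_0_ip_block is consumed by SoC setup code via the existing amdgpu_device_ip_block_add() helper; the wrapper function name below is hypothetical and shown only to clarify how the new header is used.

#include "amdgpu.h"
#include "jpeg_v3_0.h"

/* hypothetical helper: append the JPEG 3.0 IP block to a device's IP list */
static int example_add_jpeg_v3_0_block(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
}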
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_api_def.h b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
new file mode 100644
index 000000000000..3f4fca5fd1da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_api_def.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_API_DEF_H__
+#define __MES_API_DEF_H__
+
+#pragma pack(push, 4)
+
+#define MES_API_VERSION 1
+
+/* Driver submits one API (cmd) as a single frame, and the command size is the
+ * same for all APIs to ease debugging and parsing of the ring buffer.
+ */
+enum { API_FRAME_SIZE_IN_DWORDS = 64 };
+
+/* To avoid the command in the scheduler context being overwritten whenever
+ * multiple interrupts come in, this creates another queue.
+ */
+enum { API_NUMBER_OF_COMMAND_MAX = 32 };
+
+enum MES_API_TYPE {
+ MES_API_TYPE_SCHEDULER = 1,
+ MES_API_TYPE_MAX
+};
+
+enum MES_SCH_API_OPCODE {
+ MES_SCH_API_SET_HW_RSRC = 0,
+	MES_SCH_API_SET_SCHEDULING_CONFIG = 1, /* aggregated db, quantums, etc */
+ MES_SCH_API_ADD_QUEUE = 2,
+ MES_SCH_API_REMOVE_QUEUE = 3,
+ MES_SCH_API_PERFORM_YIELD = 4,
+ MES_SCH_API_SET_GANG_PRIORITY_LEVEL = 5,
+ MES_SCH_API_SUSPEND = 6,
+ MES_SCH_API_RESUME = 7,
+ MES_SCH_API_RESET = 8,
+ MES_SCH_API_SET_LOG_BUFFER = 9,
+ MES_SCH_API_CHANGE_GANG_PRORITY = 10,
+ MES_SCH_API_QUERY_SCHEDULER_STATUS = 11,
+ MES_SCH_API_PROGRAM_GDS = 12,
+ MES_SCH_API_SET_DEBUG_VMID = 13,
+ MES_SCH_API_MISC = 14,
+ MES_SCH_API_MAX = 0xFF
+};
+
+union MES_API_HEADER {
+ struct {
+ uint32_t type : 4; /* 0 - Invalid; 1 - Scheduling; 2 - TBD */
+ uint32_t opcode : 8;
+ uint32_t dwsize : 8; /* including header */
+ uint32_t reserved : 12;
+ };
+
+ uint32_t u32All;
+};
+
+enum MES_AMD_PRIORITY_LEVEL {
+ AMD_PRIORITY_LEVEL_LOW = 0,
+ AMD_PRIORITY_LEVEL_NORMAL = 1,
+ AMD_PRIORITY_LEVEL_MEDIUM = 2,
+ AMD_PRIORITY_LEVEL_HIGH = 3,
+ AMD_PRIORITY_LEVEL_REALTIME = 4,
+ AMD_PRIORITY_NUM_LEVELS
+};
+
+enum MES_QUEUE_TYPE {
+ MES_QUEUE_TYPE_GFX,
+ MES_QUEUE_TYPE_COMPUTE,
+ MES_QUEUE_TYPE_SDMA,
+ MES_QUEUE_TYPE_MAX,
+};
+
+struct MES_API_STATUS {
+ uint64_t api_completion_fence_addr;
+ uint64_t api_completion_fence_value;
+};
+
+enum { MAX_COMPUTE_PIPES = 8 };
+enum { MAX_GFX_PIPES = 2 };
+enum { MAX_SDMA_PIPES = 2 };
+
+enum { MAX_COMPUTE_HQD_PER_PIPE = 8 };
+enum { MAX_GFX_HQD_PER_PIPE = 8 };
+enum { MAX_SDMA_HQD_PER_PIPE = 10 };
+
+enum { MAX_QUEUES_IN_A_GANG = 8 };
+
+enum VM_HUB_TYPE {
+ VM_HUB_TYPE_GC = 0,
+ VM_HUB_TYPE_MM = 1,
+ VM_HUB_TYPE_MAX,
+};
+
+enum { VMID_INVALID = 0xffff };
+
+enum { MAX_VMID_GCHUB = 16 };
+enum { MAX_VMID_MMHUB = 16 };
+
+enum MES_LOG_OPERATION {
+ MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
+};
+
+enum MES_LOG_CONTEXT_STATE {
+ MES_LOG_CONTEXT_STATE_IDLE = 0,
+ MES_LOG_CONTEXT_STATE_RUNNING = 1,
+ MES_LOG_CONTEXT_STATE_READY = 2,
+ MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
+};
+
+struct MES_LOG_CONTEXT_STATE_CHANGE {
+ void *h_context;
+ enum MES_LOG_CONTEXT_STATE new_context_state;
+};
+
+struct MES_LOG_ENTRY_HEADER {
+ uint32_t first_free_entry_index;
+ uint32_t wraparound_count;
+ uint64_t number_of_entries;
+ uint64_t reserved[2];
+};
+
+struct MES_LOG_ENTRY_DATA {
+ uint64_t gpu_time_stamp;
+ uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
+ uint32_t reserved_operation_type_bits;
+ union {
+ struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
+ uint64_t reserved_operation_data[2];
+ };
+};
+
+struct MES_LOG_BUFFER {
+ struct MES_LOG_ENTRY_HEADER header;
+ struct MES_LOG_ENTRY_DATA entries[1];
+};
+
+union MESAPI_SET_HW_RESOURCES {
+ struct {
+ union MES_API_HEADER header;
+ uint32_t vmid_mask_mmhub;
+ uint32_t vmid_mask_gfxhub;
+ uint32_t gds_size;
+ uint32_t paging_vmid;
+ uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
+ uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
+ uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
+ uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
+ uint64_t g_sch_ctx_gpu_mc_ptr;
+ uint64_t query_status_fence_gpu_mc_ptr;
+ struct MES_API_STATUS api_status;
+ union {
+ struct {
+ uint32_t disable_reset : 1;
+ uint32_t reserved : 31;
+ };
+ uint32_t uint32_t_all;
+ };
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__ADD_QUEUE {
+ struct {
+ union MES_API_HEADER header;
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_context_addr;
+ uint64_t gang_quantum;
+ uint64_t gang_context_addr;
+ uint32_t inprocess_gang_priority;
+ enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
+ uint32_t doorbell_offset;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ enum MES_QUEUE_TYPE queue_type;
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_mask;
+
+ struct {
+ uint32_t paging : 1;
+ uint32_t debug_vmid : 4;
+ uint32_t program_gds : 1;
+ uint32_t is_gang_suspended : 1;
+ uint32_t is_tmz_queue : 1;
+ uint32_t reserved : 24;
+ };
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__REMOVE_QUEUE {
+ struct {
+ union MES_API_HEADER header;
+ uint32_t doorbell_offset;
+ uint64_t gang_context_addr;
+
+ struct {
+ uint32_t unmap_legacy_gfx_queue : 1;
+ uint32_t reserved : 31;
+ };
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__SET_SCHEDULING_CONFIG {
+ struct {
+ union MES_API_HEADER header;
+ /* Grace period when preempting another priority band for this
+ * priority band. The value for idle priority band is ignored,
+ * as it never preempts other bands.
+ */
+ uint64_t grace_period_other_levels[AMD_PRIORITY_NUM_LEVELS];
+ /* Default quantum for scheduling across processes within
+ * a priority band.
+ */
+ uint64_t process_quantum_for_level[AMD_PRIORITY_NUM_LEVELS];
+ /* Default grace period for processes that preempt each other
+ * within a priority band.
+ */
+ uint64_t process_grace_period_same_level[AMD_PRIORITY_NUM_LEVELS];
+ /* For normal level this field specifies the target GPU
+ * percentage in situations when it's starved by the high level.
+ * Valid values are between 0 and 50, with the default being 10.
+ */
+ uint32_t normal_yield_percent;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__PERFORM_YIELD {
+ struct {
+ union MES_API_HEADER header;
+ uint32_t dummy;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__CHANGE_GANG_PRIORITY_LEVEL {
+ struct {
+ union MES_API_HEADER header;
+ uint32_t inprocess_gang_priority;
+ enum MES_AMD_PRIORITY_LEVEL gang_global_priority_level;
+ uint64_t gang_quantum;
+ uint64_t gang_context_addr;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__SUSPEND {
+ struct {
+ union MES_API_HEADER header;
+ /* false - suspend all gangs; true - specific gang */
+ struct {
+ uint32_t suspend_all_gangs : 1;
+ uint32_t reserved : 31;
+ };
+ /* gang_context_addr is valid only if suspend_all = false */
+ uint64_t gang_context_addr;
+
+ uint64_t suspend_fence_addr;
+ uint32_t suspend_fence_value;
+
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__RESUME {
+ struct {
+ union MES_API_HEADER header;
+ /* false - resume all gangs; true - specified gang */
+ struct {
+ uint32_t resume_all_gangs : 1;
+ uint32_t reserved : 31;
+ };
+ /* valid only if resume_all_gangs = false */
+ uint64_t gang_context_addr;
+
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__RESET {
+ struct {
+ union MES_API_HEADER header;
+
+ struct {
+ uint32_t reset_queue : 1;
+ uint32_t reserved : 31;
+ };
+
+ uint64_t gang_context_addr;
+ uint32_t doorbell_offset; /* valid only if reset_queue = true */
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__SET_LOGGING_BUFFER {
+ struct {
+ union MES_API_HEADER header;
+ /* There are separate log buffers for each queue type */
+ enum MES_QUEUE_TYPE log_type;
+ /* Log buffer GPU Address */
+ uint64_t logging_buffer_addr;
+ /* number of entries in the log buffer */
+ uint32_t number_of_entries;
+ /* Entry index at which CPU interrupt needs to be signalled */
+ uint32_t interrupt_entry;
+
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__QUERY_MES_STATUS {
+ struct {
+ union MES_API_HEADER header;
+ bool mes_healthy; /* 0 - not healthy, 1 - healthy */
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__PROGRAM_GDS {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t process_context_addr;
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_mask;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI__SET_DEBUG_VMID {
+ struct {
+ union MES_API_HEADER header;
+ struct MES_API_STATUS api_status;
+ union {
+ struct {
+ uint32_t use_gds : 1;
+ uint32_t reserved : 31;
+ } flags;
+ uint32_t u32All;
+ };
+ uint32_t reserved;
+ uint32_t debug_vmid;
+ uint64_t process_context_addr;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_mask;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+enum MESAPI_MISC_OPCODE {
+ MESAPI_MISC__MODIFY_REG,
+ MESAPI_MISC__MAX,
+};
+
+enum MODIFY_REG_SUBCODE {
+ MODIFY_REG__OVERWRITE,
+ MODIFY_REG__RMW_OR,
+ MODIFY_REG__RMW_AND,
+ MODIFY_REG__MAX,
+};
+
+enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
+
+union MESAPI__MISC {
+ struct {
+ union MES_API_HEADER header;
+ enum MESAPI_MISC_OPCODE opcode;
+ struct MES_API_STATUS api_status;
+
+ union {
+ struct {
+ enum MODIFY_REG_SUBCODE subcode;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } modify_reg;
+ uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
+ };
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+#pragma pack(pop)
+#endif
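Editor's illustrative sketch (not part of the patch): every MES API union above ends with max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS], so each command occupies exactly one 64-dword frame, as the comment at the top of the header states. A minimal compile-time check of that invariant could look like the following; the function name is hypothetical.

#include <linux/build_bug.h>
#include "mes_api_def.h"

/* hypothetical sanity check: each MES command spans one fixed-size frame */
static inline void mes_api_frame_size_check(void)
{
	BUILD_BUG_ON(sizeof(union MESAPI__ADD_QUEUE) !=
		     API_FRAME_SIZE_IN_DWORDS * sizeof(uint32_t));
	BUILD_BUG_ON(sizeof(union MESAPI__REMOVE_QUEUE) !=
		     API_FRAME_SIZE_IN_DWORDS * sizeof(uint32_t));
	BUILD_BUG_ON(sizeof(union MESAPI_SET_HW_RESOURCES) !=
		     API_FRAME_SIZE_IN_DWORDS * sizeof(uint32_t));
}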
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 29fab7984855..4b746584a797 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -28,19 +28,165 @@
#include "nv.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
+#include "v10_structs.h"
+#include "mes_api_def.h"
+
+#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
+#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
+
+static int mes_v10_1_hw_fini(void *handle);
+
+#define MES_EOP_SIZE 2048
+
+static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs],
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG();
+ }
+}
+
+static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs];
+}
+
+static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)
+ &ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v10_1_ring_get_rptr,
+ .get_wptr = mes_v10_1_ring_get_wptr,
+ .set_wptr = mes_v10_1_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+ void *pkt, int size)
+{
+ int ndw = size / 4;
+ signed long r;
+ union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ struct amdgpu_device *adev = mes->adev;
+ struct amdgpu_ring *ring = &mes->ring;
+
+ BUG_ON(size % 4 != 0);
+
+ if (amdgpu_ring_alloc(ring, ndw))
+ return -ENOMEM;
+
+ amdgpu_ring_write_multiple(ring, pkt, ndw);
+ amdgpu_ring_commit(ring);
+
+ DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
+ adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("MES failed to response msg=%d\n",
+ x_pkt->header.opcode);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+ if (queue_type == AMDGPU_RING_TYPE_GFX)
+ return MES_QUEUE_TYPE_GFX;
+ else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
+ return MES_QUEUE_TYPE_COMPUTE;
+ else if (queue_type == AMDGPU_RING_TYPE_SDMA)
+ return MES_QUEUE_TYPE_SDMA;
+ else
+ BUG();
+ return -1;
+}
static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
struct mes_add_queue_input *input)
{
- return 0;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.process_id = input->process_id;
+ mes_add_queue_pkt.page_table_base_addr =
+ input->page_table_base_addr - adev->gmc.vram_start;
+ mes_add_queue_pkt.process_va_start = input->process_va_start;
+ mes_add_queue_pkt.process_va_end = input->process_va_end;
+ mes_add_queue_pkt.process_quantum = input->process_quantum;
+ mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+ mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+ mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+ mes_add_queue_pkt.inprocess_gang_priority =
+ input->inprocess_gang_priority;
+ mes_add_queue_pkt.gang_global_priority_level =
+ input->gang_global_priority_level;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.paging = input->paging;
+
+ mes_add_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_add_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
}
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
- return 0;
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+
+ mes_remove_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_remove_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
@@ -55,6 +201,68 @@ static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
return 0;
}
+static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
+{
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_status_pkt, sizeof(mes_status_pkt));
+}
+
+static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.agreegated_doorbells[i] =
+ mes->agreegated_doorbells[i];
+
+ mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_set_hw_res_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
+}
+
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
@@ -68,11 +276,15 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
char fw_name[30];
int err;
const struct mes_firmware_header_v1_0 *mes_hdr;
+ struct amdgpu_firmware_info *info;
switch (adev->asic_type) {
case CHIP_NAVI10:
chip_name = "navi10";
break;
+ case CHIP_SIENNA_CICHLID:
+ chip_name = "sienna_cichlid";
+ break;
default:
BUG();
}
@@ -100,6 +312,22 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MES;
+ info->fw = adev->mes.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MES_DATA];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MES_DATA;
+ info->fw = adev->mes.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+ }
+
return 0;
}
@@ -267,18 +495,389 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
/* invalidate ICACHE */
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ break;
+ }
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ break;
+ }
/* prime the ICACHE. */
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ break;
+ }
data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+ break;
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj,
+ &adev->mes.eop_gpu_addr,
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ memset(eop, 0, adev->mes.eop_gpu_obj->tbo.mem.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj);
+
+ return 0;
+}
+
+static int mes_v10_1_allocate_mem_slots(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) mes sch_ctx_offs wb alloc failed\n", r);
+ return r;
+ }
+ adev->mes.sch_ctx_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
+ adev->mes.sch_ctx_ptr =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
+
+ r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n", r);
+ return r;
+ }
+ adev->mes.query_status_fence_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
+ adev->mes.query_status_fence_ptr =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
+
+ return 0;
+}
+
+static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+ else
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+ return 0;
+}
+
+static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, 3, 0, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set CP_MQD_CONTROL.VMID=0 */
+ data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+}
+
+#if 0
+static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("kfq enable failed\n");
+ kiq_ring->sched.ready = false;
+ }
+ return r;
+}
+#endif
+
+static int mes_v10_1_queue_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = mes_v10_1_mqd_init(&adev->mes.ring);
+ if (r)
+ return r;
+
+#if 0
+ r = mes_v10_1_kiq_enable_queue(adev);
+ if (r)
+ return r;
+#else
+ mes_v10_1_queue_init_register(&adev->mes.ring);
+#endif
+
+ return 0;
+}
+
+static int mes_v10_1_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->mes.ring;
+
+ ring->funcs = &mes_v10_1_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = 0;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr;
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev)
+{
+ int r, mqd_size = sizeof(struct v10_compute_mqd);
+ struct amdgpu_ring *ring = &adev->mes.ring;
+
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup)
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
return 0;
}
@@ -288,10 +887,29 @@ static int mes_v10_1_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->mes.adev = adev;
+ adev->mes.funcs = &mes_v10_1_funcs;
+
r = mes_v10_1_init_microcode(adev);
if (r)
return r;
+ r = mes_v10_1_allocate_eop_buf(adev);
+ if (r)
+ return r;
+
+ r = mes_v10_1_mqd_sw_init(adev);
+ if (r)
+ return r;
+
+ r = mes_v10_1_ring_init(adev);
+ if (r)
+ return r;
+
+ r = mes_v10_1_allocate_mem_slots(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -299,6 +917,19 @@ static int mes_v10_1_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+
+ kfree(adev->mes.mqd_backup);
+
+ amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
+ &adev->mes.ring.mqd_gpu_addr,
+ &adev->mes.ring.mqd_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj,
+ &adev->mes.eop_gpu_addr,
+ NULL);
+
mes_v10_1_free_microcode(adev);
return 0;
@@ -315,14 +946,29 @@ static int mes_v10_1_hw_init(void *handle)
DRM_ERROR("failed to MES fw, r=%d\n", r);
return r;
}
- } else {
- DRM_ERROR("only support direct fw loading on MES\n");
- return -EINVAL;
}
mes_v10_1_enable(adev, true);
+ r = mes_v10_1_queue_init(adev);
+ if (r)
+ goto failure;
+
+ r = mes_v10_1_set_hw_resources(&adev->mes);
+ if (r)
+ goto failure;
+
+ r = mes_v10_1_query_sched_status(&adev->mes);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
return 0;
+
+failure:
+ mes_v10_1_hw_fini(adev);
+ return r;
}
static int mes_v10_1_hw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index fb3f228458e5..af0866af63a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -31,6 +31,11 @@
#include "soc15_common.h"
+#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
+#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
+#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
+#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
+
void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base)
{
@@ -164,6 +169,10 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp);
+
+ tmp = mmMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
}
static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
@@ -363,9 +372,16 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
{
uint32_t def, data, def1, data1;
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
-
- def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
+ break;
+ default:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ break;
+ }
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
@@ -388,11 +404,20 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
-
- if (def1 != data1)
- WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
+ break;
+ default:
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+ break;
+ }
}
static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@@ -400,15 +425,30 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ break;
+ default:
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ break;
+ }
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+ if (def != data) {
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+ break;
+ }
+ }
}
int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -421,6 +461,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
mmhub_v2_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
@@ -440,9 +481,16 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
- data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
-
- data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
+ break;
+ default:
+ data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+ break;
+ }
/* AMD_CG_SUPPORT_MC_MGCG */
if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index f97857ed3c7e..471dc82fd1aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -34,6 +34,9 @@
#define MAX_REARM_RETRY 10
+#define mmIH_CHICKEN_Sienna_Cichlid 0x018d
+#define mmIH_CHICKEN_Sienna_Cichlid_BASE_IDX 0
+
static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
/**
@@ -265,10 +268,20 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih->use_bus_addr) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
- ih_chicken = REG_SET_FIELD(ih_chicken,
- IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid, ih_chicken);
+ break;
+ default:
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ break;
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index cbcf04578b99..7429f30398b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -33,6 +33,13 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
+#define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6
+#define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2
+#define mmBIF_SDMA3_DOORBELL_RANGE 0x01d7
+#define mmBIF_SDMA3_DOORBELL_RANGE_BASE_IDX 2
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
{
@@ -81,7 +88,9 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
int doorbell_size)
{
u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
- SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+ instance == 1 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) :
+ instance == 2 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA2_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA3_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
@@ -103,7 +112,8 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan
static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance)
{
- u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+ u32 reg = instance ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 6655dd2009b6..a7cfe3ac7cb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -41,6 +41,7 @@
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "smuio/smuio_11_0_0_offset.h"
+#include "mp/mp_11_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
@@ -52,8 +53,11 @@
#include "navi10_ih.h"
#include "gfx_v10_0.h"
#include "sdma_v5_0.h"
+#include "sdma_v5_2.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
+#include "vcn_v3_0.h"
+#include "jpeg_v3_0.h"
#include "dce_virtual.h"
#include "mes_v10_1.h"
#include "mxgpu_nv.h"
@@ -188,10 +192,8 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
-#if 0 /* TODO: will set it when SDMA header is available */
{ SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
{ SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
-#endif
{ SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
@@ -256,31 +258,6 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-#if 0
-static void nv_gpu_pci_config_reset(struct amdgpu_device *adev)
-{
- u32 i;
-
- dev_info(adev->dev, "GPU pci config reset\n");
-
- /* disable BM */
- pci_clear_master(adev->pdev);
- /* reset */
- amdgpu_pci_config_reset(adev);
-
- udelay(100);
-
- /* wait for asic to come out of reset */
- for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = nbio_v2_3_get_memsize(adev);
- if (memsize != 0xffffffff)
- break;
- udelay(1);
- }
-
-}
-#endif
-
static int nv_asic_mode1_reset(struct amdgpu_device *adev)
{
u32 i;
@@ -338,15 +315,6 @@ nv_asic_reset_method(struct amdgpu_device *adev)
static int nv_asic_reset(struct amdgpu_device *adev)
{
-
- /* FIXME: it doesn't work since vega10 */
-#if 0
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
-
- nv_gpu_pci_config_reset(adev);
-
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-#endif
int ret = 0;
struct smu_context *smu = &adev->smu;
@@ -442,6 +410,9 @@ legacy_init:
case CHIP_NAVI12:
navi12_reg_base_init(adev);
break;
+ case CHIP_SIENNA_CICHLID:
+ sienna_cichlid_reg_base_init(adev);
+ break;
default:
return -EINVAL;
}
@@ -515,6 +486,28 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;
+ case CHIP_SIENNA_CICHLID:
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
+ if (adev->enable_mes)
+ amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
+ break;
default:
return -EINVAL;
}
@@ -550,7 +543,6 @@ static bool nv_need_full_reset(struct amdgpu_device *adev)
static bool nv_need_reset_on_init(struct amdgpu_device *adev)
{
-#if 0
u32 sol_reg;
if (adev->flags & AMD_IS_APU)
@@ -562,8 +554,7 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg)
return true;
-#endif
- /* TODO: re-enable it when mode1 reset is functional */
+
return false;
}
@@ -592,8 +583,11 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.sdma_engine[2] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2;
+ adev->doorbell_index.sdma_engine[3] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3;
adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
@@ -714,7 +708,8 @@ static int nv_common_early_init(void *handle)
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
- AMD_PG_SUPPORT_ATHUB;
+ AMD_PG_SUPPORT_ATHUB |
+ AMD_PG_SUPPORT_MMHUB;
/* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
* as a consequence, the rev_id and external_rev_id are wrong.
* workaround it by hardcoding rev_id to 0 (default value).
@@ -723,6 +718,23 @@ static int nv_common_early_init(void *handle)
adev->rev_id = 0;
adev->external_rev_id = adev->rev_id + 0xa;
break;
+ case CHIP_SIENNA_CICHLID:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_ATHUB;
+ adev->external_rev_id = adev->rev_id + 0x28;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -889,6 +901,16 @@ static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev,
RC_MEM_POWER_DS_EN, enable);
}
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ }
+
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
/* restore IPH & RC clock override after clock/power mode changing */
@@ -938,6 +960,7 @@ static int nv_common_set_clockgating_state(void *handle,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 82e6cb432f3d..b6a95f0122fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -32,4 +32,5 @@ int nv_set_ip_blocks(struct amdgpu_device *adev);
int navi10_reg_base_init(struct amdgpu_device *adev);
int navi14_reg_base_init(struct amdgpu_device *adev);
int navi12_reg_base_init(struct amdgpu_device *adev);
+int sienna_cichlid_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index a44fd6060d5b..cbc04a5c0fe1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -110,6 +110,7 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
+ GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
/* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
@@ -365,4 +366,11 @@ struct psp_gfx_rb_frame
/* total 64 bytes */
};
+#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
+
+enum tee_error_code {
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+};
+
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 1de89cc3c355..423386272920 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -55,6 +55,8 @@ MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_asd.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -95,6 +97,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_ARCTURUS:
chip_name = "arcturus";
break;
+ case CHIP_SIENNA_CICHLID:
+ chip_name = "sienna_cichlid";
+ break;
default:
BUG();
}
@@ -103,9 +108,11 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
if (err)
return err;
- err = psp_init_asd_microcode(psp, chip_name);
- if (err)
- return err;
+ if (adev->asic_type != CHIP_SIENNA_CICHLID) {
+ err = psp_init_asd_microcode(psp, chip_name);
+ if (err)
+ return err;
+ }
switch (adev->asic_type) {
case CHIP_VEGA20:
@@ -165,6 +172,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
}
break;
+ case CHIP_SIENNA_CICHLID:
+ break;
default:
BUG();
}
@@ -243,6 +252,39 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
return ret;
}
+static int psp_v11_0_bootloader_load_spl(struct psp_context *psp)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * have already been loaded.
+ */
+ if (psp_v11_0_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v11_0_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP SPL binary to memory */
+ memcpy(psp->fw_pri_buf, psp->spl_start_addr, psp->spl_bin_size);
+
+ /* Provide the PSP SPL to bootloader */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_TOS_SPL_TABLE;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v11_0_wait_for_bootloader(psp);
+
+ return ret;
+}
+
static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
{
int ret;
@@ -354,7 +396,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
struct psp_ring *ring;
struct amdgpu_device *adev = psp->adev;
- if (!amdgpu_sriov_vf(adev))
+ if ((!amdgpu_sriov_vf(adev)) &&
+ (adev->asic_type != CHIP_SIENNA_CICHLID))
psp_v11_0_reroute_ih(psp);
ring = &psp->km_ring;
@@ -555,44 +598,6 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
return ret;
}
-static void psp_v11_0_memory_training_fini(struct psp_context *psp)
-{
- struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
-
- ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
- kfree(ctx->sys_cache);
- ctx->sys_cache = NULL;
-}
-
-static int psp_v11_0_memory_training_init(struct psp_context *psp)
-{
- int ret;
- struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
-
- if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
- DRM_DEBUG("memory training is not supported!\n");
- return 0;
- }
-
- ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
- if (ctx->sys_cache == NULL) {
- DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
- ret = -ENOMEM;
- goto Err_out;
- }
-
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
- ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
- return 0;
-
-Err_out:
- psp_v11_0_memory_training_fini(psp);
- return ret;
-}
-
/*
 * save and restore process
*/
@@ -813,6 +818,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v11_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
.ring_init = psp_v11_0_ring_init,
@@ -820,8 +826,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
.mode1_reset = psp_v11_0_mode1_reset,
- .mem_training_init = psp_v11_0_memory_training_init,
- .mem_training_fini = psp_v11_0_memory_training_fini,
.mem_training = psp_v11_0_memory_training,
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h b/drivers/gpu/drm/amd/amdgpu/sdma_common.h
index e42de9ded275..8629ef7e8ad9 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd_dal.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_common.h
@@ -19,38 +19,24 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: AMD
- *
- */
-
-#ifndef _DMUB_CMD_DAL_H_
-#define _DMUB_CMD_DAL_H_
-
-/*
- * Command IDs should be treated as stable ABI.
- * Do not reuse or modify IDs.
*/
-enum dmub_cmd_psr_type {
- DMUB_CMD__PSR_SET_VERSION = 0,
- DMUB_CMD__PSR_COPY_SETTINGS = 1,
- DMUB_CMD__PSR_ENABLE = 2,
- DMUB_CMD__PSR_DISABLE = 3,
- DMUB_CMD__PSR_SET_LEVEL = 4,
-};
+#ifndef __SDMA_COMMON_H__
+#define __SDMA_COMMON_H__
-enum psr_version {
- PSR_VERSION_1 = 0,
- PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
+enum sdma_utcl2_cache_read_policy {
+ CACHE_READ_POLICY_L2__LRU = 0x00000000,
+ CACHE_READ_POLICY_L2__STREAM = 0x00000001,
+ CACHE_READ_POLICY_L2__NOA = 0x00000002,
+ CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
};
-enum dmub_cmd_abm_type {
- DMUB_CMD__ABM_INIT_CONFIG = 0,
- DMUB_CMD__ABM_SET_PIPE = 1,
- DMUB_CMD__ABM_SET_BACKLIGHT = 2,
- DMUB_CMD__ABM_SET_LEVEL = 3,
- DMUB_CMD__ABM_SET_AMBIENT_LEVEL = 4,
- DMUB_CMD__ABM_SET_PWM_FRAC = 5,
+enum sdma_utcl2_cache_write_policy {
+ CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
+ CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
+ CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
+ CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
+ CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
};
-#endif /* _DMUB_CMD_DAL_H_ */
+#endif /* __SDMA_COMMON_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 33501c6c7189..856c50386c86 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -505,6 +505,36 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
}
}
+static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
+{
+ int i;
+
+ /*
+ * The only chips with SDMAv4 and ULV are VG10 and VG20.
+ * Server SKUs take a different hysteresis setting from other SKUs.
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ if (adev->pdev->device == 0x6860)
+ break;
+ return;
+ case CHIP_VEGA20:
+ if (adev->pdev->device == 0x66a1)
+ break;
+ return;
+ default:
+ return;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ uint32_t temp;
+
+ temp = RREG32_SDMA(i, mmSDMA0_ULV_CNTL);
+ temp = REG_SET_FIELD(temp, SDMA0_ULV_CNTL, HYSTERESIS, 0x0);
+ WREG32_SDMA(i, mmSDMA0_ULV_CNTL, temp);
+ }
+}
+
static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
{
int err = 0;
@@ -529,8 +559,8 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.instance[i].fw != NULL)
- release_firmware(adev->sdma.instance[i].fw);
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
/* arcturus shares the same FW memory across
 all SDMA instances */
@@ -1774,7 +1804,7 @@ static int sdma_v4_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)
+ if (adev->flags & AMD_IS_APU)
adev->sdma.num_instances = 1;
else if (adev->asic_type == CHIP_ARCTURUS)
adev->sdma.num_instances = 8;
@@ -1813,6 +1843,8 @@ static int sdma_v4_0_late_init(void *handle)
.cb = sdma_v4_0_process_ras_data_cb,
};
+ sdma_v4_0_setup_ulv(adev);
+
if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
@@ -1912,9 +1944,7 @@ static int sdma_v4_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->set_powergating_by_smu) ||
- (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
+ if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
if (!amdgpu_sriov_vf(adev))
@@ -1941,9 +1971,7 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
- if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
- && adev->powerplay.pp_funcs->set_powergating_by_smu) ||
- adev->asic_type == CHIP_RENOIR)
+ if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
return 0;
@@ -2202,6 +2230,7 @@ static int sdma_v4_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
sdma_v4_1_update_power_gating(adev,
state == AMD_PG_STATE_GATE ? true : false);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index b544baf306f2..1baeddf2f1e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -40,6 +40,7 @@
#include "soc15.h"
#include "navi10_sdma_pkt_open.h"
#include "nbio_v2_3.h"
+#include "sdma_common.h"
#include "sdma_v5_0.h"
MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
@@ -1298,8 +1299,12 @@ static int sdma_v5_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
+
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
index d5a94e3d181c..d4e3c2e696f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -24,21 +24,6 @@
#ifndef __SDMA_V5_0_H__
#define __SDMA_V5_0_H__
-enum sdma_v5_0_utcl2_cache_read_policy {
- CACHE_READ_POLICY_L2__LRU = 0x00000000,
- CACHE_READ_POLICY_L2__STREAM = 0x00000001,
- CACHE_READ_POLICY_L2__NOA = 0x00000002,
- CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
-};
-
-enum sdma_v5_0_utcl2_cache_write_policy {
- CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
- CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
- CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
- CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
- CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
-};
-
extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
new file mode 100644
index 000000000000..318d32e2bbf6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -0,0 +1,1757 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
+#include "ivsrcid/sdma2/irqsrcs_sdma2_5_0.h"
+#include "ivsrcid/sdma3/irqsrcs_sdma3_5_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "navi10_sdma_pkt_open.h"
+#include "nbio_v2_3.h"
+#include "sdma_common.h"
+#include "sdma_v5_2.h"
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA3_REG_OFFSET 0x400
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x5893
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+
+static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
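+ /* HYP_DEC registers sit in their own aperture (second GC base) and are
+ * strided per instance; all other SDMA registers use the first GC base
+ * for instances 0/1 and the third base for instances 2/3.
+ */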
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance != 0)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+ } else {
+ if (instance < 2) {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][2];
+ if (instance == 3)
+ internal_offset += SDMA3_REG_OFFSET;
+ }
+ }
+
+ return base + internal_offset;
+}
+
+static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ break;
+ default:
+ break;
+ }
+}
+
+static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ int err = 0;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ err = amdgpu_ucode_validate(sdma_inst->fw);
+ if (err)
+ return err;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
+
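+ /* all instances share the same firmware pointer on Sienna Cichlid,
+ * so releasing the first one is enough.
+ */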
+ if (adev->asic_type == CHIP_SIENNA_CICHLID)
+ break;
+ }
+
+ memset((void*)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+/**
+ * sdma_v5_2_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+
+// emulation only, won't work on a real chip
+// a real chip needs to use PSP to load the firmware
+static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[40];
+ int err = 0, i;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ chip_name = "sienna_cichlid";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+
+ err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
+ if (err)
+ goto out;
+
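+ /* Sienna Cichlid ships a single SDMA firmware image, so the remaining
+ * instances reuse the context loaded for instance 0.
+ */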
+ for (i = 1; i < adev->sdma.num_instances; i++) {
+ if (adev->asic_type == CHIP_SIENNA_CICHLID) {
+ memcpy((void*)&adev->sdma.instance[i],
+ (void*)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i);
+ err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]);
+ if (err)
+ goto out;
+ }
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+out:
+ if (err) {
+ DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name);
+ sdma_v5_2_destroy_inst_ctx(adev);
+ }
+ return err;
+}
+
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;/* this is the offset we need to patch later */
+ amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+
+ return ret;
+}
+
+static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ unsigned cur;
+
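+ /* patch the dummy dword written by init_cond_exec with the distance
+ * (in dwords) from that slot to the current write pointer, handling
+ * ring buffer wrap-around.
+ */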
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur > offset)
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+/**
+ * sdma_v5_2_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v5_2_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 *wptr = NULL;
+ uint64_t local_wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
+ *wptr = (*wptr) >> 2;
+ DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+ } else {
+ u32 lowbit, highbit;
+
+ wptr = &local_wptr;
+ lowbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
+ highbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+
+ DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
+ ring->me, highbit, lowbit);
+ *wptr = highbit;
+ *wptr = (*wptr) << 32;
+ *wptr |= lowbit;
+ }
+
+ return *wptr;
+}
+
+/**
+ * sdma_v5_2_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (NAVI10+).
+ */
+static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
+ adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring.
+ */
+static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on an 8 DW boundary--the next dword
+ * must be on an 8-dword boundary. Our IB packet below is 6
+ * dwords long, so add x NOPs such that, in modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below is a solution for x.
+ */
+ sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v5_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @fence: amdgpu fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed.
+ */
+static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* Interrupts do not work reliably on the GFX10.1 model yet. Use the fallback instead. */
+ if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+
+/**
+ * sdma_v5_2_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers.
+ */
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
+ struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
+ struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring;
+ struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring;
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ if ((adev->mman.buffer_funcs_ring == sdma0) ||
+ (adev->mman.buffer_funcs_ring == sdma1) ||
+ (adev->mman.buffer_funcs_ring == sdma2) ||
+ (adev->mman.buffer_funcs_ring == sdma3))
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ }
+
+ sdma0->sched.ready = false;
+ sdma1->sched.ready = false;
+ sdma2->sched.ready = false;
+ sdma3->sched.ready = false;
+}
+
+/**
+ * sdma_v5_2_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues.
+ */
+static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch.
+ */
+static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
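+ /* encode the requested quantum as VALUE << UNIT so it fits the
+ * PHASE*_QUANTUM register fields, clamping if it is still too large.
+ */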
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ phase_quantum);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ phase_quantum);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ phase_quantum);
+ }
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
+
+}
+
+/**
+ * sdma_v5_2_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines.
+ */
+static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (enable == false) {
+ sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
+}
+
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u32 wptr_poll_cntl;
+ u64 wptr_gpu_addr;
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ wb_offset = (ring->rptr_offs * 4);
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programming wptr to a lower value, set minor_ptr_update first */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
+
+ doorbell = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ 0x01000000);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+ ring->sched.ready = true;
+
+ if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ return r;
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_2_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v5_2_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1/2/3 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v5_2_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_2_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v5_2_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v5_2_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
+ if (amdgpu_emu_mode == 1)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v5_2_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v5_2_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v5_2_rlc_resume(adev);
+
+ return r;
+}
+
+/**
+ * sdma_v5_2_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a value
+ * to memory.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
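+	/* seed a writeback slot with a dummy value; the SDMA packet below must overwrite it with 0xDEADBEEF */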
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v5_2_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring.
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v5_2_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA.
+ */
+static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v5_2_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA.
+ */
+static void sdma_v5_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v5_2_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA.
+ */
+static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v5_2_ring_pad_ib - pad the IB
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8.
+ */
+static void sdma_v5_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
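+	/* pad_count is the number of NOP dwords needed to round length_dw up to a multiple of 8 */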
+ pad_count = (-ib->length_dw) & 0x7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v5_2_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed.
+ */
+static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+
+/**
+ * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address of the page directory
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA.
+ */
+static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v5_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+static int sdma_v5_2_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->sdma.num_instances = 4;
+
+ sdma_v5_2_set_ring_funcs(adev);
+ sdma_v5_2_set_buffer_funcs(adev);
+ sdma_v5_2_set_vm_pte_funcs(adev);
+ sdma_v5_2_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int sdma_v5_2_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
+ SDMA0_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1,
+ SDMA1_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA2,
+ SDMA2_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid,
+ SDMA3_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ r = sdma_v5_2_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+
+ DRM_INFO("use_doorbell being set to: [%s]\n",
+				ring->use_doorbell ? "true" : "false");
+
+ ring->doorbell_index =
+			(adev->doorbell_index.sdma_engine[i] << 1); /* get DWORD offset */
+
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v5_2_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ sdma_v5_2_destroy_inst_ctx(adev);
+
+ return 0;
+}
+
+static int sdma_v5_2_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v5_2_init_golden_registers(adev);
+
+ r = sdma_v5_2_start(adev);
+
+ return r;
+}
+
+static int sdma_v5_2_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v5_2_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_2_hw_fini(adev);
+}
+
+static int sdma_v5_2_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_2_hw_init(adev);
+}
+
+static bool sdma_v5_2_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v5_2_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 sdma0, sdma1, sdma2, sdma3;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v5_2_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
+ sdma2 = RREG32(sdma_v5_2_get_reg_offset(adev, 2, mmSDMA0_STATUS_REG));
+ sdma3 = RREG32(sdma_v5_2_get_reg_offset(adev, 3, mmSDMA0_STATUS_REG));
+
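+		/* idle only when the IDLE bit is set in all four status registers */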
+ if (sdma0 & sdma1 & sdma2 & sdma3 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v5_2_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v5_2_get_reg_offset(adev, index, mmSDMA0_GFX_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v5_2_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v5_2_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ u32 reg_offset = sdma_v5_2_get_reg_offset(adev, type, mmSDMA0_CNTL);
+
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA1:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA2:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[2].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[3].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ /* Enable sdma clock gating */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ } else {
+ /* Disable sdma clock gating */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDER_REG_MASK);
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ }
+ }
+}
+
+static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ /* Enable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ } else {
+ /* Disable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ }
+ }
+}
+
+static int sdma_v5_2_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_SIENNA_CICHLID:
+ sdma_v5_2_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ sdma_v5_2_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int sdma_v5_2_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
+ .name = "sdma_v5_2",
+ .early_init = sdma_v5_2_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v5_2_sw_init,
+ .sw_fini = sdma_v5_2_sw_fini,
+ .hw_init = sdma_v5_2_hw_init,
+ .hw_fini = sdma_v5_2_hw_fini,
+ .suspend = sdma_v5_2_suspend,
+ .resume = sdma_v5_2_resume,
+ .is_idle = sdma_v5_2_is_idle,
+ .wait_for_idle = sdma_v5_2_wait_for_idle,
+ .soft_reset = sdma_v5_2_soft_reset,
+ .set_clockgating_state = sdma_v5_2_set_clockgating_state,
+ .set_powergating_state = sdma_v5_2_set_powergating_state,
+ .get_clockgating_state = sdma_v5_2_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_GFXHUB_0,
+ .get_rptr = sdma_v5_2_ring_get_rptr,
+ .get_wptr = sdma_v5_2_ring_get_wptr,
+ .set_wptr = sdma_v5_2_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v5_2_ring_init_cond_exec */
+ 6 + /* sdma_v5_2_ring_emit_hdp_flush */
+ 3 + /* hdp_invalidate */
+ 6 + /* sdma_v5_2_ring_emit_pipeline_sync */
+ /* sdma_v5_2_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v5_2_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v5_2_ring_emit_ib */
+ .emit_ib = sdma_v5_2_ring_emit_ib,
+ .emit_fence = sdma_v5_2_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v5_2_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v5_2_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
+ .test_ring = sdma_v5_2_ring_test_ring,
+ .test_ib = sdma_v5_2_ring_test_ib,
+ .insert_nop = sdma_v5_2_ring_insert_nop,
+ .pad_ib = sdma_v5_2_ring_pad_ib,
+ .emit_wreg = sdma_v5_2_ring_emit_wreg,
+ .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v5_2_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
+ .preempt_ib = sdma_v5_2_ring_preempt_ib,
+};
+
+static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v5_2_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v5_2_trap_irq_funcs = {
+ .set = sdma_v5_2_set_trap_irq_state,
+ .process = sdma_v5_2_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v5_2_illegal_inst_irq_funcs = {
+ .process = sdma_v5_2_process_illegal_inst_irq,
+};
+
+static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v5_2_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v5_2_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @tmz: if a secure copy should be used
+ *
+ * Copy GPU buffers using the DMA engine.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ bool tmz)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine.
+ */
+static void sdma_v5_2_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v5_2_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v5_2_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v5_2_emit_fill_buffer,
+};
+
+static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mman.buffer_funcs == NULL) {
+ adev->mman.buffer_funcs = &sdma_v5_2_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+ }
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v5_2_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_2_vm_copy_pte,
+ .write_pte = sdma_v5_2_vm_write_pte,
+ .set_pte_pde = sdma_v5_2_vm_set_pte_pde,
+};
+
+static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ if (adev->vm_manager.vm_pte_funcs == NULL) {
+ adev->vm_manager.vm_pte_funcs = &sdma_v5_2_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+ }
+}
+
+const struct amdgpu_ip_block_version sdma_v5_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 5,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &sdma_v5_2_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
new file mode 100644
index 000000000000..b70414fef2a1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V5_2_H__
+#define __SDMA_V5_2_H__
+
+extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
+
+#endif /* __SDMA_V5_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 153db3f763bc..9b12285177e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -41,9 +41,11 @@
#include "si_dma.h"
#include "dce_v6_0.h"
#include "si.h"
+#include "uvd_v3_1.h"
#include "dce_virtual.h"
#include "gca/gfx_6_0_d.h"
#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
#include "gmc/gmc_6_0_d.h"
#include "dce/dce_6_0_d.h"
#include "uvd/uvd_4_0_d.h"
@@ -973,6 +975,28 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
+static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(mmUVD_CTX_DATA);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(mmUVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
{GRBM_STATUS},
{mmGRBM_STATUS2},
@@ -1443,8 +1467,8 @@ static int si_common_early_init(void *handle)
adev->pcie_wreg = &si_pcie_wreg;
adev->pciep_rreg = &si_pciep_rreg;
adev->pciep_wreg = &si_pciep_wreg;
- adev->uvd_ctx_rreg = NULL;
- adev->uvd_ctx_wreg = NULL;
+ adev->uvd_ctx_rreg = si_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = si_uvd_ctx_wreg;
adev->didt_rreg = NULL;
adev->didt_wreg = NULL;
@@ -2173,7 +2197,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
- /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
+ amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_OLAND:
@@ -2187,8 +2211,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
-
- /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
+ amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
case CHIP_HAINAN:
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
index 6b7d292b919f..bc0be6818e21 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
@@ -781,7 +781,7 @@ struct NISLANDS_SMC_SWSTATE
uint8_t levelCount;
uint8_t padding2;
uint8_t padding3;
- NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[1];
+ NISLANDS_SMC_HW_PERFORMANCE_LEVEL levels[];
};
typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 88ae27a5a03d..621727d7fd18 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -27,6 +27,8 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 7cf12adb3915..5f660f0c819f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -1646,9 +1646,10 @@
/*
* PM4
*/
-#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \
- (((reg) >> 2) & 0xFFFF) | \
- ((n) & 0x3FFF) << 16)
+#define PACKET_TYPE0 0
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)
@@ -2340,11 +2341,6 @@
# define NI_INPUT_GAMMA_XVYCC_222 3
# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
-#define IH_RB_WPTR__RB_OVERFLOW_MASK 0x1
-#define IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK 0x80000000
-#define SRBM_STATUS__IH_BUSY_MASK 0x20000
-#define SRBM_SOFT_RESET__SOFT_RESET_IH_MASK 0x400
-
#define BLACKOUT_MODE_MASK 0x00000007
#define VGA_RENDER_CONTROL 0xC0
#define R_000300_VGA_RENDER_CONTROL 0xC0
@@ -2431,18 +2427,6 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-#define SRBM_STATUS__MCB_BUSY_MASK 0x200
-#define SRBM_STATUS__MCB_BUSY__SHIFT 0x9
-#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK 0x400
-#define SRBM_STATUS__MCB_NON_DISPLAY_BUSY__SHIFT 0xa
-#define SRBM_STATUS__MCC_BUSY_MASK 0x800
-#define SRBM_STATUS__MCC_BUSY__SHIFT 0xb
-#define SRBM_STATUS__MCD_BUSY_MASK 0x1000
-#define SRBM_STATUS__MCD_BUSY__SHIFT 0xc
-#define SRBM_STATUS__VMC_BUSY_MASK 0x100
-#define SRBM_STATUS__VMC_BUSY__SHIFT 0x8
-
-
#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
@@ -2467,8 +2451,6 @@
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
-#define CC_DRM_ID_STRAPS__ATI_REV_ID_MASK 0xf0000000
-#define CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT 0x1c
#define PCIE_PORT_INDEX 0xe
#define PCIE_PORT_DATA 0xf
#define EVERGREEN_PIF_PHY0_INDEX 0x8
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
new file mode 100644
index 000000000000..5ee69f70c49b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "sienna_cichlid_ip_offset.h"
+
+int sienna_cichlid_reg_base_init(struct amdgpu_device *adev)
+{
+	/* HW has more IP blocks, only initialize the blocks needed by the driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
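+		/* the SDMA instances are addressed through the GC register aperture on this ASIC */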
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index c7c9e07962b9..3e406eeeaff6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -415,7 +415,8 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
*value = 0;
for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
en = &soc15_allowed_read_registers[i];
- if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ if (adev->reg_offset[en->hwip][en->inst] &&
+ reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ en->reg_offset))
continue;
@@ -670,14 +671,25 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
int soc15_set_ip_blocks(struct amdgpu_device *adev)
{
+ int r;
+
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
- case CHIP_RENOIR:
vega10_reg_base_init(adev);
break;
+ case CHIP_RENOIR:
+ if (amdgpu_discovery) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ DRM_WARN("failed to init reg base from ip discovery table, "
+ "fallback to legacy init method\n");
+ vega10_reg_base_init(adev);
+ }
+ }
+ break;
case CHIP_VEGA20:
vega20_reg_base_init(adev);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 56d02aa690a7..a5c00ab8b021 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -50,18 +50,19 @@
#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
-#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \
+({ int ret = 0; \
do { \
- uint32_t old_ = 0; \
+ uint32_t old_ = 0; \
uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
uint32_t loop = adev->usec_timeout; \
ret = 0; \
while ((tmp_ & (mask)) != (expected_value)) { \
if (old_ != tmp_) { \
loop = adev->usec_timeout; \
- old_ = tmp_; \
- } else \
- udelay(1); \
+ old_ = tmp_; \
+ } else \
+ udelay(1); \
tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
loop--; \
if (!loop) { \
@@ -71,7 +72,9 @@
break; \
} \
} \
- } while (0)
+ } while (0); \
+ ret; \
+})
#define WREG32_RLC(reg, value) \
do { \
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
new file mode 100644
index 000000000000..7cf4b11a65c5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -0,0 +1,793 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Sonny Jiang <sonny.jiang@amd.com>
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_uvd.h"
+#include "sid.h"
+
+#include "uvd/uvd_3_1_d.h"
+#include "uvd/uvd_3_1_sh_mask.h"
+
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+/**
+ * uvd_v3_1_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t uvd_v3_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v3_1_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t uvd_v3_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v3_1_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * uvd_v3_1_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: indirect buffer to execute
+ * @flags: unused
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
+ amdgpu_ring_write(ring, ib->gpu_addr);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * uvd_v3_1_ring_emit_fence - emit a fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: address of the fence
+ * @seq: sequence number to write
+ * @flags: fence related flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void uvd_v3_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+ amdgpu_ring_write(ring, 2);
+}
+
+/**
+ * uvd_v3_1_ring_test_ring - register write test
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+static int uvd_v3_1_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static void uvd_v3_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static const struct amdgpu_ring_funcs uvd_v3_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UVD,
+ .align_mask = 0xf,
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .get_rptr = uvd_v3_1_ring_get_rptr,
+ .get_wptr = uvd_v3_1_ring_get_wptr,
+ .set_wptr = uvd_v3_1_ring_set_wptr,
+ .parse_cs = amdgpu_uvd_ring_parse_cs,
+ .emit_frame_size =
+ 14, /* uvd_v3_1_ring_emit_fence x1 no user fence */
+ .emit_ib_size = 4, /* uvd_v3_1_ring_emit_ib */
+ .emit_ib = uvd_v3_1_ring_emit_ib,
+ .emit_fence = uvd_v3_1_ring_emit_fence,
+ .test_ring = uvd_v3_1_ring_test_ring,
+ .test_ib = amdgpu_uvd_ring_test_ib,
+ .insert_nop = uvd_v3_1_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_uvd_ring_begin_use,
+ .end_use = amdgpu_uvd_ring_end_use,
+};
+
+static void uvd_v3_1_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->ring.funcs = &uvd_v3_1_ring_funcs;
+}
+
+static void uvd_v3_1_set_dcm(struct amdgpu_device *adev,
+ bool sw_mode)
+{
+ u32 tmp, tmp2;
+
+ WREG32_FIELD(UVD_CGC_GATE, REGS, 0);
+
+ tmp = RREG32(mmUVD_CGC_CTRL);
+ tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+ tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
+ (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) |
+ (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT);
+
+ if (sw_mode) {
+ tmp &= ~0x7ffff800;
+ tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK |
+ UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK |
+ (7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT);
+ } else {
+ tmp |= 0x7ffff800;
+ tmp2 = 0;
+ }
+
+ WREG32(mmUVD_CGC_CTRL, tmp);
+ WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
+}
+
+/**
+ * uvd_v3_1_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the UVD memory controller know its offsets
+ */
+static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
+{
+ uint64_t addr;
+ uint32_t size;
+
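+	/* the firmware image, heap, and per-session stacks occupy three consecutive VCPU cache windows */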
+	/* program the VCPU memory controller bits 0-27 */
+ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+ size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
+
+ addr += size;
+ size = AMDGPU_UVD_HEAP_SIZE >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE1, size);
+
+ addr += size;
+ size = (AMDGPU_UVD_STACK_SIZE +
+ (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3;
+ WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr);
+ WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ /* bits 28-31 */
+ addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
+ WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+ /* bits 32-39 */
+ addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
+ WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+/**
+ * uvd_v3_1_fw_validate - FW validation operation
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initiate UVD firmware validation and check the result.
+ */
+static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
+{
+ void *ptr;
+ uint32_t ucode_len, i;
+ uint32_t keysel;
+
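+	/* the signed firmware image stores the ucode length at byte offset 208; the key selector follows the ucode body */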
+ ptr = adev->uvd.inst[0].cpu_addr;
+ ptr += 192 + 16;
+ memcpy(&ucode_len, ptr, 4);
+ ptr += ucode_len;
+ memcpy(&keysel, ptr, 4);
+
+ WREG32(mmUVD_FW_START, keysel);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__DONE_MASK)
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__PASS_MASK))
+ return -EINVAL;
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(mmUVD_FW_STATUS) & UVD_FW_STATUS__BUSY_MASK))
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/**
+ * uvd_v3_1_start - start UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the UVD block
+ */
+static int uvd_v3_1_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t rb_bufsz;
+ int i, j, r;
+ u32 tmp;
+ /* disable byte swapping */
+ u32 lmi_swap_cntl = 0;
+ u32 mp_swap_cntl = 0;
+
+ /* set uvd busy */
+ WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2));
+
+ uvd_v3_1_set_dcm(adev, true);
+ WREG32(mmUVD_CGC_GATE, 0);
+
+ /* take UVD block out of reset */
+ WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(mmUVD_VCPU_CNTL, 1 << 9);
+
+	/* disable interrupt */
+ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+ mp_swap_cntl = 0;
+#endif
+ WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+ WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+ /* initialize UVD memory controller */
+ WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+ (1 << 21) | (1 << 9) | (1 << 20));
+
+ tmp = RREG32(mmUVD_MPC_CNTL);
+ WREG32(mmUVD_MPC_CNTL, tmp | 0x10);
+
+ WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
+ WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(mmUVD_MPC_SET_MUXB1, 0x0);
+ WREG32(mmUVD_MPC_SET_ALU, 0);
+ WREG32(mmUVD_MPC_SET_MUX, 0x88);
+
+ tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL);
+ WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10));
+
+ /* enable UMC */
+ WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ mdelay(10);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ return r;
+ }
+
+	/* enable interrupt */
+ WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+ WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
+
+ /* force RBC into idle state */
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Set the write pointer delay */
+ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* program the 4GB memory segment for rptr and ring buffer */
+ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+ (0x7 << 16) | (0x1 << 31));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmUVD_RBC_RB_RPTR, 0x0);
+
+ ring->wptr = RREG32(mmUVD_RBC_RB_RPTR);
+ WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+
+ /* set the ring address */
+ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr);
+
+ /* Set ring buffer size */
+ rb_bufsz = order_base_2(ring->ring_size);
+ rb_bufsz = (0x1 << 8) | rb_bufsz;
+ WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+
+ return 0;
+}
+
+/**
+ * uvd_v3_1_stop - stop UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the UVD block
+ */
+static void uvd_v3_1_stop(struct amdgpu_device *adev)
+{
+ uint32_t i, j;
+ uint32_t status;
+
+ WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(1);
+ }
+ if (status & 2)
+ break;
+ }
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0xf)
+ break;
+ mdelay(1);
+ }
+ if (status & 0xf)
+ break;
+ }
+
+ /* Stall UMC and register bus before resetting VCPU */
+ WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+ for (i = 0; i < 10; ++i) {
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(mmUVD_LMI_STATUS);
+ if (status & 0x240)
+ break;
+ mdelay(1);
+ }
+ if (status & 0x240)
+ break;
+ }
+
+ WREG32_P(0x3D49, 0, ~(1 << 2));
+
+ WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9));
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32(mmUVD_STATUS, 0);
+
+ uvd_v3_1_set_dcm(adev, false);
+}
+
+static int uvd_v3_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int uvd_v3_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: UVD TRAP\n");
+ amdgpu_fence_process(&adev->uvd.inst->ring);
+ return 0;
+}
+
+
+static const struct amdgpu_irq_src_funcs uvd_v3_1_irq_funcs = {
+ .set = uvd_v3_1_set_interrupt_state,
+ .process = uvd_v3_1_process_interrupt,
+};
+
+static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->uvd.inst->irq.num_types = 1;
+ adev->uvd.inst->irq.funcs = &uvd_v3_1_irq_funcs;
+}
+
+
+static int uvd_v3_1_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->uvd.num_uvd_inst = 1;
+
+ uvd_v3_1_set_ring_funcs(adev);
+ uvd_v3_1_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int uvd_v3_1_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ /* UVD TRAP */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_sw_init(adev);
+ if (r)
+ return r;
+
+ ring = &adev->uvd.inst->ring;
+ sprintf(ring->name, "uvd");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
+
+ return r;
+}
+
+static int uvd_v3_1_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_suspend(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_sw_fini(adev);
+}
+
+static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0x3fff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0x3fff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
+/**
+ * uvd_v3_1_hw_init - start and test UVD block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int uvd_v3_1_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+ uint32_t tmp;
+ int r;
+
+ uvd_v3_1_mc_resume(adev);
+
+ r = uvd_v3_1_fw_validate(adev);
+ if (r) {
+ DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r);
+ return r;
+ }
+
+ uvd_v3_1_enable_mgcg(adev, true);
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+
+ uvd_v3_1_start(adev);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r) {
+ DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r);
+ goto done;
+ }
+
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ goto done;
+ }
+
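+	/* program the semaphore wait/signal timeout counters */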
+ tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ amdgpu_ring_write(ring, tmp);
+ amdgpu_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
+ amdgpu_ring_write(ring, 3);
+
+ amdgpu_ring_commit(ring);
+
+done:
+ if (!r)
+ DRM_INFO("UVD initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * uvd_v3_1_hw_fini - stop the hardware block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+static int uvd_v3_1_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (RREG32(mmUVD_STATUS) != 0)
+ uvd_v3_1_stop(adev);
+
+ return 0;
+}
+
+static int uvd_v3_1_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = uvd_v3_1_hw_fini(adev);
+ if (r)
+ return r;
+
+ return amdgpu_uvd_suspend(adev);
+}
+
+static int uvd_v3_1_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+
+ return uvd_v3_1_hw_init(adev);
+}
+
+static bool uvd_v3_1_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
+}
+
+static int uvd_v3_1_wait_for_idle(void *handle)
+{
+ unsigned i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
+ return 0;
+ }
+ return -ETIMEDOUT;
+}
+
+static int uvd_v3_1_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ uvd_v3_1_stop(adev);
+
+ WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK,
+ ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
+ mdelay(5);
+
+ return uvd_v3_1_start(adev);
+}
+
+static int uvd_v3_1_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int uvd_v3_1_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
+ .name = "uvd_v3_1",
+ .early_init = uvd_v3_1_early_init,
+ .late_init = NULL,
+ .sw_init = uvd_v3_1_sw_init,
+ .sw_fini = uvd_v3_1_sw_fini,
+ .hw_init = uvd_v3_1_hw_init,
+ .hw_fini = uvd_v3_1_hw_fini,
+ .suspend = uvd_v3_1_suspend,
+ .resume = uvd_v3_1_resume,
+ .is_idle = uvd_v3_1_is_idle,
+ .wait_for_idle = uvd_v3_1_wait_for_idle,
+ .soft_reset = uvd_v3_1_soft_reset,
+ .set_clockgating_state = uvd_v3_1_set_clockgating_state,
+ .set_powergating_state = uvd_v3_1_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version uvd_v3_1_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_UVD,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &uvd_v3_1_ip_funcs,
+};
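The exported uvd_v3_1_ip_block is presumably wired up by the SI ASIC code elsewhere in this series ("Add initial UVD support for SI"); that integration point is not part of this hunk. A minimal sketch of how such a block is typically registered, assuming the existing amdgpu_device_ip_block_add() helper and an si_set_ip_blocks()-style call site (both assumptions, neither shown here):

    /* hypothetical wiring, not part of this patch hunk:
     * register the UVD 3.1 IP block so its hw_init runs after the
     * rest of the ASIC blocks have been brought up.
     */
    amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block);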
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h
new file mode 100644
index 000000000000..8c2f9b207574
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UVD_V3_1_H__
+#define __UVD_V3_1_H__
+
+extern const struct amdgpu_ip_block_version uvd_v3_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 1ad79155ed00..6dcc3ce0c00a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -38,6 +38,7 @@
#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
#include "jpeg_v1_0.h"
+#include "vcn_v1_0.h"
#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab
#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1
@@ -360,68 +361,68 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo),
0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi),
0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
0xFFFFFFFF, 0);
offset = 0;
} else {
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);
offset = size;
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
}
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
/* cache window 1: stack */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
0xFFFFFFFF, 0);
/* cache window 2: context */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
0xFFFFFFFF, 0);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
}
@@ -636,9 +637,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
/* enable sw clock gating control */
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -667,22 +668,21 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
/* turn off clock gating */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
/* turn on SUVD clock gating */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
- int ret;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
@@ -698,7 +698,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF);
} else {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
@@ -712,7 +712,7 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF);
}
/* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */
@@ -728,7 +728,6 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
- int ret;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
/* Before power off, this indicator has to be turned on */
@@ -763,7 +762,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF);
}
}
@@ -972,14 +971,14 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
/* disable interupt */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
0, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
/* initialize VCN memory controller */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
@@ -993,48 +992,48 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* swap (8 in 32) RB and IB */
lmi_swap_cntl = 0xa;
#endif
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_CNTL,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_CNTL,
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXA0,
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUXB0,
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MPC_SET_MUX,
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
vcn_v1_0_mc_resume_dpg_mode(adev);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
/* boot up the VCPU */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
/* enable UMC */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL2,
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT,
0xFFFFFFFF, 0);
/* enable master interrupt */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
vcn_v1_0_clock_gating_dpg_mode(adev, 1);
/* setup mmUVD_LMI_CTRL */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
@@ -1046,11 +1045,11 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
tmp = adev->gfx.config.gb_addr_config;
/* setup VCN global tiling registers */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
/* enable System Interrupt for JRBC */
- WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN,
+ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SYS_INT_EN,
UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1);
/* force RBC into idle state */
@@ -1112,15 +1111,15 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
*/
static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
{
- int ret_code, tmp;
+ int tmp;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
/* put VCPU into reset */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
@@ -1129,7 +1128,7 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
/* disable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
@@ -1153,30 +1152,29 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
{
- int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
@@ -1219,9 +1217,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
/* pause DPG non-jpeg */
@@ -1229,7 +1227,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Restore */
ring = &adev->vcn.inst->ring_enc[0];
@@ -1251,7 +1249,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg non-jpeg, no need to wait */
@@ -1275,9 +1273,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
ret_code = 0;
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
/* Make sure JPRG Snoop is disabled before sending the pause */
@@ -1290,7 +1288,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
/* Restore */
ring = &adev->jpeg.inst->ring_dec;
@@ -1312,7 +1310,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg jpeg, no need to wait */
@@ -1335,10 +1333,10 @@ static bool vcn_v1_0_is_idle(void *handle)
static int vcn_v1_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret = 0;
+ int ret;
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
- UVD_STATUS__IDLE, ret);
+ ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
return ret;
}
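The vcn_v1_0.c hunks above follow one recurring pattern: SOC15_WAIT_ON_RREG no longer takes a trailing output variable and instead returns the wait status, so callers that care assign the result and the rest simply drop it. A minimal before/after sketch of the two call styles (illustrative only; the macro itself is defined in soc15_common.h and is not shown in this diff):

    /* old style: the macro wrote its result into a caller-provided variable */
    int ret;
    SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, UVD_STATUS__IDLE, ret);

    /* new style: the macro evaluates to an int; assign it or ignore it */
    ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, UVD_STATUS__IDLE);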
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 90ed773695ea..c0e4133a6dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -30,6 +30,7 @@
#include "amdgpu_pm.h"
#include "amdgpu_psp.h"
#include "mmsch_v2_0.h"
+#include "vcn_v2_0.h"
#include "vcn/vcn_2_0_0_offset.h"
#include "vcn/vcn_2_0_0_sh_mask.h"
@@ -382,91 +383,91 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
offset = size;
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -615,19 +616,19 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
@@ -696,7 +697,6 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
- int ret;
if (amdgpu_sriov_vf(adev))
return;
@@ -715,7 +715,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
- UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret);
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF);
} else {
data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
@@ -728,7 +728,7 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF);
}
/* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS,
@@ -746,7 +746,6 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
- int ret;
if (amdgpu_sriov_vf(adev))
return;
@@ -782,7 +781,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT);
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF);
}
}
@@ -810,11 +809,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interupt */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -826,28 +825,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -855,22 +854,22 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* release VCPU reset to boot */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
/* enable LMI MC and UMC channels */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_LMI_CTRL2),
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
/* enable master interrupt */
- WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
@@ -1098,25 +1097,24 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
{
- int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
@@ -1138,7 +1136,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
}
/* wait for uvd idle */
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
if (r)
return r;
@@ -1146,7 +1144,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1157,7 +1155,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+ r = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1208,9 +1206,8 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ ret_code = SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr;
@@ -1221,7 +1218,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
/* wait for ACK */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Stall DPG before WPTR/RPTR reset */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
@@ -1258,7 +1255,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
/* unpause dpg, no need to wait */
@@ -1281,10 +1278,10 @@ static bool vcn_v2_0_is_idle(void *handle)
static int vcn_v2_0_wait_for_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret = 0;
+ int ret;
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
- UVD_STATUS__IDLE, ret);
+ ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
return ret;
}
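The second recurring change across these VCN files is a macro rename: vcn_v1_0.c keeps its own flavor of the DPG register write under a versioned name, while the former _2_0 helpers lose their suffix and become the shared default, presumably so the new VCN 3.0 code can reuse them. Side by side, using calls taken from the hunks above:

    /* VCN 1.0: the old generic name gains an explicit version suffix */
    WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);

    /* VCN 2.0+ (and the new VCN 3.0 below): formerly _2_0, now the unsuffixed helpers */
    WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(UVD, 0, mmUVD_CGC_GATE),
                          0, sram_sel, indirect);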
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 3c6eafb62ee6..e99bef6e2354 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -451,91 +451,91 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
offset = size;
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -549,7 +549,6 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
{
uint32_t data;
- int ret = 0;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -589,7 +588,7 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret);
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
@@ -689,19 +688,19 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__MMSCH_MODE_MASK);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
@@ -792,11 +791,11 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interupt */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -808,28 +807,28 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -837,26 +836,26 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* enable LMI MC and UMC channels */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
/* unblock VCPU register access */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* enable master interrupt */
- WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
@@ -1302,25 +1301,24 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
- int ret_code = 0;
uint32_t tmp;
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* wait for read ptr to be equal to write ptr */
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
- SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
- SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
- SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
@@ -1343,7 +1341,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
}
/* wait for vcn idle */
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
if (r)
return r;
@@ -1351,7 +1349,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
UVD_LMI_STATUS__READ_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_MASK |
UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1362,7 +1360,7 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
if (r)
return r;
@@ -1412,8 +1410,8 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
- SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
@@ -1425,7 +1423,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
/* wait for ACK */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
/* Stall DPG before WPTR/RPTR reset */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
@@ -1458,13 +1456,13 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
- UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
} else {
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
@@ -1701,8 +1699,8 @@ static int vcn_v2_5_wait_for_idle(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
- UVD_STATUS__IDLE, ret);
+ ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
if (ret)
return ret;
}
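Within the pause_dpg_mode paths, the same conversion also makes the control flow easier to read: only the initial power-status wait feeds back into the pause sequence, while the later ACK and restore waits simply drop their result. A condensed sketch of the pattern as it now reads (argument names follow vcn_v2_5.c above):

    ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
                                  UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
    if (!ret_code) {
            /* program mmUVD_DPG_PAUSE, then wait for the ACK; result is not checked */
            SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
                               UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
                               UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
    }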
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
new file mode 100644
index 000000000000..90fe95f345e3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -0,0 +1,1684 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_3_0_0_offset.h"
+#include "vcn/vcn_3_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
+
+#define VCN_INSTANCES_SIENNA_CICHLID 2
+
+static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * vcn_v3_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v3_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_SIENNA_CICHLID) {
+ u32 harvest;
+ int i;
+
+ adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->vcn.harvest_config |= 1 << i;
+ }
+
+ if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+ AMDGPU_VCN_HARVEST_VCN1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+ } else
+ adev->vcn.num_vcn_inst = 1;
+
+ adev->vcn.num_enc_rings = 2;
+
+ vcn_v3_0_set_dec_ring_funcs(adev);
+ vcn_v3_0_set_enc_ring_funcs(adev);
+ vcn_v3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v3_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) {
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ DRM_INFO("PSP loading VCN firmware\n");
+ }
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->use_doorbell = true;
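+ /* each instance gets 8 doorbell slots: dec ring first, enc rings from slot 2 */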
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
+ if (i != 0)
+ ring->no_scheduler = true;
+ sprintf(ring->name, "vcn_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ /* VCN ENC TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
+ if (i != 1)
+ ring->no_scheduler = true;
+ sprintf(ring->name, "vcn_enc_%d.%d", i, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT);
+ if (r)
+ return r;
+ }
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Suspend VCN and free up software allocations
+ */
+static int vcn_v3_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v3_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_dec;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block and mark its rings as not ready anymore
+ */
+static int vcn_v3_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_dec;
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS)))
+ vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
+ ring->sched.ready = false;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v3_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v3_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume the firmware and do hw init for the VCN block
+ */
+static int vcn_v3_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v3_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v3_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+}
+
+static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+}
+
+static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0, 0x3F3FFFFF);
+ }
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
+ }
+}
+
+/**
+ * vcn_v3_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ /* disable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__IME_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__EFC_MASK
+ | UVD_SUVD_CGC_GATE__SAOE_MASK
+ | 0x08000000
+ | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
+ | 0x40000000
+ | UVD_SUVD_CGC_GATE__SMPA_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
+ data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
+ | UVD_SUVD_CGC_GATE2__MPBE1_MASK
+ | 0x00000004
+ | 0x00000008
+ | UVD_SUVD_CGC_GATE2__MPC1_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
+ | 0x00008000
+ | 0x00010000
+ | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v3_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+ uint32_t data;
+
+ /* enable VCN CGC */
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
+ | 0x00008000
+ | 0x00010000
+ | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
+ | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
+ WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ /* add nop to workaround PSP size check */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
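+ /* in indirect mode, hand the accumulated register writes to the PSP for programming */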
+ if (indirect)
+ psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for the ring buffer */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ return 0;
+}
+
+static int vcn_v3_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ int i, j, k, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ /* disable VCN power gating */
+ vcn_v3_0_disable_static_power_gating(adev, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+
+ /* disable SW clock gating */
+ vcn_v3_0_disable_clock_gating(adev, i);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v3_0_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
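+ /* poll UVD_STATUS for the VCPU ready bit, resetting the VCPU block between up to 10 attempts */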
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* program the RB_BASE for the ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ }
+
+ return 0;
+}
+
+static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v3_0_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v3_0_enable_clock_gating(adev, i);
+
+ /* enable VCN power gating */
+ vcn_v3_0_enable_static_power_gating(adev, i);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ /* Restore */
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
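+ /* restore the dec ring write pointer that was mirrored into SCRATCH2 while in DPG mode */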
+ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v3_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v3_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v3_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
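+ /* in DPG mode, mirror the write pointer into SCRATCH2 so it can be restored after a pause */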
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = vcn_v3_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v3_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v3_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
+}
+
+/**
+ * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
+ }
+}
+
+/**
+ * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = vcn_v3_0_enc_ring_get_rptr,
+ .get_wptr = vcn_v3_0_enc_ring_get_wptr,
+ .set_wptr = vcn_v3_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+ adev->vcn.inst[i].ring_dec.me = i;
+ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
+ }
+}
+
+static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[j].me = i;
+ }
+ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
+ }
+}
+
+static bool vcn_v3_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+static int vcn_v3_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int vcn_v3_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v3_0_enable_clock_gating(adev, i);
+ } else {
+ vcn_v3_0_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v3_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v3_0_stop(adev);
+ else
+ ret = vcn_v3_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
+ .set = vcn_v3_0_set_interrupt_state,
+ .process = vcn_v3_0_process_interrupt,
+};
+
+static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
+ .name = "vcn_v3_0",
+ .early_init = vcn_v3_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v3_0_sw_init,
+ .sw_fini = vcn_v3_0_sw_fini,
+ .hw_init = vcn_v3_0_hw_init,
+ .hw_fini = vcn_v3_0_hw_fini,
+ .suspend = vcn_v3_0_suspend,
+ .resume = vcn_v3_0_resume,
+ .is_idle = vcn_v3_0_is_idle,
+ .wait_for_idle = vcn_v3_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v3_0_set_clockgating_state,
+ .set_powergating_state = vcn_v3_0_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h
new file mode 100644
index 000000000000..31683582d778
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V3_0_H__
+#define __VCN_V3_0_H__
+
+extern const struct amdgpu_ip_block_version vcn_v3_0_ip_block;
+
+#endif /* __VCN_V3_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d3400da6ab64..577d901fdb63 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -679,169 +679,175 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf810000, 0x00000000,
};
-static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf8201c1,
+static const uint32_t cwsr_trap_nv1x_hex[] = {
+ 0xbf820001, 0xbf8201cd,
0xb0804004, 0xb978f802,
- 0x8a788678, 0xb971f803,
- 0x876eff71, 0x00000400,
- 0xbf850033, 0x876eff71,
- 0x00000100, 0xbf840002,
- 0x8878ff78, 0x00002000,
- 0x8a77ff77, 0xff000000,
- 0xb96ef807, 0x876fff6e,
- 0x02000000, 0x8f6f866f,
- 0x88776f77, 0x876fff6e,
- 0x003f8000, 0x8f6f896f,
- 0x88776f77, 0x8a6eff6e,
- 0x023f8000, 0xb9eef807,
- 0xb97af812, 0xb97bf813,
- 0x8ffa887a, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
- 0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
- 0xbf840001, 0xbe80206e,
- 0xb971f803, 0x8771ff71,
- 0x000001ff, 0xbf850002,
- 0x806c846c, 0x826d806d,
- 0x876dff6d, 0x0000ffff,
- 0x906e8977, 0x876fff6e,
- 0x003f8000, 0x906e8677,
- 0x876eff6e, 0x02000000,
- 0x886e6f6e, 0xb9eef807,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9f8f802, 0xbe80226c,
- 0xb971f803, 0x8771ff71,
- 0x00000100, 0xbf840006,
- 0xbef60380, 0xb9f60203,
- 0x876dff6d, 0x0000ffff,
- 0x80ec886c, 0x82ed806d,
- 0xbef60380, 0xb9f60283,
- 0xb972f816, 0xb9762c07,
- 0x8f769a76, 0x886d766d,
- 0xb97603c7, 0x8f769976,
- 0x886d766d, 0xb9760647,
- 0x8f769876, 0x886d766d,
- 0xb976f807, 0x8776ff76,
- 0x00007fff, 0xb9f6f807,
+ 0x8a788678, 0xb96ef801,
+ 0x876eff6e, 0x00000800,
+ 0xbf840003, 0x876eff78,
+ 0x00002000, 0xbf840009,
+ 0xb97bf803, 0x876eff7b,
+ 0x00000400, 0xbf850033,
+ 0x876eff7b, 0x00000100,
+ 0xbf840002, 0x8878ff78,
+ 0x00002000, 0x8a77ff77,
+ 0xff000000, 0xb96ef807,
+ 0x876fff6e, 0x02000000,
+ 0x8f6f866f, 0x88776f77,
+ 0x876fff6e, 0x003f8000,
+ 0x8f6f896f, 0x88776f77,
+ 0x8a6eff6e, 0x023f8000,
+ 0xb9eef807, 0xb97af812,
+ 0xb97bf813, 0x8ffa887a,
+ 0xf4051bbd, 0xfa000000,
+ 0xbf8cc07f, 0xf4051ebd,
+ 0xfa000008, 0xbf8cc07f,
+ 0x87ee6e6e, 0xbf840001,
+ 0xbe80206e, 0xb97bf803,
+ 0x877bff7b, 0x000001ff,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x876dff6d,
+ 0x0000ffff, 0x906e8977,
+ 0x876fff6e, 0x003f8000,
+ 0x906e8677, 0x876eff6e,
+ 0x02000000, 0x886e6f6e,
+ 0xb9eef807, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9f8f802,
+ 0xbe80226c, 0x876dff6d,
+ 0x0000ffff, 0xbefa0380,
+ 0xb9fa0283, 0xb97a2c07,
+ 0x8f7a9a7a, 0x886d7a6d,
+ 0xb97a03c7, 0x8f7a997a,
+ 0x886d7a6d, 0xb97a0647,
+ 0x8f7a987a, 0x886d7a6d,
+ 0xb97af807, 0x877aff7a,
+ 0x00007fff, 0xb9faf807,
0xbeee037e, 0xbeef037f,
0xbefe0480, 0xbf900004,
0xbf8e0002, 0xbf88fffe,
+ 0xb97b02dc, 0x8f7b997b,
+ 0x887b7b7f, 0xb97a2a05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbf850002, 0x8f7a897a,
+ 0xbf820001, 0x8f7a8a7a,
+ 0x877bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xf4491c3d, 0xfa000050,
+ 0xf4491d3d, 0xfa000060,
+ 0xf4411e7d, 0xfa000074,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
- 0x8776ff7f, 0x08000000,
- 0x90768376, 0x88777677,
- 0x8776ff7f, 0x70000000,
- 0x90768176, 0x88777677,
- 0xbefb037c, 0xbefa0380,
+ 0x877aff7f, 0x08000000,
+ 0x907a837a, 0x88777a77,
+ 0x877aff7f, 0x70000000,
+ 0x907a817a, 0x88777a77,
+ 0xbef1037c, 0xbef00380,
0xb97302dc, 0x8f739973,
- 0x8873737f, 0xb97a2a05,
- 0x807a817a, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f7a897a,
- 0xbf820001, 0x8f7a8a7a,
- 0xb9761e06, 0x8f768a76,
- 0x807a767a, 0x807aff7a,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611b3a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611b7a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611bba,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611bfa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xbefe037c,
- 0xbefc037a, 0xf4611e3a,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xb971f803,
- 0xbefe037c, 0xbefc037a,
- 0xf4611c7a, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xbefe037c, 0xbefc037a,
- 0xf4611cba, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xb97bf801, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0xb97bf814,
- 0xbefe037c, 0xbefc037a,
- 0xf4611efa, 0xf8000000,
- 0x807a847a, 0xbefc037e,
- 0xb97bf815, 0xbefe037c,
- 0xbefc037a, 0xf4611efa,
- 0xf8000000, 0x807a847a,
- 0xbefc037e, 0x8776ff7f,
- 0x04000000, 0xbeef0380,
- 0x886f6f76, 0xb97a2a05,
- 0x807a817a, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0x8f7a897a,
- 0xbf820001, 0x8f7a8a7a,
- 0xb9761e06, 0x8f768a76,
- 0x807a767a, 0xbef603ff,
- 0x01000000, 0xbef20374,
- 0x80747a74, 0x82758075,
- 0xbefc0380, 0xbf800000,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xbe8c2f0c, 0xbe8e2f0e,
- 0xf469003a, 0xfa000000,
- 0xf469013a, 0xfa000010,
- 0xf469023a, 0xfa000020,
- 0xf469033a, 0xfa000030,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0aff7c,
- 0x00000060, 0xbf85ffea,
- 0xbe802f00, 0xbe822f02,
- 0xbe842f04, 0xbe862f06,
- 0xbe882f08, 0xbe8a2f0a,
- 0xf469003a, 0xfa000000,
- 0xf469013a, 0xfa000010,
- 0xf469023a, 0xfa000020,
- 0x8074b074, 0x82758075,
- 0xbef40372, 0xbefa0380,
+ 0x8873737f, 0xb97bf816,
+ 0xba80f816, 0x00000000,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
0xbf82000b, 0xbef603ff,
0x01000000, 0xe0704000,
- 0x7a5d0000, 0xe0704080,
- 0x7a5d0100, 0xe0704100,
- 0x7a5d0200, 0xe0704180,
- 0x7a5d0300, 0xbf82000a,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0xbf82000a,
0xbef603ff, 0x01000000,
- 0xe0704000, 0x7a5d0000,
- 0xe0704100, 0x7a5d0100,
- 0xe0704200, 0x7a5d0200,
- 0xe0704300, 0x7a5d0300,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611b3a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611b7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611bba, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611bfa, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611e3a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb97af803, 0xbefe037c,
+ 0xbefc0370, 0xf4611eba,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xbefe037c,
+ 0xbefc0370, 0xf4611efa,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xb971f801,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb971f814, 0xbefe037c,
+ 0xbefc0370, 0xf4611c7a,
+ 0xf8000000, 0x80708470,
+ 0xbefc037e, 0xb971f815,
+ 0xbefe037c, 0xbefc0370,
+ 0xf4611c7a, 0xf8000000,
+ 0x80708470, 0xbefc037e,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0xbef603ff, 0x01000000,
+ 0xbefb0374, 0x80747074,
+ 0x82758075, 0xbefc0380,
+ 0xbf800000, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xbe8c2f0c,
+ 0xbe8e2f0e, 0xf469003a,
+ 0xfa000000, 0xf469013a,
+ 0xfa000010, 0xf469023a,
+ 0xfa000020, 0xf469033a,
+ 0xfa000030, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0aff7c, 0x00000060,
+ 0xbf85ffea, 0xbe802f00,
+ 0xbe822f02, 0xbe842f04,
+ 0xbe862f06, 0xbe882f08,
+ 0xbe8a2f0a, 0xf469003a,
+ 0xfa000000, 0xf469013a,
+ 0xfa000010, 0xf469023a,
+ 0xfa000020, 0x8074b074,
+ 0x82758075, 0xbef4037b,
0xbefe03c1, 0x907c9973,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
- 0xb9714306, 0x8771c171,
- 0xbf840046, 0xbf8a0000,
- 0x8776ff6f, 0x04000000,
- 0xbf840042, 0x8f718671,
- 0x8f718271, 0xbef60371,
- 0xb97a2a05, 0x807a817a,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0x8f7a897a, 0xbf820001,
- 0x8f7a8a7a, 0xb9761e06,
- 0x8f768a76, 0x807a767a,
- 0x807aff7a, 0x00000200,
- 0x807aff7a, 0x00000080,
+ 0xb97b4306, 0x877bc17b,
+ 0xbf840044, 0xbf8a0000,
+ 0x877aff73, 0x04000000,
+ 0xbf840040, 0x8f7b867b,
+ 0x8f7b827b, 0xbef6037b,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
0xbef603ff, 0x01000000,
0xd7650000, 0x000100c1,
0xd7660000, 0x000200c1,
@@ -852,87 +858,86 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x7a5d0100,
- 0x807c037c, 0x807a037a,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a717c,
+ 0x00000080, 0xbf0a7b7c,
0xbf85fff4, 0xbf820011,
0xbe8303ff, 0x00000100,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x7a5d0100,
- 0x807c037c, 0x807a037a,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a717c,
+ 0x00000100, 0xbf0a7b7c,
0xbf85fff4, 0xbefe03c1,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850004,
- 0xbefa03ff, 0x00000200,
+ 0xbef003ff, 0x00000200,
0xbeff0380, 0xbf820003,
- 0xbefa03ff, 0x00000400,
- 0xbeff03c1, 0xb9712a05,
- 0x80718171, 0x8f718271,
+ 0xbef003ff, 0x00000400,
+ 0xbeff03c1, 0xb97b2a05,
+ 0x807b817b, 0x8f7b827b,
0x907c9973, 0x877c817c,
0xbf06817c, 0xbf850017,
0xbef603ff, 0x01000000,
- 0xbefc0384, 0xbf0a717c,
+ 0xbefc0384, 0xbf0a7b7c,
0xbf840037, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x7a5d0000, 0xe0704080,
- 0x7a5d0100, 0xe0704100,
- 0x7a5d0200, 0xe0704180,
- 0x7a5d0300, 0x807c847c,
- 0x807aff7a, 0x00000200,
- 0xbf0a717c, 0xbf85ffef,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0x807c847c,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7c, 0xbf85ffef,
0xbf820025, 0xbef603ff,
0x01000000, 0xbefc0384,
- 0xbf0a717c, 0xbf840020,
+ 0xbf0a7b7c, 0xbf840020,
0x7e008700, 0x7e028701,
0x7e048702, 0x7e068703,
- 0xe0704000, 0x7a5d0000,
- 0xe0704100, 0x7a5d0100,
- 0xe0704200, 0x7a5d0200,
- 0xe0704300, 0x7a5d0300,
- 0x807c847c, 0x807aff7a,
- 0x00000400, 0xbf0a717c,
- 0xbf85ffef, 0xb9711e06,
- 0x8771c171, 0xbf84000c,
- 0x8f718371, 0x80717c71,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xb97b1e06,
+ 0x877bc17b, 0xbf84000c,
+ 0x8f7b837b, 0x807b7c7b,
0xbefe03c1, 0xbeff0380,
0x7e008700, 0xe0704000,
- 0x7a5d0000, 0x807c817c,
- 0x807aff7a, 0x00000080,
- 0xbf0a717c, 0xbf85fff8,
- 0xbf820142, 0xbef4037e,
+ 0x705d0000, 0x807c817c,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff8,
+ 0xbf82014f, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
- 0x10807fac, 0x8772ff7f,
- 0x08000000, 0x90728372,
- 0x88777277, 0x8772ff7f,
- 0x70000000, 0x90728172,
- 0x88777277, 0xb97302dc,
- 0x8f739973, 0x8873737f,
- 0x8772ff7f, 0x04000000,
- 0xbf840036, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
+ 0x10807fac, 0x876eff7f,
+ 0x08000000, 0x906e836e,
+ 0x88776e77, 0x876eff7f,
+ 0x70000000, 0x906e816e,
+ 0x88776e77, 0xb97202dc,
+ 0x8f729972, 0x8872727f,
+ 0x876eff7f, 0x04000000,
+ 0xbf840034, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf84002b,
+ 0x876fc16f, 0xbf840029,
0x8f6f866f, 0x8f6f826f,
0xbef6036f, 0xb9782a05,
- 0x80788178, 0x907c9973,
- 0x877c817c, 0xbf06817c,
+ 0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
- 0xb9721e06, 0x8f728a72,
- 0x80787278, 0x8078ff78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
0x00000200, 0x8078ff78,
0x00000080, 0xbef603ff,
- 0x01000000, 0x907c9973,
+ 0x01000000, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbefc0380, 0xbf850009,
0xe0310000, 0x781d0000,
@@ -944,15 +949,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x00000100, 0x8078ff78,
0x00000100, 0xbf0a6f7c,
0xbf85fff8, 0xbef80380,
- 0xbefe03c1, 0x907c9973,
+ 0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb96f2a05, 0x806f816f,
- 0x8f6f826f, 0x907c9973,
+ 0x8f6f826f, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850021, 0xbef603ff,
- 0x01000000, 0xbef20378,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
0xbefc0384, 0xe0304000,
0x785d0000, 0xe0304080,
@@ -964,12 +969,12 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x807c847c, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85ffee, 0xe0304000,
- 0x725d0000, 0xe0304080,
- 0x725d0100, 0xe0304100,
- 0x725d0200, 0xe0304180,
- 0x725d0300, 0xbf820032,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf820032,
0xbef603ff, 0x01000000,
- 0xbef20378, 0x8078ff78,
+ 0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
0xe0304000, 0x785d0000,
0xe0304100, 0x785d0100,
@@ -989,16 +994,15 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
0xbeff03c1, 0xe0304000,
- 0x725d0000, 0xe0304100,
- 0x725d0100, 0xe0304200,
- 0x725d0200, 0xe0304300,
- 0x725d0300, 0xbf8c3f70,
+ 0x6e5d0000, 0xe0304100,
+ 0x6e5d0100, 0xe0304200,
+ 0x6e5d0200, 0xe0304300,
+ 0x6e5d0300, 0xbf8c3f70,
0xb9782a05, 0x80788178,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
+ 0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb9721e06,
- 0x8f728a72, 0x80787278,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
@@ -1021,23 +1025,22 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbe8c310c, 0xbe8e310e,
0xbf06807c, 0xbf84fff0,
0xb9782a05, 0x80788178,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
+ 0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb9721e06,
- 0x8f728a72, 0x80787278,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
0xbef603ff, 0x01000000,
0xf4211bfa, 0xf0000000,
0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
0xf4211b7a, 0xf0000000,
- 0x80788478, 0xf4211eba,
- 0xf0000000, 0x80788478,
- 0xf4211efa, 0xf0000000,
0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
+ 0xf0000000, 0x80788478,
+ 0xf4211efa, 0xf0000000,
0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
0xf4211cfa, 0xf0000000,
@@ -1046,31 +1049,41 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
0x80788478, 0xbf8cc07f,
- 0xb9eef815, 0xbef2036d,
- 0x876dff72, 0x0000ffff,
- 0xbefc036f, 0xbefe037a,
- 0xbeff037b, 0x876f71ff,
- 0x000003ff, 0xb9ef4803,
- 0xb9f9f816, 0x876f71ff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0x876fff72, 0xfc000000,
- 0x906f9a6f, 0x8f6f906f,
- 0xbef30380, 0x88736f73,
- 0x876fff72, 0x02000000,
- 0x906f996f, 0x8f6f8f6f,
- 0x88736f73, 0x876fff72,
- 0x01000000, 0x906f986f,
- 0x8f6f996f, 0x88736f73,
- 0x876fff70, 0x00800000,
- 0x906f976f, 0xb9f3f807,
- 0x87fe7e7e, 0x87ea6a6a,
- 0xb9f0f802, 0xbf8a0000,
- 0xbe80226c, 0xbf810000,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0xb9f9f816,
+ 0x876f7bff, 0xfffff800,
+ 0x906f8b6f, 0xb9efa2c3,
+ 0xb9f3f801, 0xb96e2a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x876fff6f, 0x0000ffff,
+ 0xf4091c37, 0xfa000050,
+ 0xf4091d37, 0xfa000060,
+ 0xf4011e77, 0xfa000074,
+ 0xbf8cc07f, 0x876fff6d,
+ 0xfc000000, 0x906f9a6f,
+ 0x8f6f906f, 0xbeee0380,
+ 0x886e6f6e, 0x876fff6d,
+ 0x02000000, 0x906f996f,
+ 0x8f6f8f6f, 0x886e6f6e,
+ 0x876fff6d, 0x01000000,
+ 0x906f986f, 0x8f6f996f,
+ 0x886e6f6e, 0x876fff7a,
+ 0x00800000, 0x906f976f,
+ 0xb9eef807, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9faf802,
+ 0xbf8a0000, 0xbe80226c,
+ 0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
+
static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202c4,
0xb8f8f802, 0x89788678,
@@ -1560,3 +1573,399 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf8a0000, 0x95806f6c,
0xbf810000, 0x00000000,
};
+
+static const uint32_t cwsr_trap_gfx10_hex[] = {
+ 0xbf820001, 0xbf8201cf,
+ 0xb0804004, 0xb978f802,
+ 0x8a788678, 0xb96ef801,
+ 0x876eff6e, 0x00000800,
+ 0xbf840003, 0x876eff78,
+ 0x00002000, 0xbf840009,
+ 0xb97bf803, 0x876eff7b,
+ 0x00000400, 0xbf85001d,
+ 0x876eff7b, 0x00000100,
+ 0xbf840002, 0x8878ff78,
+ 0x00002000, 0xb97af812,
+ 0xb97bf813, 0x8ffa887a,
+ 0xf4051bbd, 0xfa000000,
+ 0xbf8cc07f, 0xf4051ebd,
+ 0xfa000008, 0xbf8cc07f,
+ 0x87ee6e6e, 0xbf840001,
+ 0xbe80206e, 0xb97bf803,
+ 0x877bff7b, 0x000001ff,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x876dff6d,
+ 0x0000ffff, 0x87fe7e7e,
+ 0x87ea6a6a, 0xb9f8f802,
+ 0xbe80226c, 0x876dff6d,
+ 0x0000ffff, 0xbefa0380,
+ 0xb9fa0283, 0xbeee037e,
+ 0xbeef037f, 0xbefe0480,
+ 0xbf900004, 0xbf8cc07f,
+ 0xb97b02dc, 0x8f7b997b,
+ 0x887b7b7f, 0xb97a2a05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbf850002, 0x8f7a897a,
+ 0xbf820001, 0x8f7a8a7a,
+ 0x877bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0x877aff7f, 0x08000000,
+ 0x907a837a, 0x88777a77,
+ 0x877aff7f, 0x70000000,
+ 0x907a817a, 0x88777a77,
+ 0xbef1037c, 0xbef00380,
+ 0xb97302dc, 0x8f739973,
+ 0x8873737f, 0xbefe03c1,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820002,
+ 0xbeff03c1, 0xbf82000b,
+ 0xbef603ff, 0x01000000,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0xbf82000a, 0xbef603ff,
+ 0x01000000, 0xe0704000,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
+ 0x705d0300, 0xb9702a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef603ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbefc0380, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xd7610002, 0x0000f86c,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86d, 0x807c817c,
+ 0xd7610002, 0x0000f86e,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86f, 0x807c817c,
+ 0xd7610002, 0x0000f878,
+ 0x807c817c, 0xb97af803,
+ 0xd7610002, 0x0000f87a,
+ 0x807c817c, 0xd7610002,
+ 0x0000f87b, 0x807c817c,
+ 0xb971f801, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xb971f814, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xb971f815, 0xd7610002,
+ 0x0000f871, 0x807c817c,
+ 0xbeff0380, 0xe0704000,
+ 0x705d0200, 0xb9702a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0xbef603ff,
+ 0x01000000, 0xbef90380,
+ 0xbefc0380, 0xbf800000,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xbe8c2f0c, 0xbe8e2f0e,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xd7610002, 0x0000f20c,
+ 0x80798179, 0xd7610002,
+ 0x0000f20d, 0x80798179,
+ 0xd7610002, 0x0000f20e,
+ 0x80798179, 0xd7610002,
+ 0x0000f20f, 0x80798179,
+ 0xbf06a079, 0xbf840006,
+ 0xe0704000, 0x705d0200,
+ 0x8070ff70, 0x00000080,
+ 0xbef90380, 0x7e040280,
+ 0x807c907c, 0xbf0aff7c,
+ 0x00000060, 0xbf85ffbc,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xd7610002, 0x0000f200,
+ 0x80798179, 0xd7610002,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
+ 0x80798179, 0xd7610002,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
+ 0x80798179, 0xd7610002,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
+ 0x80798179, 0xd7610002,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xe0704000, 0x705d0200,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb97b4306, 0x877bc17b,
+ 0xbf840044, 0xbf8a0000,
+ 0x877aff73, 0x04000000,
+ 0xbf840040, 0x8f7b867b,
+ 0x8f7b827b, 0xbef6037b,
+ 0xb9702a05, 0x80708170,
+ 0xbf0d9973, 0xbf850002,
+ 0x8f708970, 0xbf820001,
+ 0x8f708a70, 0xb97a1e06,
+ 0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef603ff, 0x01000000,
+ 0xd7650000, 0x000100c1,
+ 0xd7660000, 0x000200c1,
+ 0x16000084, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850012,
+ 0xbe8303ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbf820011,
+ 0xbe8303ff, 0x00000100,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbefe03c1,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850004,
+ 0xbef003ff, 0x00000200,
+ 0xbeff0380, 0xbf820003,
+ 0xbef003ff, 0x00000400,
+ 0xbeff03c1, 0xb97b2a05,
+ 0x807b817b, 0x8f7b827b,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbf850017,
+ 0xbef603ff, 0x01000000,
+ 0xbefc0384, 0xbf0a7b7c,
+ 0xbf840037, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xe0704000,
+ 0x705d0000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0x807c847c,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7c, 0xbf85ffef,
+ 0xbf820025, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf840020,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xb97b1e06,
+ 0x877bc17b, 0xbf84000c,
+ 0x8f7b837b, 0x807b7c7b,
+ 0xbefe03c1, 0xbeff0380,
+ 0x7e008700, 0xe0704000,
+ 0x705d0000, 0x807c817c,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff8,
+ 0xbf82013a, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0x876eff7f,
+ 0x08000000, 0x906e836e,
+ 0x88776e77, 0x876eff7f,
+ 0x70000000, 0x906e816e,
+ 0x88776e77, 0xb97202dc,
+ 0x8f729972, 0x8872727f,
+ 0x876eff7f, 0x04000000,
+ 0xbf840034, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f4306,
+ 0x876fc16f, 0xbf840029,
+ 0x8f6f866f, 0x8f6f826f,
+ 0xbef6036f, 0xb9782a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850009,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbf820008, 0xe0310000,
+ 0x781d0000, 0x807cff7c,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbef80380,
+ 0xbefe03c1, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb96f2a05, 0x806f816f,
+ 0x8f6f826f, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850021, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
+ 0x8078ff78, 0x00000200,
+ 0xbefc0384, 0xe0304000,
+ 0x785d0000, 0xe0304080,
+ 0x785d0100, 0xe0304100,
+ 0x785d0200, 0xe0304180,
+ 0x785d0300, 0xbf8c3f70,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85ffee, 0xe0304000,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf820032,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000400, 0xbefc0384,
+ 0xe0304000, 0x785d0000,
+ 0xe0304100, 0x785d0100,
+ 0xe0304200, 0x785d0200,
+ 0xe0304300, 0x785d0300,
+ 0xbf8c3f70, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xb96f1e06, 0x876fc16f,
+ 0xbf84000e, 0x8f6f836f,
+ 0x806f7c6f, 0xbefe03c1,
+ 0xbeff0380, 0xe0304000,
+ 0x785d0000, 0xbf8c3f70,
+ 0x7e008500, 0x807c817c,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff7,
+ 0xbeff03c1, 0xe0304000,
+ 0x6e5d0000, 0xe0304100,
+ 0x6e5d0100, 0xe0304200,
+ 0x6e5d0200, 0xe0304300,
+ 0x6e5d0300, 0xbf8c3f70,
+ 0xb9782a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef603ff, 0x01000000,
+ 0xbefc03ff, 0x0000006c,
+ 0x80f89078, 0xf429003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc847c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0x80f8a078, 0xf42d003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc887c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0x80f8c078, 0xf431003a,
+ 0xf0000000, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0xbe883108, 0xbe8a310a,
+ 0xbe8c310c, 0xbe8e310e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb9782a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef603ff, 0x01000000,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
+ 0xf0000000, 0x80788478,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
+ 0xf0000000, 0x80788478,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
+ 0xf0000000, 0x80788478,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
+ 0xf0000000, 0x80788478,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8cc07f, 0xb9eef814,
+ 0xf4211bba, 0xf0000000,
+ 0x80788478, 0xbf8cc07f,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0x876f7bff, 0x000003ff,
+ 0xb9ef4803, 0x876f7bff,
+ 0xfffff800, 0x906f8b6f,
+ 0xb9efa2c3, 0xb9f3f801,
+ 0xb96e2a05, 0x806e816e,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f6e896e, 0xbf820001,
+ 0x8f6e8a6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8cc07f,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbf8a0000,
+ 0xbe80226c, 0xbf810000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 4433bda2ce25..5b220f2a7501 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -20,6 +20,21 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+/* To compile this assembly code:
+ *
+ * Navi1x:
+ * cpp -DASIC_TARGET_NAVI1X=1 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ * sp3-nv1x nv1x.sp3 -hex nv1x.hex
+ *
+ * Others:
+ * cpp -DASIC_TARGET_NAVI1X=0 cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ * sp3-gfx10 gfx10.sp3 -hex gfx10.hex
+ */
+
+#define NO_SQC_STORE !ASIC_TARGET_NAVI1X
+
+var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for the MODE.DEBUG_EN exception being lost when SAVECTX is raised
+
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
@@ -59,6 +74,8 @@ var SQ_WAVE_IB_STS_RCNT_SIZE = 6
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF
+var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
+
var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
@@ -96,17 +113,19 @@ var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
var s_save_status = ttmp12
-var s_save_trapsts = ttmp5
-var s_save_xnack_mask = ttmp6
+var s_save_trapsts = ttmp15
+var s_save_xnack_mask = s_save_trapsts
var s_wave_size = ttmp7
var s_save_buf_rsrc0 = ttmp8
var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
-var s_save_mem_offset = ttmp14
+var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts
-var s_save_tmp = s_save_buf_rsrc2
-var s_save_m0 = ttmp15
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_trapsts
var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
@@ -128,23 +147,25 @@ var s_restore_spi_init_lo = exec_lo
var s_restore_spi_init_hi = exec_hi
var s_restore_mem_offset = ttmp12
var s_restore_alloc_size = ttmp3
-var s_restore_tmp = ttmp6
+var s_restore_tmp = ttmp2
var s_restore_mem_offset_save = s_restore_tmp
var s_restore_m0 = s_restore_alloc_size
var s_restore_mode = ttmp7
-var s_restore_flat_scratch = ttmp2
+var s_restore_flat_scratch = s_restore_tmp
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
-var s_restore_exec_lo = ttmp14
-var s_restore_exec_hi = ttmp15
-var s_restore_status = ttmp4
-var s_restore_trapsts = ttmp5
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_status = ttmp14
+var s_restore_trapsts = ttmp15
var s_restore_xnack_mask = ttmp13
var s_restore_buf_rsrc0 = ttmp8
var s_restore_buf_rsrc1 = ttmp9
var s_restore_buf_rsrc2 = ttmp10
var s_restore_buf_rsrc3 = ttmp11
-var s_restore_size = ttmp7
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
shader main
asic(DEFAULT)
@@ -159,6 +180,24 @@ L_JUMP_TO_RESTORE:
L_SKIP_RESTORE:
s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK
+
+if SINGLE_STEP_MISSED_WORKAROUND
+ // No single step exceptions if MODE.DEBUG_EN=0.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+ s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND
+
+ // Second-level trap already handled exception if STATUS.HALT=1.
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ s_cbranch_scc0 L_FETCH_2ND_TRAP
+
+L_NO_SINGLE_STEP_WORKAROUND:
+end
+
+
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
s_cbranch_scc1 L_SAVE
@@ -170,6 +209,8 @@ L_SKIP_RESTORE:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
L_FETCH_2ND_TRAP:
+
+#if ASIC_TARGET_NAVI1X
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
// unused space ttmp11[31:24].
@@ -183,6 +224,7 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp11, ttmp11, ttmp3
s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#endif
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
@@ -207,6 +249,7 @@ L_NO_NEXT_TRAP:
L_EXCP_CASE:
s_and_b32 ttmp1, ttmp1, 0xFFFF
+#if ASIC_TARGET_NAVI1X
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
@@ -214,6 +257,7 @@ L_EXCP_CASE:
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+#endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -223,23 +267,11 @@ L_EXCP_CASE:
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
- //check whether there is mem_viol
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_NO_PC_REWIND
-
- //if so, need rewind PC assuming GDS operation gets NACKed
- s_mov_b32 s_save_tmp, 0
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
- s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
- s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0
-
-L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
- s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+#if ASIC_TARGET_NAVI1X
s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
@@ -253,6 +285,7 @@ L_NO_PC_REWIND:
s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+#endif
/* inform SPI the readiness and wait for SPI's go signal */
s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
@@ -261,12 +294,31 @@ L_NO_PC_REWIND:
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+#if ASIC_TARGET_NAVI1X
L_SLEEP:
// A sleep of 1 (64 clk) is not enough for 8 waves per SIMD and will cause an
// SQ hang, since the 7th/8th waves cannot get arbitration to execute an instruction
// while the other waves are stuck in the sleep loop waiting for wrexec != 0
s_sleep 0x2
s_cbranch_execz L_SLEEP
+#else
+ s_waitcnt lgkmcnt(0)
+#endif
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_wave_size(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+#if ASIC_TARGET_NAVI1X
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+#endif
/* setup Resource Constants */
s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
@@ -285,9 +337,45 @@ L_SLEEP:
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
- s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
- s_lshl_b32 s_wave_size, s_wave_size, S_WAVE_SIZE
- s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi, it's at bit25
+ get_wave_size(s_wave_size)
+
+#if ASIC_TARGET_NAVI1X
+ // Save and clear vector XNACK state late to free up SGPRs.
+ s_getreg_b32 s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+ s_setreg_imm32_b32 hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
+#endif
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
/* save HW registers */
@@ -300,6 +388,13 @@ L_SAVE_HWREG:
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+#if NO_SQC_STORE
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+ s_mov_b32 m0, 0x0 //Next lane of v2 to write to
+#endif
+
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
@@ -307,8 +402,10 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+
+ // Not used on Sienna_Cichlid, but keep the layout the same for the debugger.
write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE)
@@ -320,10 +417,11 @@ L_SAVE_HWREG:
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
- /* the first wave in the threadgroup */
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
- s_mov_b32 s_save_exec_hi, 0x0
- s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+#if NO_SQC_STORE
+ // Write HWREG/SGPRs with 32 VGPR lanes, wave32 is common case.
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
/* save SGPRs */
// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
@@ -334,10 +432,14 @@ L_SAVE_HWREG:
s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+#if NO_SQC_STORE
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+#else
// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
s_mov_b32 s_save_xnack_mask, s_save_buf_rsrc0
s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+#endif
s_mov_b32 m0, 0x0 //SGPR initial index value =0
s_nop 0x0 //Manually inserted wait states
@@ -353,6 +455,18 @@ L_SAVE_SGPR_LOOP:
s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+
+#if NO_SQC_STORE
+ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
+ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+#endif
+
s_add_u32 m0, m0, 16 //next sgpr index
s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
@@ -366,43 +480,12 @@ L_SAVE_SGPR_LOOP:
s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+#if NO_SQC_STORE
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
-
- /* save first 4 VGPR, then LDS save could use */
- // each wave will alloc 4 vgprs at least...
-
- s_mov_b32 s_save_mem_offset, 0
- s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
- s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
- s_and_b32 m0, m0, 1
- s_cmp_eq_u32 m0, 1
- s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
- s_mov_b32 exec_hi, 0x00000000
- s_branch L_SAVE_4VGPR_WAVE32
-L_ENABLE_SAVE_4VGPR_EXEC_HI:
- s_mov_b32 exec_hi, 0xFFFFFFFF
- s_branch L_SAVE_4VGPR_WAVE64
-L_SAVE_4VGPR_WAVE32:
- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-
- // VGPR Allocated in 4-GPR granularity
-
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
- s_branch L_SAVE_LDS
-
-L_SAVE_4VGPR_WAVE64:
- s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-
- // VGPR Allocated in 4-GPR granularity
-
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+#endif
/* save LDS */
@@ -423,7 +506,7 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
s_barrier //LDS is used? wait for other waves in the same TG
- s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
// first wave do LDS save;
@@ -598,9 +681,7 @@ L_RESTORE:
s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
//determine it is wave32 or wave64
- s_getreg_b32 s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
- s_lshl_b32 s_restore_size, s_restore_size, S_WAVE_SIZE
- s_or_b32 s_restore_size, s_restore_spi_init_hi, s_restore_size
+ get_wave_size(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@@ -634,7 +715,7 @@ L_RESTORE_LDS_NORMAL:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
- s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
s_and_b32 m0, m0, 1
s_cmp_eq_u32 m0, 1
s_mov_b32 m0, 0x0
@@ -842,38 +923,55 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
- s_mov_b32 s_restore_tmp, s_restore_pc_hi
- s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
-
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+
+#if ASIC_TARGET_NAVI1X
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
+#endif
+
s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
+ s_waitcnt lgkmcnt(0)
+
+#if ASIC_TARGET_NAVI1X
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
- s_mov_b32 s_restore_mode, 0x0
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_mov_b32 s_restore_tmp, 0x0
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
- s_and_b32 s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_REPLAY_W64H_MASK
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_REPLAY_W64H_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
- s_or_b32 s_restore_mode, s_restore_mode, s_restore_m0
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
- s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_mode
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+#endif
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -887,32 +985,53 @@ L_END_PGM:
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ v_writelane_b32 v2, s, m0
+ s_add_u32 m0, m0, 0x1
+#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
s_buffer_store_dword s, s_rsrc, m0 glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
+#endif
end
function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
+#endif
end
function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+#else
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
+#endif
end
-
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
s_add_u32 s_mem_offset, s_mem_offset, 4
@@ -942,9 +1061,7 @@ end
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
- s_lshr_b32 m0, s_size, S_WAVE_SIZE
- s_and_b32 m0, m0, 1
- s_cmp_eq_u32 m0, 1
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE
s_cbranch_scc1 L_ENABLE_SHIFT_W64
s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
s_branch L_SHIFT_DONE
@@ -965,3 +1082,9 @@ end
function get_hwreg_size_bytes
return 128
end
+
+function get_wave_size(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+ s_or_b32 s_reg, s_save_spi_init_hi, s_reg //share with exec_hi, it's at bit25
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 1009a3b8dcc2..9deadfd8f929 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -678,6 +678,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0491ab2b4a9b..7f6d0958ed62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -46,6 +46,7 @@ extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -72,6 +73,7 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
[CHIP_NAVI10] = &gfx_v10_kfd2kgd,
[CHIP_NAVI12] = &gfx_v10_kfd2kgd,
[CHIP_NAVI14] = &gfx_v10_kfd2kgd,
+ [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
};
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -458,6 +460,24 @@ static const struct kfd_device_info navi14_device_info = {
.num_sdma_queues_per_engine = 8,
};
+static const struct kfd_device_info sienna_cichlid_device_info = {
+ .asic_family = CHIP_SIENNA_CICHLID,
+ .asic_name = "sienna_cichlid",
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 4,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
/* For each entry, [0] is regular and [1] is virtualisation device. */
static const struct kfd_device_info *kfd_supported_devices[][2] = {
#ifdef KFD_SUPPORT_IOMMU_V2
@@ -480,6 +500,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
[CHIP_NAVI10] = {&navi10_device_info, NULL},
[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
[CHIP_NAVI14] = {&navi14_device_info, NULL},
+ [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -559,6 +580,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_nv1x_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
@@ -910,6 +935,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
if (!p)
return -ESRCH;
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
@@ -977,6 +1003,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
+ WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+ p->lead_thread->pid, delay_jiffies);
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..dd550025d1c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -137,7 +137,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
-void increment_queue_count(struct device_queue_manager *dqm,
+static void increment_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count++;
@@ -145,7 +145,7 @@ void increment_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count++;
}
-void decrement_queue_count(struct device_queue_manager *dqm,
+static void decrement_queue_count(struct device_queue_manager *dqm,
enum kfd_queue_type type)
{
dqm->active_queue_count--;
@@ -153,6 +153,30 @@ void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
+int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val)
+{
+ int ret;
+ uint64_t tmp = 0;
+
+ if (!val)
+ return -EINVAL;
+ /*
+ * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+ */
+ if (!access_ok((const void __user *)(q_rptr +
+ sizeof(uint64_t)), sizeof(uint64_t))) {
+ pr_err("Can't access sdma queue activity counter\n");
+ return -EFAULT;
+ }
+
+ ret = get_user(tmp, (uint64_t *)(q_rptr + sizeof(uint64_t)));
+ if (!ret) {
+ *val = tmp;
+ }
+
+ return ret;
+}
+
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +511,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
@@ -521,9 +546,23 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q)
{
int retval;
+ uint64_t sdma_val = 0;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ &sdma_val);
+ if (retval)
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ q->properties.queue_id);
+ }
dqm_lock(dqm);
retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
+ if (!retval)
+ pdd->sdma_past_activity_counter += sdma_val;
dqm_unlock(dqm);
return retval;
@@ -1428,6 +1467,18 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd_mgr;
+ uint64_t sdma_val = 0;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ retval = read_sdma_queue_counter((uint64_t)q->properties.read_ptr,
+ &sdma_val);
+ if (retval)
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ q->properties.queue_id);
+ }
retval = 0;
@@ -1449,10 +1500,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
deallocate_doorbell(qpd, q);
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- deallocate_sdma_queue(dqm, q);
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
deallocate_sdma_queue(dqm, q);
+ pdd->sdma_past_activity_counter += sdma_val;
+ }
list_del(&q->list);
qpd->queue_count--;
@@ -1886,6 +1938,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
+ case CHIP_SIENNA_CICHLID:
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..49d8e324c636 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,5 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden);
}
+int read_sdma_queue_counter(uint64_t q_rptr, uint64_t *val);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c