diff options
author | Jerome Glisse <jglisse@redhat.com> | 2012-05-22 17:01:41 -0400 |
---|---|---|
committer | Jerome Glisse <jglisse@redhat.com> | 2012-05-22 17:01:41 -0400 |
commit | 8568c748f530bf0bb3cb03e8e456f206f4ae6f0a (patch) | |
tree | f7ed900388a87dbdbbe0a9d73bcbf4eab6901558 | |
parent | 98a26545fd041bff49186fb8439a7282d7df3ca3 (diff) |
replayx: finish support for kernel cmd stream replaying
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | r6xx.c | 1239 | ||||
-rw-r--r-- | r6xx.h | 123 | ||||
-rw-r--r-- | r6xx_rati.c | 493 | ||||
-rw-r--r-- | r6xx_replayx.c | 376 | ||||
-rw-r--r-- | r6xxd.h (renamed from replayx_r6xxd.h) | 101 | ||||
-rw-r--r-- | replayx.c | 8 | ||||
-rw-r--r-- | replayx.h | 74 | ||||
-rw-r--r-- | replayx_drv.c | 10 | ||||
-rw-r--r-- | tati.c | 8 |
10 files changed, 2362 insertions, 74 deletions
@@ -7,10 +7,10 @@ JOUJOU_OBJECTS = $(JOUJOU_SOURCES:.c=.o) RDUMP_SOURCES = rdump.c radeon_pci.c reg.c RDUMP_OBJECTS = $(RDUMP_SOURCES:.c=.o) -TATI_SOURCES = tati.c +TATI_SOURCES = tati.c r6xx_rati.c TATI_OBJECTS = $(TATI_SOURCES:.c=.o) -REPLAYX_SOURCES = replayx.c replayx_drv.c replayx_r6xx.c +REPLAYX_SOURCES = replayx.c replayx_drv.c r6xx.c r6xx_rati.c r6xx_replayx.c REPLAYX_OBJECTS = $(REPLAYX_SOURCES:.c=.o) CDUMP_SOURCES = cdump.c radeon_pci.c reg.c @@ -0,0 +1,1239 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + * Jerome Glisse + */ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include "replayx.h" +#include "xf86drm.h" +#include "radeon_drm.h" +#include "radeon_family.h" +#include "r6xx.h" +#include "r6xxd.h" + +void r6xx_emit_reloc(struct r6xx_blit *blit, struct ctx_bo *bo) +{ + unsigned i; + + for (i = 0; i < 3; i++) { + if (blit->relocs[i].handle == bo->handle) { + blit->cs[blit->cdw++] = PKT3(IT_NOP, 1); + blit->cs[blit->cdw++] = i * 4; + return; + } + } +} + +int r6xx_sq_conf(struct r6xx_blit *blit) +{ + struct r6xx_sq_conf *sq_conf = &blit->sq_conf; + + sq_conf->ps_prio = 0; + sq_conf->vs_prio = 1; + sq_conf->gs_prio = 2; + sq_conf->es_prio = 3; + /* need to set stack/thread/gpr limits based on the asic + * for now just set them low enough so any card will work + * see r600_cp.c in the drm + */ + switch (blit->ctx->family) { + case CHIP_R600: + sq_conf->num_ps_gprs = 192; + sq_conf->num_vs_gprs = 56; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 136; + sq_conf->num_vs_threads = 48; + sq_conf->num_gs_threads = 4; + sq_conf->num_es_threads = 4; + sq_conf->num_ps_stack_entries = 128; + sq_conf->num_vs_stack_entries = 128; + sq_conf->num_gs_stack_entries = 0; + sq_conf->num_es_stack_entries = 0; + break; + case CHIP_RV630: + case CHIP_RV635: + sq_conf->num_ps_gprs = 84; + sq_conf->num_vs_gprs = 36; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 144; + sq_conf->num_vs_threads = 40; + sq_conf->num_gs_threads = 4; + sq_conf->num_es_threads = 4; + sq_conf->num_ps_stack_entries = 40; + sq_conf->num_vs_stack_entries = 40; + sq_conf->num_gs_stack_entries = 32; + sq_conf->num_es_stack_entries = 16; + break; + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + default: + sq_conf->num_ps_gprs = 84; + sq_conf->num_vs_gprs = 36; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 136; + sq_conf->num_vs_threads = 48; + sq_conf->num_gs_threads = 4; + sq_conf->num_es_threads = 4; + sq_conf->num_ps_stack_entries = 40; + sq_conf->num_vs_stack_entries = 40; + sq_conf->num_gs_stack_entries = 32; + sq_conf->num_es_stack_entries = 16; + break; + case CHIP_RV670: + sq_conf->num_ps_gprs = 144; + sq_conf->num_vs_gprs = 40; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 136; + sq_conf->num_vs_threads = 48; + sq_conf->num_gs_threads = 4; + sq_conf->num_es_threads = 4; + sq_conf->num_ps_stack_entries = 40; + sq_conf->num_vs_stack_entries = 40; + sq_conf->num_gs_stack_entries = 32; + sq_conf->num_es_stack_entries = 16; + break; + case CHIP_RV770: + sq_conf->num_ps_gprs = 192; + sq_conf->num_vs_gprs = 56; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 188; + sq_conf->num_vs_threads = 60; + sq_conf->num_gs_threads = 0; + sq_conf->num_es_threads = 0; + sq_conf->num_ps_stack_entries = 256; + sq_conf->num_vs_stack_entries = 256; + sq_conf->num_gs_stack_entries = 0; + sq_conf->num_es_stack_entries = 0; + break; + case CHIP_RV730: + case CHIP_RV740: + sq_conf->num_ps_gprs = 84; + sq_conf->num_vs_gprs = 36; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 188; + sq_conf->num_vs_threads = 60; + sq_conf->num_gs_threads = 0; + sq_conf->num_es_threads = 0; + sq_conf->num_ps_stack_entries = 128; + sq_conf->num_vs_stack_entries = 128; + sq_conf->num_gs_stack_entries = 0; + sq_conf->num_es_stack_entries = 0; + break; + case CHIP_RV710: + sq_conf->num_ps_gprs = 192; + sq_conf->num_vs_gprs = 56; + sq_conf->num_temp_gprs = 4; + sq_conf->num_gs_gprs = 0; + sq_conf->num_es_gprs = 0; + sq_conf->num_ps_threads = 144; + sq_conf->num_vs_threads = 48; + sq_conf->num_gs_threads = 0; + sq_conf->num_es_threads = 0; + sq_conf->num_ps_stack_entries = 128; + sq_conf->num_vs_stack_entries = 128; + sq_conf->num_gs_stack_entries = 0; + sq_conf->num_es_stack_entries = 0; + break; + } + + /* SQ setup */ + switch (blit->ctx->family) { + case CHIP_RV610: + case CHIP_RV620: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV710: + /* no vertex cache (VC) */ + sq_conf->sq_config = SQ_CONFIG__VC_ENABLE(0); + break; + default: + sq_conf->sq_config = SQ_CONFIG__VC_ENABLE(1); + break; + } + sq_conf->sq_config |= SQ_CONFIG__DX9_CONSTS(1) | + SQ_CONFIG__ALU_INST_PREFER_VECTOR(1) | + SQ_CONFIG__PS_PRIO(sq_conf->ps_prio) | + SQ_CONFIG__VS_PRIO(sq_conf->vs_prio) | + SQ_CONFIG__GS_PRIO(sq_conf->gs_prio) | + SQ_CONFIG__ES_PRIO(sq_conf->es_prio); + + return 0; +} + +int r6xx_set_vport_scissor(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2) +{ + unsigned cdw = blit->cdw + 4; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = ((PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL__STRIDE) - + SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_VPORT_SCISSOR_0_TL__TL_X(x1) | + PA_SC_VPORT_SCISSOR_0_TL__TL_Y(y1) | + PA_SC_VPORT_SCISSOR_0_TL__WINDOW_OFFSET_DISABLE(1); + blit->cs[blit->cdw++] = PA_SC_VPORT_SCISSOR_0_BR__BR_X(x2) | + PA_SC_VPORT_SCISSOR_0_BR__BR_Y(y2); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_generic_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2) +{ + unsigned cdw = blit->cdw + 4; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (PA_SC_GENERIC_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_GENERIC_SCISSOR_TL__TL_X(x1) | + PA_SC_GENERIC_SCISSOR_TL__TL_Y(y1) | + PA_SC_GENERIC_SCISSOR_TL__WINDOW_OFFSET_DISABLE(1); + blit->cs[blit->cdw++] = PA_SC_GENERIC_SCISSOR_BR__BR_X(x2) | + PA_SC_GENERIC_SCISSOR_BR__BR_Y(y2); + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_window_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2) +{ + unsigned cdw = blit->cdw + 4; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (PA_SC_WINDOW_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_WINDOW_SCISSOR_TL__TL_X(x1) | + PA_SC_WINDOW_SCISSOR_TL__TL_Y(y1) | + PA_SC_WINDOW_SCISSOR_TL__WINDOW_OFFSET_DISABLE(1); + blit->cs[blit->cdw++] = PA_SC_WINDOW_SCISSOR_BR__BR_X(x2) | + PA_SC_WINDOW_SCISSOR_BR__BR_Y(y2); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_screen_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2) +{ + unsigned cdw = blit->cdw + 4; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (PA_SC_SCREEN_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_SCREEN_SCISSOR_TL__TL_X(x1) | + PA_SC_SCREEN_SCISSOR_TL__TL_Y(y1); + blit->cs[blit->cdw++] = PA_SC_SCREEN_SCISSOR_BR__BR_X(x2) | + PA_SC_SCREEN_SCISSOR_BR__BR_Y(y2); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_clip_rect(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2) +{ + unsigned cdw = blit->cdw + 4; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = ((PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL__STRIDE) - + SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_CLIPRECT_0_TL__TL_X(x1) | + PA_SC_CLIPRECT_0_TL__TL_Y(y1); + blit->cs[blit->cdw++] = PA_SC_CLIPRECT_0_BR__BR_X(x2) | + PA_SC_CLIPRECT_0_BR__BR_Y(y2); + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_render_target(struct r6xx_blit *blit, struct ctx_bo *bo) +{ + uint32_t cb_color_info; + unsigned pitch, slice; + unsigned cdw = blit->cdw + 50; + + pitch = bo->pitch; + slice = bo->h * pitch; + cb_color_info = CB_COLOR0_INFO__FORMAT(bo->hw_format)| + CB_COLOR0_INFO__COMP_SWAP(SWAP_ALT); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_BASE) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + r6xx_emit_reloc(blit, bo); + + /* rv6xx workaround */ + if ((blit->ctx->family > CHIP_R600) && + (blit->ctx->family < CHIP_RV770)) { + cdw += 2; + blit->cs[blit->cdw++] = PKT3(IT_SURFACE_BASE_UPDATE, 1); + blit->cs[blit->cdw++] = (1 << 1); + } + /* set CMASK & TILE buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_TILE) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + r6xx_emit_reloc(blit, bo); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_FRAG) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + r6xx_emit_reloc(blit, bo); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_SIZE) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_COLOR0_SIZE__PITCH_TILE_MAX((pitch >> 3) - 1) | + CB_COLOR0_SIZE__SLICE_TILE_MAX((slice >> 6) - 1); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_VIEW) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_COLOR0_VIEW__SLICE_START(0) | + CB_COLOR0_VIEW__SLICE_MAX(0); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_MASK) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_COLOR0_MASK__CMASK_BLOCK_MAX(0) | + CB_COLOR0_MASK__FMASK_TILE_MAX(0); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR0_INFO) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = cb_color_info; + r6xx_emit_reloc(blit, bo); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_TARGET_MASK) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_TARGET_MASK__TARGET0_ENABLE(0xf); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_COLOR_CONTROL) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_COLOR_CONTROL__ROP3(ROP3_COPY); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = ((CB_BLEND_CONTROL) - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + r6xx_set_generic_scissor(blit, 0, 0, bo->w, bo->h); + r6xx_set_screen_scissor(blit, 0, 0, bo->w, bo->h); + r6xx_set_window_scissor(blit, 0, 0, bo->w, bo->h); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_fs_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size) +{ + unsigned cdw = blit->cdw + 11; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_START_FS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = offset >> 8; + r6xx_emit_reloc(blit, bo); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_CF_OFFSET_FS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_RESOURCES_FS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SQ_PGM_RESOURCES_FS__NUM_GPRS(ngprs) | + SQ_PGM_RESOURCES_FS__STACK_SIZE(stack_size); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_vs_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size, + unsigned cs_export_count) +{ + unsigned cdw = blit->cdw + 17; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_START_VS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = offset >> 8; + r6xx_emit_reloc(blit, bo); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_CF_OFFSET_VS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_RESOURCES_VS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SQ_PGM_RESOURCES_VS__NUM_GPRS(ngprs) | + SQ_PGM_RESOURCES_VS__STACK_SIZE(stack_size) | + SQ_PGM_RESOURCES_VS__UNCACHED_FIRST_INST(1); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_CF_OFFSET_VS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + /* Interpolator setup */ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SPI_VS_OUT_CONFIG - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SPI_VS_OUT_CONFIG__VS_EXPORT_COUNT(cs_export_count); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +void r6xx_ps_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size, + unsigned export_mode, unsigned num_interp) +{ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_START_PS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = offset >> 8; + r6xx_emit_reloc(blit, bo); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_CF_OFFSET_PS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_RESOURCES_PS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SQ_PGM_RESOURCES_PS__NUM_GPRS(ngprs) | + SQ_PGM_RESOURCES_PS__STACK_SIZE(stack_size) | + SQ_PGM_RESOURCES_PS__UNCACHED_FIRST_INST(1); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_CF_OFFSET_PS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SQ_PGM_EXPORTS_PS - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SQ_PGM_EXPORTS_PS__EXPORT_MODE(export_mode); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 4); + blit->cs[blit->cdw++] = (SPI_PS_IN_CONTROL_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SPI_PS_IN_CONTROL_0__NUM_INTERP(num_interp); + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (SPI_PS_INPUT_CNTL_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 0; +} + +int r6xx_set_alu_consts(struct r6xx_blit *blit, unsigned id, + unsigned count, float *cst) +{ + unsigned cdw = blit->cdw + count * 4 + 2; + unsigned i; + + blit->cs[blit->cdw++] = PKT3(IT_SET_ALU_CONST, count * 4 + 1); + blit->cs[blit->cdw++] = id * 4; + for (i = 0; i < count * 4; i++) { + blit->cs[blit->cdw++] = fui(cst[i]); + } + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +void r6xx_set_default_sampler(struct r6xx_blit *blit, unsigned id) +{ + blit->cs[blit->cdw++] = PKT3(IT_SET_SAMPLER, 4); + blit->cs[blit->cdw++] = id * 3; + blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD0_0__CLAMP_X(SQ_TEX_CLAMP_LAST_TEXEL) | + SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y(SQ_TEX_CLAMP_LAST_TEXEL) | + SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z(SQ_TEX_WRAP) | + SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER(SQ_TEX_XY_FILTER_POINT) | + SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER(SQ_TEX_XY_FILTER_POINT) | + SQ_TEX_SAMPLER_WORD0_0__Z_FILTER(SQ_TEX_Z_FILTER_NONE) | + SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER(SQ_TEX_Z_FILTER_NONE) | + SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE(0); + blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD1_0__MIN_LOD(0) | + SQ_TEX_SAMPLER_WORD1_0__MAX_LOD(0) | + SQ_TEX_SAMPLER_WORD1_0__MAX_LOD(0) | + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS(0); + blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC(0) | + SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE(1) | + SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA(0) | + SQ_TEX_SAMPLER_WORD2_0__HIGH_PRECISION_FILTER(0) | + SQ_TEX_SAMPLER_WORD2_0__TYPE(0); +} + +void r6xx_set_tex_resource(struct r6xx_blit *blit, unsigned id, struct ctx_bo *bo) +{ + blit->cs[blit->cdw++] = PKT3(IT_SET_RESOURCE, 8); + blit->cs[blit->cdw++] = id * 7; + blit->cs[blit->cdw++] = SQ_TEX_RESOURCE_WORD0_0__DIM(SQ_TEX_DIM_2D) | + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE(bo->hw_tile) | + SQ_TEX_RESOURCE_WORD0_0__PITCH((bo->pitch >> 3) - 1) | + SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH(bo->w - 1); + blit->cs[blit->cdw++] = SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT(bo->hw_format) | + SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT(bo->h - 1); + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = SQ_TEX_RESOURCE_WORD4_0__REQUEST_SIZE(1) | + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X(SQ_SEL_X) | + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y(SQ_SEL_Y) | + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z(SQ_SEL_Z) | + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W(SQ_SEL_W); + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = SQ_VTX_CONSTANT_WORD6_0__TYPE(SQ_TEX_VTX_VALID_TEXTURE); + r6xx_emit_reloc(blit, bo); + r6xx_emit_reloc(blit, bo); +} + +int r6xx_set_vtx_resource(struct r6xx_blit *blit, unsigned id, + struct r6xx_vbo *vbo) +{ + unsigned cdw = blit->cdw + 11; + + blit->cs[blit->cdw++] = PKT3(IT_SET_RESOURCE, 8); + blit->cs[blit->cdw++] = id * 7; + blit->cs[blit->cdw++] = vbo->offset; + blit->cs[blit->cdw++] = vbo->ndw * 4; + blit->cs[blit->cdw++] = SQ_VTX_CONSTANT_WORD2_0__STRIDE(vbo->stride) | + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT(vbo->data_format) | + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL(vbo->num_format_all) | + SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL(vbo->format_comp_all) | + SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL(vbo->srf_mode_all) | + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP(vbo->endian_swap); + blit->cs[blit->cdw++] = SQ_VTX_CONSTANT_WORD3_0__MEM_REQUEST_SIZE(vbo->mem_request_size); + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = SQ_VTX_CONSTANT_WORD6_0__TYPE(SQ_TEX_VTX_VALID_BUFFER); + r6xx_emit_reloc(blit, vbo->bo); + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_draw_auto(struct r6xx_blit *blit, struct r6xx_draw *draw) +{ + unsigned cdw = blit->cdw + 10; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2); + blit->cs[blit->cdw++] = (VGT_PRIMITIVE_TYPE - SET_CONFIG_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = draw->primitive_type; + + blit->cs[blit->cdw++] = PKT3(IT_INDEX_TYPE, 1); + blit->cs[blit->cdw++] = draw->index_type; + blit->cs[blit->cdw++] = PKT3(IT_NUM_INSTANCES, 1); + blit->cs[blit->cdw++] = draw->num_instances; + blit->cs[blit->cdw++] = PKT3(IT_DRAW_INDEX_AUTO, 2); + blit->cs[blit->cdw++] = draw->num_indices; + blit->cs[blit->cdw++] = draw->vgt_draw_initiator; + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_disable_depth(struct r6xx_blit *blit) +{ + unsigned cdw = blit->cdw + 6; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (DB_DEPTH_INFO - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (DB_DEPTH_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_surface_sync(struct r6xx_blit *blit, + struct ctx_bo *bo, + unsigned sync_type) +{ + unsigned cdw = blit->cdw + 5; + unsigned size = 0xffffffff; + + if (bo) { + size = (bo->size + 255) >> 8; + cdw += 2; + } + + blit->cs[blit->cdw++] = PKT3(IT_SURFACE_SYNC, 4); + blit->cs[blit->cdw++] = sync_type; + blit->cs[blit->cdw++] = size; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 10; + if (bo) { + r6xx_emit_reloc(blit, bo); + } + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_event(struct r6xx_blit *blit, unsigned event_iniator) +{ + unsigned cdw = blit->cdw + 2; + + blit->cs[blit->cdw++] = PKT3(IT_EVENT_WRITE, 1); + blit->cs[blit->cdw++] = event_iniator; + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + +int r6xx_set_default_state(struct r6xx_blit *blit) +{ + struct r6xx_sq_conf *sq_conf = &blit->sq_conf; + unsigned cdw = blit->cdw + 234, i; + + if (blit->ctx->family < CHIP_RV770) { + blit->cs[blit->cdw++] = PKT3(IT_START_3D_CMDBUF, 1); + blit->cs[blit->cdw++] = 0; + } + blit->cs[blit->cdw++] = PKT3(IT_CONTEXT_CONTROL, 2); + blit->cs[blit->cdw++] = 0x80000000; + blit->cs[blit->cdw++] = 0x80000000; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 7); + blit->cs[blit->cdw++] = (SQ_CONFIG - SET_CONFIG_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = sq_conf->sq_config; + blit->cs[blit->cdw++] = SQ_GPR_RESOURCE_MGMT_1__NUM_PS_GPRS(sq_conf->num_ps_gprs) | + SQ_GPR_RESOURCE_MGMT_1__NUM_VS_GPRS(sq_conf->num_vs_gprs) | + SQ_GPR_RESOURCE_MGMT_1__NUM_CLAUSE_TEMP_GPRS(sq_conf->num_temp_gprs); + blit->cs[blit->cdw++] = SQ_GPR_RESOURCE_MGMT_2__NUM_GS_GPRS(sq_conf->num_gs_gprs) | + SQ_GPR_RESOURCE_MGMT_2__NUM_ES_GPRS(sq_conf->num_es_gprs); + blit->cs[blit->cdw++] = SQ_THREAD_RESOURCE_MGMT__NUM_PS_THREADS(sq_conf->num_ps_threads) | + SQ_THREAD_RESOURCE_MGMT__NUM_VS_THREADS(sq_conf->num_vs_threads) | + SQ_THREAD_RESOURCE_MGMT__NUM_GS_THREADS(sq_conf->num_gs_threads) | + SQ_THREAD_RESOURCE_MGMT__NUM_ES_THREADS(sq_conf->num_es_threads); + blit->cs[blit->cdw++] = SQ_STACK_RESOURCE_MGMT_1__NUM_PS_STACK_ENTRIES(sq_conf->num_ps_stack_entries) | + SQ_STACK_RESOURCE_MGMT_1__NUM_VS_STACK_ENTRIES(sq_conf->num_vs_stack_entries); + blit->cs[blit->cdw++] = SQ_STACK_RESOURCE_MGMT_2__NUM_GS_STACK_ENTRIES(sq_conf->num_gs_stack_entries) | + SQ_STACK_RESOURCE_MGMT_2__NUM_ES_STACK_ENTRIES(sq_conf->num_es_stack_entries); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2); + blit->cs[blit->cdw++] = (VC_ENHANCE - SET_CONFIG_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2); + blit->cs[blit->cdw++] = (DB_DEBUG - SET_CONFIG_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0x80000000; + blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2); + blit->cs[blit->cdw++] = (DB_WATERMARKS - SET_CONFIG_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = DB_WATERMARKS__DEPTH_FREE(4) | + DB_WATERMARKS__DEPTH_FLUSH(16) | + DB_WATERMARKS__FORCE_SUMMARIZE(0) | + DB_WATERMARKS__DEPTH_PENDING_FREE(4) | + DB_WATERMARKS__DEPTH_CACHELINE_FREE(16); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CTL_CONST, 3); + blit->cs[blit->cdw++] = (SQ_VTX_BASE_VTX_LOC - SET_CTL_CONST__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 10); + blit->cs[blit->cdw++] = (SQ_ESGS_RING_ITEMSIZE - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // SQ_ESGS_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_GSVS_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_ESTMP_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_GSTMP_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_VSTMP_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_PSTMP_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_FBUF_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_REDUC_RING_ITEMSIZE + blit->cs[blit->cdw++] = 0; // SQ_GS_VERT_ITEMSIZE + + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5); + blit->cs[blit->cdw++] = (CB_CLRCMP_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_CLRCMP_CONTROL__CLRCMP_FCN_SEL(CLRCMP_SEL_SRC); + blit->cs[blit->cdw++] = 0; // CB_CLRCMP_SRC + blit->cs[blit->cdw++] = 0; // CB_CLRCMP_DST + blit->cs[blit->cdw++] = 0; // CB_CLRCMP_MSK + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (CB_SHADER_MASK - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = CB_SHADER_MASK__OUTPUT0_ENABLE(0xf); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 6); + blit->cs[blit->cdw++] = (SX_ALPHA_TEST_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // SX_ALPHA_TEST_CONTROL + blit->cs[blit->cdw++] = 0; // CB_BLEND_RED + blit->cs[blit->cdw++] = 0; // CB_BLEND_GREEN + blit->cs[blit->cdw++] = 0; // CB_BLEND_BLUE + blit->cs[blit->cdw++] = 0; // CB_BLEND_ALPHA + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (PA_SC_WINDOW_OFFSET - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET(0) | + PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET(0); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (PA_SC_CLIPRECT_RULE - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_SC_CLIPRECT_RULE__CLIP_RULE(0xfffff); + + /* clip boolean is set to always visible -> doesn't matter */ + for (i = 0; i < PA_SC_CLIPRECT_0_TL__NUM; i++) { + r6xx_set_clip_rect(blit, i, 0, 0, 8192, 8192); + } + + for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL__NUM; i++) { + r6xx_set_vport_scissor(blit, i, 0, 0, 8192, 8192); + } + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (PA_SC_MPASS_PS_CNTL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = PA_SC_MODE_CNTL__FORCE_EOV_CNTDWN_ENABLE(1) | + PA_SC_MODE_CNTL__FORCE_EOV_REZ_ENABLE(1) | + PA_SC_MODE_CNTL__TILE_COVER_DISABLE(0) | + PA_SC_MODE_CNTL__TILE_COVER_NO_SCISSOR(1) | + 0x00500000; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 10); + blit->cs[blit->cdw++] = (PA_SC_LINE_CNTL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // PA_SC_LINE_CNTL + blit->cs[blit->cdw++] = 0; // PA_SC_AA_CONFIG + blit->cs[blit->cdw++] = PA_SU_VTX_CNTL__ROUND_MODE(X_TRUNCATE) | + PA_SU_VTX_CNTL__PIX_CENTER(1) | + /* round to even, fixed point 1/256 */ + PA_SU_VTX_CNTL__QUANT_MODE(X_1_256TH); + blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_VERT_CLIP_ADJ + blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_VERT_DISC_ADJ + blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_HORZ_CLIP_ADJ + blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_HORZ_DISC_ADJ + blit->cs[blit->cdw++] = 0; // PA_SC_AA_SAMPLE_LOCS_MCTX + blit->cs[blit->cdw++] = 0; // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (PA_SC_AA_MASK - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0xFFFFFFFF; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 6); + blit->cs[blit->cdw++] = (PA_CL_CLIP_CNTL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = PA_CL_CLIP_CNTL__CLIP_DISABLE(1) | + PA_CL_CLIP_CNTL__ZCLIP_NEAR_DISABLE(0) | + PA_CL_CLIP_CNTL__ZCLIP_FAR_DISABLE(0); + blit->cs[blit->cdw++] = PA_SU_SC_MODE_CNTL__FACE(1); + blit->cs[blit->cdw++] = PA_CL_VTE_CNTL__VTX_XY_FMT(1) | + PA_CL_VTE_CNTL__VTX_Z_FMT(0) | + PA_CL_VTE_CNTL__VPORT_Z_SCALE_ENA(1) | + PA_CL_VTE_CNTL__VPORT_Z_OFFSET_ENA(1); + blit->cs[blit->cdw++] = 0; // PA_CL_VS_OUT_CNTL + blit->cs[blit->cdw++] = 0; // PA_CL_NANINF_CNTL + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (PA_CL_VPORT_ZSCALE_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = fui(1.0); + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (PA_CL_VPORT_ZOFFSET_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = fui(0.0); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 7); + blit->cs[blit->cdw++] = (PA_SU_POLY_OFFSET_DB_FMT_CNTL - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0xe8; // PA_SU_POLY_OFFSET_DB_FMT_CNTL + blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_CLAMP + blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_FRONT_SCALE + blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_FRONT_OFFSET + blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_BACK_SCALE + blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_BACK_OFFSET + + /* default Interpolator setup */ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (SPI_VS_OUT_ID_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SPI_VS_OUT_ID_0__SEMANTIC_0(0) | + SPI_VS_OUT_ID_0__SEMANTIC_1(1); + /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (SPI_PS_INPUT_CNTL_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = SPI_PS_INPUT_CNTL_0__SEMANTIC(0) | + SPI_PS_INPUT_CNTL_0__DEFAULT_VAL(0x01) | + SPI_PS_INPUT_CNTL_0__SEL_CENTROID(1); + /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ + blit->cs[blit->cdw++] = SPI_PS_INPUT_CNTL_0__SEMANTIC(1) | + SPI_PS_INPUT_CNTL_0__DEFAULT_VAL(0x01) | + SPI_PS_INPUT_CNTL_0__SEL_CENTROID(1); + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5); + blit->cs[blit->cdw++] = (SPI_INPUT_Z - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // SPI_INPUT_Z + blit->cs[blit->cdw++] = 0; // SPI_FOG_CNTL + blit->cs[blit->cdw++] = 0; // SPI_FOG_FUNC_SCALE + blit->cs[blit->cdw++] = 0; // SPI_FOG_FUNC_BIAS + + /* VGT */ + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5); + blit->cs[blit->cdw++] = (VGT_MAX_VTX_INDX - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0xffffff; // VGT_MAX_VTX_INDX + blit->cs[blit->cdw++] = 0; // VGT_MIN_VTX_INDX + blit->cs[blit->cdw++] = 0; // VGT_INDX_OFFSET + blit->cs[blit->cdw++] = 0; // VGT_MULTI_PRIM_IB_RESET_INDX + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (VGT_PRIMITIVEID_EN - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (VGT_MULTI_PRIM_IB_RESET_EN - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3); + blit->cs[blit->cdw++] = (VGT_INSTANCE_STEP_RATE_0 - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // VGT_INSTANCE_STEP_RATE_0 + blit->cs[blit->cdw++] = 0; // VGT_INSTANCE_STEP_RATE_1 + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 18); + blit->cs[blit->cdw++] = (PA_SU_POINT_SIZE - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // PA_SU_POINT_SIZE + blit->cs[blit->cdw++] = 0; // PA_SU_POINT_MINMAX + blit->cs[blit->cdw++] = PA_SU_LINE_CNTL__WIDTH(8); + blit->cs[blit->cdw++] = 0; // PA_SC_LINE_STIPPLE + blit->cs[blit->cdw++] = 0; // VGT_OUTPUT_PATH_CNTL + blit->cs[blit->cdw++] = 0; // VGT_HOS_CNTL + blit->cs[blit->cdw++] = 0; // VGT_HOS_MAX_TESS_LEVEL + blit->cs[blit->cdw++] = 0; // VGT_HOS_MIN_TESS_LEVEL + blit->cs[blit->cdw++] = 0; // VGT_HOS_REUSE_DEPTH + blit->cs[blit->cdw++] = 0; // VGT_GROUP_PRIM_TYPE + blit->cs[blit->cdw++] = 0; // VGT_GROUP_FIRST_DECR + blit->cs[blit->cdw++] = 0; // VGT_GROUP_DECR + blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_0_CNTL + blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_1_CNTL + blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_0_FMT_CNTL + blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_1_FMT_CNTL + blit->cs[blit->cdw++] = 0; // VGT_GS_MODE + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 4); + blit->cs[blit->cdw++] = (VGT_STRMOUT_EN - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; // VGT_STRMOUT_EN + blit->cs[blit->cdw++] = VGT_REUSE_OFF__REUSE_OFF(1); // VGT_REUSE_OFF + blit->cs[blit->cdw++] = 0; // VGT_VTX_CNT_EN + + blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2); + blit->cs[blit->cdw++] = (VGT_STRMOUT_BUFFER_EN - SET_CONTEXT_REG__OFFSET) >> 2; + blit->cs[blit->cdw++] = 0; + + if (blit->cdw != cdw) { + fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n", + __func__, __LINE__, cdw, blit->cdw); + return -EFBIG; + } + return 0; +} + + +/* solid vs */ +unsigned r6xx_solid_vs(uint32_t *shader) +{ + unsigned i = 0; + + /* 0 */ + shader[i++] = SQ_CF_WORD0__ADDR(4); + shader[i++] = SQ_CF_WORD1__CF_INST(SQ_CF_INST_VTX) | + SQ_CF_WORD1__POP_COUNT(0) | + SQ_CF_WORD1__CF_CONST(0) | + SQ_CF_WORD1__COND(SQ_CF_COND_ACTIVE) | + SQ_CF_WORD1__COUNT(0) | + SQ_CF_WORD1__VALID_PIXEL_MODE(0) | + SQ_CF_WORD1__END_OF_PROGRAM (0) | + SQ_CF_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_WORD1__BARRIER(0); + /* 1 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_POS0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_POS) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(3); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_1) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(0) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1); + /* 2 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_PARAM0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PARAM) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_Z) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_W) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(0); + /* 3 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = SQ_VTX_WORD0__VTX_INST(SQ_VTX_INST_FETCH) | + SQ_VTX_WORD0__FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA) | + SQ_VTX_WORD0__FETCH_WHOLE_QUAD(0) | + SQ_VTX_WORD0__BUFFER_ID(0) | + SQ_VTX_WORD0__SRC_GPR(0) | + SQ_VTX_WORD0__SRC_REL(0) | + SQ_VTX_WORD0__SRC_SEL_X(SQ_SEL_X) | + SQ_VTX_WORD0__MEGA_FETCH_COUNT(16); + shader[i++] = SQ_VTX_WORD1_GPR__DST_GPR(0) | + SQ_VTX_WORD1_GPR__DST_REL(0) | + SQ_VTX_WORD1__DST_SEL_X(SQ_SEL_X) | + SQ_VTX_WORD1__DST_SEL_Y(SQ_SEL_Y) | + SQ_VTX_WORD1__DST_SEL_Z(SQ_SEL_Z) | + SQ_VTX_WORD1__DST_SEL_W(SQ_SEL_W) | + SQ_VTX_WORD1__USE_CONST_FIELDS(1) | + SQ_VTX_WORD1__DATA_FORMAT(FMT_32_32_32_32_FLOAT) | + SQ_VTX_WORD1__NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED) | + SQ_VTX_WORD1__FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED) | + SQ_VTX_WORD1__SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE); + shader[i++] = SQ_VTX_WORD2__OFFSET(0) | + SQ_VTX_WORD2__ENDIAN_SWAP(SQ_ENDIAN_NONE) | + SQ_VTX_WORD2__CONST_BUF_NO_STRIDE(0) | + SQ_VTX_WORD2__MEGA_FETCH(1); + shader[i++] = 0x0; + + return i; +} + +unsigned r6xx_copy_vs(uint32_t *shader) +{ + unsigned i = 0; + + /* 0 */ + shader[i++] = SQ_CF_WORD0__ADDR(4); + shader[i++] = SQ_CF_WORD1__CF_INST(SQ_CF_INST_VTX) | + SQ_CF_WORD1__POP_COUNT(0) | + SQ_CF_WORD1__CF_CONST(0) | + SQ_CF_WORD1__COND(SQ_CF_COND_ACTIVE) | + SQ_CF_WORD1__COUNT(0) | + SQ_CF_WORD1__VALID_PIXEL_MODE(0) | + SQ_CF_WORD1__END_OF_PROGRAM (0) | + SQ_CF_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_WORD1__BARRIER(0); + /* 1 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_POS0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_POS) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(3); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_1) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(0) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1); + /* 2 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_PARAM0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PARAM) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_Z) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_W) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_0) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(0); + /* 3 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = SQ_VTX_WORD0__VTX_INST(SQ_VTX_INST_FETCH) | + SQ_VTX_WORD0__FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA) | + SQ_VTX_WORD0__FETCH_WHOLE_QUAD(0) | + SQ_VTX_WORD0__BUFFER_ID(0) | + SQ_VTX_WORD0__SRC_GPR(0) | + SQ_VTX_WORD0__SRC_REL(0) | + SQ_VTX_WORD0__SRC_SEL_X(SQ_SEL_X) | + SQ_VTX_WORD0__MEGA_FETCH_COUNT(16); + shader[i++] = SQ_VTX_WORD1_GPR__DST_GPR(0) | + SQ_VTX_WORD1_GPR__DST_REL(0) | + SQ_VTX_WORD1__DST_SEL_X(SQ_SEL_X) | + SQ_VTX_WORD1__DST_SEL_Y(SQ_SEL_Y) | + SQ_VTX_WORD1__DST_SEL_Z(SQ_SEL_Z) | + SQ_VTX_WORD1__DST_SEL_W(SQ_SEL_W) | + SQ_VTX_WORD1__USE_CONST_FIELDS(1) | + SQ_VTX_WORD1__DATA_FORMAT(FMT_32_32_32_32_FLOAT) | + SQ_VTX_WORD1__NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED) | + SQ_VTX_WORD1__FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED) | + SQ_VTX_WORD1__SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE); + shader[i++] = SQ_VTX_WORD2__OFFSET(0) | + SQ_VTX_WORD2__ENDIAN_SWAP(SQ_ENDIAN_NONE) | + SQ_VTX_WORD2__CONST_BUF_NO_STRIDE(0) | + SQ_VTX_WORD2__MEGA_FETCH(1); + shader[i++] = 0x0; + + return i; +} + +/* solid ps */ +unsigned r6xx_solid_ps(uint32_t* shader) +{ + unsigned i = 0; + /* 0 */ + shader[i++] = SQ_CF_ALU_WORD0__ADDR(2) | + SQ_CF_ALU_WORD0__KCACHE_BANK0(0) | + SQ_CF_ALU_WORD0__KCACHE_BANK1(0) | + SQ_CF_ALU_WORD0__KCACHE_MODE0(SQ_CF_KCACHE_NOP); + shader[i++] = SQ_CF_ALU_WORD1__CF_INST(SQ_CF_INST_ALU) | + SQ_CF_ALU_WORD1__COUNT(3) | + SQ_CF_ALU_WORD1__KCACHE_MODE1(SQ_CF_KCACHE_NOP) | + SQ_CF_ALU_WORD1__KCACHE_ADDR0(0) | + SQ_CF_ALU_WORD1__KCACHE_ADDR1(0) | + SQ_CF_ALU_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALU_WORD1__BARRIER(0); + /* 1 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_CB0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PIXEL) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_Z) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_W) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1); + /* 2 */ + shader[i++] = SQ_ALU_WORD0__SRC0_SEL(SQ_ALU_CFILE_0) | + SQ_ALU_WORD0__SRC0_REL(0) | + SQ_ALU_WORD0__SRC0_CHAN(SQ_CHAN_X) | + SQ_ALU_WORD0__SRC0_NEG(0) | + SQ_ALU_WORD0__SRC1_SEL(0) | + SQ_ALU_WORD0__SRC1_REL(0) | + SQ_ALU_WORD0__SRC1_CHAN(SQ_CHAN_X) | + SQ_ALU_WORD0__SRC1_NEG(0) | + SQ_ALU_WORD0__INDEX_MODE(0) | + SQ_ALU_WORD0__PRED_SEL(SQ_PRED_SEL_OFF) | + SQ_ALU_WORD0__LAST(0); + shader[i++] = SQ_ALU_WORD1_OP2__ALU_INST(SQ_OP2_INST_MOV) | + SQ_ALU_WORD1_OP2__SRC0_ABS(0) | + SQ_ALU_WORD1_OP2__SRC1_ABS(0) | + SQ_ALU_WORD1_OP2__UPDATE_EXECUTE_MASK(0) | + SQ_ALU_WORD1_OP2__UPDATE_PRED(0) | + SQ_ALU_WORD1_OP2__WRITE_MASK(1) | + SQ_ALU_WORD1_OP2__OMOD(SQ_ALU_OMOD_OFF) | + SQ_ALU_WORD1__BANK_SWIZZLE(SQ_ALU_VEC_012) | + SQ_ALU_WORD1__DST_GPR(0) | + SQ_ALU_WORD1__DST_REL(0) | + SQ_ALU_WORD1__DST_CHAN(SQ_CHAN_X) | + SQ_ALU_WORD1__CLAMP(1); + /* 3 */ + shader[i++] = SQ_ALU_WORD0__SRC0_SEL(SQ_ALU_CFILE_0) | + SQ_ALU_WORD0__SRC0_REL(0) | + SQ_ALU_WORD0__SRC0_CHAN(SQ_CHAN_Y) | + SQ_ALU_WORD0__SRC0_NEG(0) | + SQ_ALU_WORD0__SRC1_SEL(0) | + SQ_ALU_WORD0__SRC1_REL(0) | + SQ_ALU_WORD0__SRC1_CHAN(SQ_CHAN_Y) | + SQ_ALU_WORD0__SRC1_NEG(0) | + SQ_ALU_WORD0__INDEX_MODE(0) | + SQ_ALU_WORD0__PRED_SEL(SQ_PRED_SEL_OFF) | + SQ_ALU_WORD0__LAST(0); + shader[i++] = SQ_ALU_WORD1_OP2__ALU_INST(SQ_OP2_INST_MOV) | + SQ_ALU_WORD1_OP2__SRC0_ABS(0) | + SQ_ALU_WORD1_OP2__SRC1_ABS(0) | + SQ_ALU_WORD1_OP2__UPDATE_EXECUTE_MASK(0) | + SQ_ALU_WORD1_OP2__UPDATE_PRED(0) | + SQ_ALU_WORD1_OP2__WRITE_MASK(1) | + SQ_ALU_WORD1_OP2__OMOD(SQ_ALU_OMOD_OFF) | + SQ_ALU_WORD1__BANK_SWIZZLE(SQ_ALU_VEC_012) | + SQ_ALU_WORD1__DST_GPR(0) | + SQ_ALU_WORD1__DST_REL(0) | + SQ_ALU_WORD1__DST_CHAN(SQ_CHAN_Y) | + SQ_ALU_WORD1__CLAMP(1); + /* 4 */ + shader[i++] = SQ_ALU_WORD0__SRC0_SEL(SQ_ALU_CFILE_0) | + SQ_ALU_WORD0__SRC0_REL(0) | + SQ_ALU_WORD0__SRC0_CHAN(SQ_CHAN_Z) | + SQ_ALU_WORD0__SRC0_NEG(0) | + SQ_ALU_WORD0__SRC1_SEL(0) | + SQ_ALU_WORD0__SRC1_REL(0) | + SQ_ALU_WORD0__SRC1_CHAN(SQ_CHAN_Z) | + SQ_ALU_WORD0__SRC1_NEG(0) | + SQ_ALU_WORD0__INDEX_MODE(0) | + SQ_ALU_WORD0__PRED_SEL(SQ_PRED_SEL_OFF) | + SQ_ALU_WORD0__LAST(0); + shader[i++] = SQ_ALU_WORD1_OP2__ALU_INST(SQ_OP2_INST_MOV) | + SQ_ALU_WORD1_OP2__SRC0_ABS(0) | + SQ_ALU_WORD1_OP2__SRC1_ABS(0) | + SQ_ALU_WORD1_OP2__UPDATE_EXECUTE_MASK(0) | + SQ_ALU_WORD1_OP2__UPDATE_PRED(0) | + SQ_ALU_WORD1_OP2__WRITE_MASK(1) | + SQ_ALU_WORD1_OP2__OMOD(SQ_ALU_OMOD_OFF) | + SQ_ALU_WORD1__BANK_SWIZZLE(SQ_ALU_VEC_012) | + SQ_ALU_WORD1__DST_GPR(0) | + SQ_ALU_WORD1__DST_REL(0) | + SQ_ALU_WORD1__DST_CHAN(SQ_CHAN_Z) | + SQ_ALU_WORD1__CLAMP(1); + /* 5 */ + shader[i++] = SQ_ALU_WORD0__SRC0_SEL(SQ_ALU_CFILE_0) | + SQ_ALU_WORD0__SRC0_REL(0) | + SQ_ALU_WORD0__SRC0_CHAN(SQ_CHAN_W) | + SQ_ALU_WORD0__SRC0_NEG(0) | + SQ_ALU_WORD0__SRC1_SEL(0) | + SQ_ALU_WORD0__SRC1_REL(0) | + SQ_ALU_WORD0__SRC1_CHAN(SQ_CHAN_W) | + SQ_ALU_WORD0__SRC1_NEG(0) | + SQ_ALU_WORD0__INDEX_MODE(0) | + SQ_ALU_WORD0__PRED_SEL(SQ_PRED_SEL_OFF) | + SQ_ALU_WORD0__LAST(1); + shader[i++] = SQ_ALU_WORD1_OP2__ALU_INST(SQ_OP2_INST_MOV) | + SQ_ALU_WORD1_OP2__SRC0_ABS(0) | + SQ_ALU_WORD1_OP2__SRC1_ABS(0) | + SQ_ALU_WORD1_OP2__UPDATE_EXECUTE_MASK(0) | + SQ_ALU_WORD1_OP2__UPDATE_PRED(0) | + SQ_ALU_WORD1_OP2__WRITE_MASK(1) | + SQ_ALU_WORD1_OP2__OMOD(SQ_ALU_OMOD_OFF) | + SQ_ALU_WORD1__BANK_SWIZZLE(SQ_ALU_VEC_012) | + SQ_ALU_WORD1__DST_GPR(0) | + SQ_ALU_WORD1__DST_REL(0) | + SQ_ALU_WORD1__DST_CHAN(SQ_CHAN_W) | + SQ_ALU_WORD1__CLAMP(1); + + return i; +} + +unsigned r6xx_copy_ps(uint32_t *shader) +{ + unsigned i = 0; + + /* 0 */ + shader[i++] = SQ_CF_WORD0__ADDR(2); + shader[i++] = SQ_CF_WORD1__CF_INST(SQ_CF_INST_TEX) | + SQ_CF_WORD1__COUNT(0) | + SQ_CF_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_WORD1__BARRIER(1); + /* 1 */ + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_CB0) | + SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PIXEL) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) | + SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) | + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0); + shader[i++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_Z) | + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_W) | + SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) | + SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) | + SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) | + SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1); + /* TEX INST 0 */ + shader[i++] = SQ_TEX_WORD0__TEX_INST(SQ_TEX_INST_SAMPLE) | + SQ_TEX_WORD0__BC_FRAC_MODE(0) | + SQ_TEX_WORD0__FETCH_WHOLE_QUAD(0) | + SQ_TEX_WORD0__RESOURCE_ID(0) | + SQ_TEX_WORD0__SRC_GPR(0) | + SQ_TEX_WORD0__SRC_REL(0); + shader[i++] = SQ_TEX_WORD1__DST_GPR(0) | + SQ_TEX_WORD1__DST_REL(0) | + SQ_TEX_WORD1__DST_SEL_X(SQ_SEL_X) | + SQ_TEX_WORD1__DST_SEL_Y(SQ_SEL_Y) | + SQ_TEX_WORD1__DST_SEL_Z(SQ_SEL_Z) | + SQ_TEX_WORD1__DST_SEL_W(SQ_SEL_W) | + SQ_TEX_WORD1__LOD_BIAS(0) | + SQ_TEX_WORD1__COORD_TYPE_X(0) | + SQ_TEX_WORD1__COORD_TYPE_Y(0) | + SQ_TEX_WORD1__COORD_TYPE_Z(0) | + SQ_TEX_WORD1__COORD_TYPE_W(0); + shader[i++] = SQ_TEX_WORD2__OFFSET_X(0) | + SQ_TEX_WORD2__OFFSET_Y(0) | + SQ_TEX_WORD2__OFFSET_Z(0) | + SQ_TEX_WORD2__SAMPLER_ID(0) | + SQ_TEX_WORD2__SRC_SEL_X(SQ_SEL_X) | + SQ_TEX_WORD2__SRC_SEL_Y(SQ_SEL_Y) | + SQ_TEX_WORD2__SRC_SEL_Z(SQ_SEL_0) | + SQ_TEX_WORD2__SRC_SEL_W(SQ_SEL_1); + shader[i++] = 0; + + return i; +} @@ -0,0 +1,123 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + * Jerome Glisse + */ +#ifndef R6XX_H +#define R6XX_H + +#include "rati_file.h" + +struct r6xx_sq_conf { + unsigned ps_prio; + unsigned vs_prio; + unsigned gs_prio; + unsigned es_prio; + unsigned num_ps_gprs; + unsigned num_vs_gprs; + unsigned num_gs_gprs; + unsigned num_es_gprs; + unsigned num_temp_gprs; + unsigned num_ps_threads; + unsigned num_vs_threads; + unsigned num_gs_threads; + unsigned num_es_threads; + unsigned num_ps_stack_entries; + unsigned num_vs_stack_entries; + unsigned num_gs_stack_entries; + unsigned num_es_stack_entries; + unsigned sq_config; +}; + +struct r6xx_vbo { + unsigned offset; + unsigned ndw; + unsigned stride; + unsigned data_format; + unsigned num_format_all; + unsigned format_comp_all; + unsigned srf_mode_all; + unsigned endian_swap; + unsigned mem_request_size; + struct ctx_bo *bo; +}; + +struct r6xx_draw { + unsigned primitive_type; + unsigned num_instances; + unsigned index_type; + unsigned num_indices; + unsigned vgt_draw_initiator; +}; + +struct r6xx_blit; + +void r6xx_emit_reloc(struct r6xx_blit *blit, struct ctx_bo *bo); +int r6xx_sq_conf(struct r6xx_blit *blit); +int r6xx_set_vport_scissor(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2); +int r6xx_set_generic_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2); +int r6xx_set_window_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2); +int r6xx_set_screen_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2); +int r6xx_set_clip_rect(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2); +int r6xx_set_render_target(struct r6xx_blit *blit, struct ctx_bo *bo); +int r6xx_fs_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size); +int r6xx_vs_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size, + unsigned cs_export_count); +void r6xx_ps_setup(struct r6xx_blit *blit, struct ctx_bo *bo, + unsigned offset, unsigned ngprs, unsigned stack_size, + unsigned export_mode, unsigned num_interp); +int r6xx_set_alu_consts(struct r6xx_blit *blit, unsigned id, + unsigned count, float *cst); +void r6xx_set_default_sampler(struct r6xx_blit *blit, unsigned id); +void r6xx_set_tex_resource(struct r6xx_blit *blit, unsigned id, struct ctx_bo *bo); +int r6xx_set_vtx_resource(struct r6xx_blit *blit, unsigned id, + struct r6xx_vbo *vbo); +int r6xx_draw_auto(struct r6xx_blit *blit, struct r6xx_draw *draw); +int r6xx_disable_depth(struct r6xx_blit *blit); +int r6xx_surface_sync(struct r6xx_blit *blit, + struct ctx_bo *bo, + unsigned sync_type); +int r6xx_event(struct r6xx_blit *blit, unsigned event_iniator); +int r6xx_set_default_state(struct r6xx_blit *blit); +unsigned r6xx_solid_vs(uint32_t *shader); +unsigned r6xx_copy_vs(uint32_t *shader); +unsigned r6xx_solid_ps(uint32_t* shader); +unsigned r6xx_copy_ps(uint32_t *shader); + +/* + * rfile helper + */ +int r6xx_rfile_legalize(struct rati_file *rfile); +int r6xx_rfile_clear_offset(struct rati_file *rfile); + +/* + * tati helpers + */ +int r6xx_tati_cmd_buffer_write(struct rati_file *rfile, + unsigned idx, FILE *file); + +#endif diff --git a/r6xx_rati.c b/r6xx_rati.c new file mode 100644 index 0000000..d6949ee --- /dev/null +++ b/r6xx_rati.c @@ -0,0 +1,493 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + * Jerome Glisse + */ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include "xf86drm.h" +#include "radeon_drm.h" +#include "radeon_family.h" +#include "r6xx.h" +#include "r6xxd.h" + +static int r6xx_next_reloc_offset(struct rati_file *rfile, unsigned idx, + unsigned *next_idx, uint64_t *offset) +{ + unsigned type, it, count, ridx; + uint32_t *pm4 = rfile->cmd_buffer_ptr[idx]; + + type = PKTx_TYPE(pm4[*next_idx]); + count = PKTx_COUNT(pm4[*next_idx]); + it = PKT3_IT(pm4[*next_idx]); + if (type != 3 || it != IT_NOP) { + /* missing relocation */ + return -EINVAL; + } + ridx = pm4[(*next_idx) + 1] / 4; + if (ridx >= rfile->header.ndata_buffers) { + /* relocation out of range */ + return -EINVAL; + } + *offset = rfile->data_buffer[ridx].offset; + *next_idx += count + 1; + return 0; +} + +static int r6xx_reg_clear_offset(struct rati_file *rfile, unsigned idx, + unsigned reg, unsigned pm4_idx, + unsigned *next_idx) +{ + uint32_t *pm4 = rfile->cmd_buffer_ptr[idx]; + uint64_t offset; + int r; + + /* xor the offset, this will keep offset from start of bo intact */ + switch (reg) { + case VGT_STRMOUT_BUFFER_BASE_0: + case VGT_STRMOUT_BUFFER_BASE_1: + case VGT_STRMOUT_BUFFER_BASE_2: + case VGT_STRMOUT_BUFFER_BASE_3: + case CP_COHER_BASE: + case CB_COLOR0_FRAG: + case CB_COLOR1_FRAG: + case CB_COLOR2_FRAG: + case CB_COLOR3_FRAG: + case CB_COLOR4_FRAG: + case CB_COLOR5_FRAG: + case CB_COLOR6_FRAG: + case CB_COLOR7_FRAG: + case CB_COLOR0_TILE: + case CB_COLOR1_TILE: + case CB_COLOR2_TILE: + case CB_COLOR3_TILE: + case CB_COLOR4_TILE: + case CB_COLOR5_TILE: + case CB_COLOR6_TILE: + case CB_COLOR7_TILE: + case CB_COLOR0_BASE: + case CB_COLOR1_BASE: + case CB_COLOR2_BASE: + case CB_COLOR3_BASE: + case CB_COLOR4_BASE: + case CB_COLOR5_BASE: + case CB_COLOR6_BASE: + case CB_COLOR7_BASE: + case DB_DEPTH_BASE: + case DB_HTILE_DATA_BASE: + case SQ_PGM_START_FS: + case SQ_PGM_START_ES: + case SQ_PGM_START_VS: + case SQ_PGM_START_GS: + case SQ_PGM_START_PS: + case SQ_ALU_CONST_CACHE_GS_0: + case SQ_ALU_CONST_CACHE_GS_1: + case SQ_ALU_CONST_CACHE_GS_2: + case SQ_ALU_CONST_CACHE_GS_3: + case SQ_ALU_CONST_CACHE_GS_4: + case SQ_ALU_CONST_CACHE_GS_5: + case SQ_ALU_CONST_CACHE_GS_6: + case SQ_ALU_CONST_CACHE_GS_7: + case SQ_ALU_CONST_CACHE_GS_8: + case SQ_ALU_CONST_CACHE_GS_9: + case SQ_ALU_CONST_CACHE_GS_10: + case SQ_ALU_CONST_CACHE_GS_11: + case SQ_ALU_CONST_CACHE_GS_12: + case SQ_ALU_CONST_CACHE_GS_13: + case SQ_ALU_CONST_CACHE_GS_14: + case SQ_ALU_CONST_CACHE_GS_15: + case SQ_ALU_CONST_CACHE_PS_0: + case SQ_ALU_CONST_CACHE_PS_1: + case SQ_ALU_CONST_CACHE_PS_2: + case SQ_ALU_CONST_CACHE_PS_3: + case SQ_ALU_CONST_CACHE_PS_4: + case SQ_ALU_CONST_CACHE_PS_5: + case SQ_ALU_CONST_CACHE_PS_6: + case SQ_ALU_CONST_CACHE_PS_7: + case SQ_ALU_CONST_CACHE_PS_8: + case SQ_ALU_CONST_CACHE_PS_9: + case SQ_ALU_CONST_CACHE_PS_10: + case SQ_ALU_CONST_CACHE_PS_11: + case SQ_ALU_CONST_CACHE_PS_12: + case SQ_ALU_CONST_CACHE_PS_13: + case SQ_ALU_CONST_CACHE_PS_14: + case SQ_ALU_CONST_CACHE_PS_15: + case SQ_ALU_CONST_CACHE_VS_0: + case SQ_ALU_CONST_CACHE_VS_1: + case SQ_ALU_CONST_CACHE_VS_2: + case SQ_ALU_CONST_CACHE_VS_3: + case SQ_ALU_CONST_CACHE_VS_4: + case SQ_ALU_CONST_CACHE_VS_5: + case SQ_ALU_CONST_CACHE_VS_6: + case SQ_ALU_CONST_CACHE_VS_7: + case SQ_ALU_CONST_CACHE_VS_8: + case SQ_ALU_CONST_CACHE_VS_9: + case SQ_ALU_CONST_CACHE_VS_10: + case SQ_ALU_CONST_CACHE_VS_11: + case SQ_ALU_CONST_CACHE_VS_12: + case SQ_ALU_CONST_CACHE_VS_13: + case SQ_ALU_CONST_CACHE_VS_14: + case SQ_ALU_CONST_CACHE_VS_15: + case SX_MEMORY_EXPORT_BASE: + r = r6xx_next_reloc_offset(rfile, idx, next_idx, &offset); + if (r) { + return r; + } + pm4[pm4_idx] ^= ((offset >> 8) & 0xffffffff); + break; + default: + break; + } + return 0; +} + +static int r6xx_rfile_cmd_buffer_legalize(struct rati_file *rfile, unsigned idx) +{ + return -EINVAL; +} + +static int r6xx_cmd_buffer_clear_offset(struct rati_file *rfile, unsigned idx) +{ + unsigned i, j, count, reg, it, header, next_idx, tmp; + uint32_t *pm4 = rfile->cmd_buffer_ptr[idx]; + uint64_t offset; + int r; + + for (i = 0; i < rfile->cmd_buffer[idx].ndw;) { + header = pm4[i]; + count = PKTx_COUNT(header); + switch (PKTx_TYPE(header)) { + case 0: + reg = PKT0_REG(header); + next_idx = i + count + 1; + for (j = 0, i++; j < count; j++, reg += 4) { + r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx); + if (r) { + return r; + } + } + break; + case 1: + case 2: + break; + case 3: + next_idx = i + count + 1; + it = PKT3_IT(pm4[i]); + switch (it) { + case IT_SET_CONFIG_REG: + reg = (pm4[++i] << 2) + SET_CONFIG_REG__OFFSET; + for (j = 0, i++; j < (count - 1); j++, reg += 4) { + r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx); + if (r) { + return r; + } + } + break; + case IT_SET_CONTEXT_REG: + reg = (pm4[++i] << 2) + SET_CONTEXT_REG__OFFSET; + for (j = 0, i++; j < (count - 1); j++, reg += 4) { + r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx); + if (r) { + return r; + } + } + break; + case IT_SET_PREDICATION: + tmp = (pm4[i + 2] >> 16) & 0x7; + if (tmp) { + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 1] ^= (offset & 0xffffffff); + pm4[i + 2] ^= ((offset >> 32ULL) & 0xff); + } + break; + case IT_DRAW_INDEX: + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 1] ^= (offset & 0xffffffff); + pm4[i + 2] ^= ((offset >> 32ULL) & 0xff); + break; + case IT_WAIT_REG_MEM: + if (pm4[i + 1] & 0x10) { + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 2] ^= (offset & 0xffffffff); + pm4[i + 3] ^= ((offset >> 32ULL) & 0xff); + } + break; + case IT_SURFACE_SYNC: + if (pm4[i + 2] != 0xffffffff || pm4[i + 3]) { + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 3] ^= ((offset >> 8ULL) & 0xffffffff); + } + break; + case IT_EVENT_WRITE: + case IT_EVENT_WRITE_EOP: + if (count > 1) { + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 2] ^= (offset & 0xfffffffc); + pm4[i + 3] ^= ((offset >> 32ULL) & 0xff); + } + break; + case IT_SET_RESOURCE: + for (j = 0, i +=2; j < ((count - 1) / 7); j++, i += 7) { + tmp = pm4[i + 6]; + switch ((tmp >> 30) & 3) { + case SQ_TEX_VTX_VALID_TEXTURE: + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 2] ^= ((offset >> 8ULL) & 0xffffffff); + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 3] ^= ((offset >> 8ULL) & 0xffffffff); + break; + case SQ_TEX_VTX_VALID_BUFFER: + r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset); + if (r) { + return r; + } + pm4[i + 0] ^= (offset & 0xffffffffULL); + pm4[i + 2] ^= ((offset >> 32ULL) & 0xff); + break; + default: + break; + } + } + break; +#if 0 + case PACKET3_STRMOUT_BUFFER_UPDATE: + /* Updating memory at DST_ADDRESS. */ + if (idx_value & 0x1) { + u64 offset; + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+1); + offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + } + /* Reading data from SRC_ADDRESS. */ + if (((idx_value >> 1) & 0x3) == 2) { + u64 offset; + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+3); + offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; + } + break; + case PACKET3_COPY_DW: + if (idx_value & 0x1) { + u64 offset; + /* SRC is memory. */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+1); + offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+1] = offset; + ib[idx+2] = upper_32_bits(offset) & 0xff; + } + if (idx_value & 0x2) { + u64 offset; + /* DST is memory. */ + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+3); + offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + if ((offset + 4) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+3] = offset; + ib[idx+4] = upper_32_bits(offset) & 0xff; + } + break; +#endif + default: + break; + } + i = next_idx; + break; + default: + return -EINVAL; + } + } + return 0; +} + +int r6xx_rfile_legalize(struct rati_file *rfile) +{ + unsigned i; + int r; + + for (i = 0; i < rfile->header.ncmd_buffers; i++) { + r = r6xx_rfile_cmd_buffer_legalize(rfile, i); + if (r) { + return r; + } + } + return 0; +} + +int r6xx_rfile_clear_offset(struct rati_file *rfile) +{ + unsigned i; + int r = 0; + + for (i = 0; i < rfile->header.ncmd_buffers; i++) { + if (rfile->cmd_buffer[i].flags & RATI_CMD_CLEAR_OFFSET) { + r = r6xx_cmd_buffer_clear_offset(rfile, i); + if (r) { + return r; + } + } + } + return 0; +} + + +/* + * tati helpers + */ +int r6xx_tati_cmd_buffer_write(struct rati_file *rfile, + unsigned idx, FILE *file) +{ + unsigned i, j, count, reg, it; + uint32_t *pm4 = rfile->cmd_buffer_ptr[idx]; + + for (i = 0; i < rfile->cmd_buffer[idx].ndw;) { + count = PKTx_COUNT(pm4[i]); + switch (PKTx_TYPE(pm4[i])) { + case 0: + reg = PKT0_REG(pm4[i]); + if (fprintf(file, " 0x%08x\n", pm4[i++]) < 0) { + return -EINVAL; + } + for (j = 0; j < count; j++, reg += 4) { + if (fprintf(file, " 0x%08x // reg 0x%08x\n", pm4[i++], reg) < 0) { + return -EINVAL; + } + } + break; + case 1: + case 2: + if (fprintf(file, " 0x%08x\n", pm4[i++]) < 0) { + return -EINVAL; + } + break; + case 3: + it = PKT3_IT(pm4[i]); + if (fprintf(file, " 0x%08x // pkt3 0x%02x\n", pm4[i++], it) < 0) { + return -EINVAL; + } + switch (it) { + case IT_SET_CONFIG_REG: + reg = (pm4[i] << 2) + SET_CONFIG_REG__OFFSET; + if (fprintf(file, " 0x%08x\n", pm4[i++]) < 0) { + return -EINVAL; + } + for (j = 0; j < (count - 1); j++, reg += 4) { + if (fprintf(file, " 0x%08x // reg 0x%08x\n", pm4[i++], reg) < 0) { + return -EINVAL; + } + } + break; + case IT_SET_CONTEXT_REG: + reg = (pm4[i] << 2) + SET_CONTEXT_REG__OFFSET; + if (fprintf(file, " 0x%08x\n", pm4[i++]) < 0) { + return -EINVAL; + } + for (j = 0; j < (count - 1); j++, reg += 4) { + if (fprintf(file, " 0x%08x // reg 0x%08x\n", pm4[i++], reg) < 0) { + return -EINVAL; + } + } + break; + default: + for (j = 0; j < count; j++) { + if (fprintf(file, " 0x%08x\n", pm4[i++]) < 0) { + return -EINVAL; + } + } + break; + } + break; + default: + return -EINVAL; + } + } + return 0; +} diff --git a/r6xx_replayx.c b/r6xx_replayx.c new file mode 100644 index 0000000..e5a1615 --- /dev/null +++ b/r6xx_replayx.c @@ -0,0 +1,376 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + * Jerome Glisse + */ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include "replayx.h" +#include "xf86drm.h" +#include "radeon_drm.h" +#include "radeon_family.h" +#include "r6xx.h" +#include "r6xxd.h" + +static int r6xx_blit_init(struct ctx *ctx) +{ + struct r6xx_blit *blit = &ctx->blit.r6xx; + int r; + + blit->ctx = ctx; + blit->vs_offset = 512; + blit->ps_offset = 1024; + blit->vbo_offset = 0; + r = r6xx_sq_conf(blit); + if (r) { + return r; + } + blit->cdw = 0; + blit->cs = malloc(16 << 10); + if (blit->cs == NULL) { + return -ENOMEM; + } + blit->ctx = ctx; + blit->shader_bo.size = 4096; + blit->shader_bo.alignment = 4096; + blit->shader_bo.flags = 0; + r = ctx_bo(ctx, &blit->shader_bo, NULL); + if (r) { + return r; + } + + r = ctx_bo_map(ctx, &blit->shader_bo); + if (r) { + return r; + } + + blit->vs_size = r6xx_copy_vs(blit->shader_bo.data + blit->vs_offset); + blit->ps_size = r6xx_copy_ps(blit->shader_bo.data + blit->ps_offset); + + blit->relocs[0].handle = 0; + blit->relocs[0].read_domain = 0; + blit->relocs[0].write_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + blit->relocs[0].flags = 0; + blit->relocs[1].handle = blit->shader_bo.handle; + blit->relocs[1].read_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + blit->relocs[1].write_domain = 0; + blit->relocs[1].flags = 0; + blit->relocs[2].handle = 0; + blit->relocs[2].read_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + blit->relocs[2].write_domain = 0; + blit->relocs[2].flags = 0; + return 0; +} + +static void r6xx_blit_fini(struct ctx *ctx) +{ + struct r6xx_blit *blit = &ctx->blit.r6xx; + ctx_bo_free(blit->ctx, &blit->shader_bo); +} + +static int r6xx_blit(struct ctx *ctx, struct ctx_bo *bo) +{ + struct r6xx_blit *blit = &ctx->blit.r6xx; + unsigned x0, y0, x1, y1; + float *vtx; + struct r6xx_vbo vbo; + struct r6xx_draw draw; + + ctx->front.hw_format = COLOR_8_8_8_8; + ctx->front.hw_tile = ARRAY_1D_TILED_THIN1; + blit->relocs[0].handle = ctx->front.handle; + blit->relocs[2].handle = bo->handle; + blit->cdw = 0; + blit->ctx = ctx; + + r6xx_set_default_state(blit); + r6xx_disable_depth(blit); + r6xx_set_render_target(blit, &ctx->front); + + /* vbo */ + vbo.bo = &blit->shader_bo; + vbo.offset = blit->vbo_offset; + vbo.stride = 4 * 4; + vbo.num_format_all = SQ_NUM_FORMAT_NORM; + vbo.data_format = FMT_32_32_32_32_FLOAT; + vbo.srf_mode_all = 0; + vbo.format_comp_all = 0; + vbo.endian_swap = SQ_ENDIAN_NONE; + vbo.ndw = 4 * 4; + vbo.mem_request_size = 1; + vtx = blit->shader_bo.data + blit->vbo_offset; + + x0 = 0; + y0 = 0; + x1 = bo->w; + y1 = bo->h; + vtx[ 0] = x0; + vtx[ 1] = y0; + vtx[ 2] = 0.0; + vtx[ 3] = 0.0; + vtx[ 4] = x0; + vtx[ 5] = y1; + vtx[ 6] = 0.0; + vtx[ 7] = bo->h; + vtx[ 8] = x1; + vtx[ 9] = y1; + vtx[10] = bo->w; + vtx[11] = bo->h; + + r6xx_fs_setup(blit, &blit->shader_bo, blit->vs_offset, 0, 0); + r6xx_vs_setup(blit, &blit->shader_bo, blit->vs_offset, 2, 0, 0); + r6xx_ps_setup(blit, &blit->shader_bo, blit->ps_offset, 1, 0, 2, 1); + + r6xx_set_tex_resource(blit, 0, bo); + r6xx_set_default_sampler(blit, 0); + r6xx_set_vtx_resource(blit, 160, &vbo); + r6xx_surface_sync(blit, NULL, + CP_COHER_CNTL__SH_ACTION_ENA(1) | + CP_COHER_CNTL__TC_ACTION_ENA(1) | + CP_COHER_CNTL__VC_ACTION_ENA(1)); + + draw.primitive_type = DI_PT_RECTLIST; + draw.num_indices = 3; + + draw.num_instances = 1; + draw.index_type = DI_INDEX_SIZE_16_BIT; + draw.vgt_draw_initiator = VGT_DRAW_INITIATOR__SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX); + r6xx_draw_auto(blit, &draw); + r6xx_event(blit, VGT_EVENT_INITIATOR__EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT)); + + return ctx_cs(ctx, blit->cs, blit->cdw, blit->relocs, 3); +} + +struct r6xx_target { + unsigned w; + unsigned h; + unsigned hw_format; + unsigned hw_tile; + unsigned pitch; + int reloc_id; +}; + +static int r6xx_next_reloc_id(uint32_t *pm4, unsigned *next_idx, + int *reloc_id) +{ + unsigned type, it, count; + + type = PKTx_TYPE(pm4[*next_idx]); + count = PKTx_COUNT(pm4[*next_idx]); + it = PKT3_IT(pm4[*next_idx]); + if (type != 3 || it != IT_NOP) { + /* missing relocation */ + return -EINVAL; + } + *reloc_id = pm4[(*next_idx) + 1] / 4; + *next_idx += count + 1; + return 0; +} + +static int r6xx_reg_target(uint32_t *pm4, unsigned reg, unsigned value, + struct r6xx_target *target, unsigned *next_idx) +{ + unsigned idx; + int r; + + switch (reg) { + case CB_COLOR0_BASE: + case CB_COLOR1_BASE: + case CB_COLOR2_BASE: + case CB_COLOR3_BASE: + case CB_COLOR4_BASE: + case CB_COLOR5_BASE: + case CB_COLOR6_BASE: + case CB_COLOR7_BASE: + idx = (reg - CB_COLOR0_BASE) / 4; + r = r6xx_next_reloc_id(pm4, next_idx, &target[idx].reloc_id); + if (r) { + return r; + } + break; + case CB_COLOR0_INFO: + case CB_COLOR1_INFO: + case CB_COLOR2_INFO: + case CB_COLOR3_INFO: + case CB_COLOR4_INFO: + case CB_COLOR5_INFO: + case CB_COLOR6_INFO: + case CB_COLOR7_INFO: + idx = (reg - CB_COLOR0_INFO) / 4; + target[idx].hw_format = (value >> 2) & 0x3f; + target[idx].hw_tile = (value >> 8) & 0xf; + break; + case CB_COLOR0_SIZE: + case CB_COLOR1_SIZE: + case CB_COLOR2_SIZE: + case CB_COLOR3_SIZE: + case CB_COLOR4_SIZE: + case CB_COLOR5_SIZE: + case CB_COLOR6_SIZE: + case CB_COLOR7_SIZE: + idx = (reg - CB_COLOR0_SIZE) / 4; + target[idx].pitch = (((value >> 0) & 0x3ff) + 1) * 8; + target[idx].w = target[idx].pitch; + target[idx].h = (((value >> 10) & 0x000fffff) + 1) * 64; + target[idx].h = target[idx].h / target[idx].pitch; + break; + default: + break; + } + return 0; +} + +static int r6xx_cmd_buffer_target(struct ctx *ctx, unsigned idx) +{ + uint32_t *pm4 = ctx->rfile.cmd_buffer_ptr[idx]; + unsigned i, j, header, count, next_idx, reg, it; + struct r6xx_target target[8]; + int r; + + for (i = 0; i < 8; i++) { + target[i].reloc_id = -1; + } + ctx->ntarget = 0; + ctx->ctarget = 0; + + for (i = 0; i < ctx->rfile.cmd_buffer[idx].ndw;) { + header = pm4[i]; + count = PKTx_COUNT(header); + switch (PKTx_TYPE(header)) { + case 0: + reg = PKT0_REG(header); + next_idx = i + count + 1; + for (j = 0, i++; j < count; j++, reg += 4, i++) { + r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx); + if (r) { + return r; + } + } + break; + case 1: + case 2: + break; + case 3: + next_idx = i + count + 1; + it = PKT3_IT(pm4[i]); + switch (it) { + case IT_SET_CONFIG_REG: + reg = (pm4[++i] << 2) + SET_CONFIG_REG__OFFSET; + for (j = 0, i++; j < (count - 1); j++, reg += 4, i++) { + r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx); + if (r) { + return r; + } + } + break; + case IT_SET_CONTEXT_REG: + reg = (pm4[++i] << 2) + SET_CONTEXT_REG__OFFSET; + for (j = 0, i++; j < (count - 1); j++, reg += 4, i++) { + r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx); + if (r) { + return r; + } + } + break; + case IT_DRAW_INDEX: + case IT_DRAW_INDEX_AUTO: + case IT_DRAW_INDEX_IMMD: + for (i = 0; i < 8; i++) { + if (target[i].reloc_id != 1 && target[i].reloc_id < ctx->rfile.header.ndata_buffers) { + ctx->target[ctx->ntarget] = &ctx->bos[target[i].reloc_id]; + ctx->target[ctx->ntarget]->w = target[i].w; + ctx->target[ctx->ntarget]->h = target[i].h; + ctx->target[ctx->ntarget]->pitch = target[i].pitch; + ctx->target[ctx->ntarget]->hw_format = target[i].hw_format; + ctx->target[ctx->ntarget]->hw_tile = target[i].hw_tile; + ctx->ntarget++; + } + target[i].reloc_id = -1; + } + break; + default: + break; + } + } + i = next_idx; + } + return 0; +} + +static int r6xx_target(struct ctx *ctx) +{ + unsigned i; + int r; + + for (i = 0; i < ctx->rfile.header.ncmd_buffers; i++) { + r = r6xx_cmd_buffer_target(ctx, i); + if (r) { + return r; + } + } + return 0; +} + +static int r6xx_compatible(struct ctx *ctx) +{ + const struct radeon_chipinfo *info; + unsigned family; + unsigned i; + int r; + + /* check we can replay */ + info = radeon_chipinfo_from_pciid(ctx->rfile.header.pciid); + if (info == NULL) { + fprintf(stderr, "%s unknown gpu pci id 0x%08x\n", __func__, ctx->rfile.header.pciid); + return -EINVAL; + } + family = info->family; + if (family < CHIP_R600 || family > CHIP_RS880) { + fprintf(stderr, "%s can't replay 0x%02x trace on 0x%02x hw\n", __func__, family, ctx->family); + return -EINVAL; + } + + /* set front format & tile */ + ctx->front.hw_format = COLOR_8_8_8_8; + ctx->front.hw_tile = ARRAY_1D_TILED_THIN1; + + for (i = 0; i < ctx->nbos; i++) { + printf("bo[%d] %dkb\n", i, ctx->bos[i].size >> 10); + } + r = r6xx_rfile_clear_offset(&ctx->rfile); + if (r) { + return r; + } + return r6xx_target(ctx); +} + +const struct ctx_drv _r6xx_drv = { + r6xx_compatible, + r6xx_blit_init, + r6xx_blit_fini, + r6xx_blit, +}; diff --git a/replayx_r6xxd.h b/r6xxd.h index 4284503..c2b67d4 100644 --- a/replayx_r6xxd.h +++ b/r6xxd.h @@ -23,17 +23,21 @@ * * Authors: * Alex Deucher <alexander.deucher@amd.com> - * Jerome Glisse + * Jerome Glisse */ -#ifndef REPLAYX_R6XXD_H -#define REPLAYX_R6XXD_H +#ifndef R6XXD_H +#define R6XXD_H /***************************************************************************** * PM4 */ -#define PKT3(it, n) ((3 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((it) & 0xff) << 8)) -#define PKT0(reg, n) ((0 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((reg) >> 2) & 0xffff)) +#define PKT3(it, n) ((3 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((it) & 0xff) << 8)) +#define PKT0(reg, n) ((0 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((reg) >> 2) & 0xffff)) +#define PKTx_TYPE(v) (((v) >> 30) & 3) +#define PKTx_COUNT(v) ((((v) >> 16) & 0x3fff) + 1) +#define PKT3_IT(v) (((v) >> 8) & 0xff) +#define PKT0_REG(v) (((v) & 0xffff) << 2) /* packet3 commands */ #define IT_NOP 0x10 @@ -915,12 +919,33 @@ #define PA_SC_SCREEN_SCISSOR_BR__BR_X(x) (((x) & 0x00007fff) << 0) #define PA_SC_SCREEN_SCISSOR_BR__BR_Y(x) (((x) & 0x00007fff) << 16) #define CB_COLOR0_BASE 0x00028040 +#define CB_COLOR1_BASE 0x00028044 +#define CB_COLOR2_BASE 0x00028048 +#define CB_COLOR3_BASE 0x0002804c +#define CB_COLOR4_BASE 0x00028050 +#define CB_COLOR5_BASE 0x00028054 +#define CB_COLOR6_BASE 0x00028058 +#define CB_COLOR7_BASE 0x0002805c #define CB_COLOR0_SIZE 0x00028060 #define CB_COLOR0_SIZE__PITCH_TILE_MAX(x) (((x) & 0x000003ff) << 0) #define CB_COLOR0_SIZE__SLICE_TILE_MAX(x) (((x) & 0x000fffff) << 10) +#define CB_COLOR1_SIZE 0x00028064 +#define CB_COLOR2_SIZE 0x00028068 +#define CB_COLOR3_SIZE 0x0002806c +#define CB_COLOR4_SIZE 0x00028070 +#define CB_COLOR5_SIZE 0x00028074 +#define CB_COLOR6_SIZE 0x00028078 +#define CB_COLOR7_SIZE 0x0002807c #define CB_COLOR0_VIEW 0x00028080 #define CB_COLOR0_VIEW__SLICE_START(x) (((x) & 0x000007ff) << 0) #define CB_COLOR0_VIEW__SLICE_MAX(x) (((x) & 0x000007ff) << 13) +#define CB_COLOR1_VIEW 0x00028084 +#define CB_COLOR2_VIEW 0x00028088 +#define CB_COLOR3_VIEW 0x0002808c +#define CB_COLOR4_VIEW 0x00028090 +#define CB_COLOR5_VIEW 0x00028094 +#define CB_COLOR6_VIEW 0x00028098 +#define CB_COLOR7_VIEW 0x0002809c #define CB_COLOR0_INFO 0x000280a0 #define CB_COLOR0_INFO__ENDIAN(x) (((x) & 0x00000003) << 0) #define ENDIAN_NONE 0 @@ -993,8 +1018,29 @@ #define CB_COLOR0_INFO__ROUND_MODE(x) (((x) & 0x00000001) << 25) #define CB_COLOR0_INFO__TILE_COMPACT(x) (((x) & 0x00000001) << 26) #define CB_COLOR0_INFO__SOURCE_FORMAT(x) (((x) & 0x00000001) << 27) +#define CB_COLOR1_INFO 0x000280a4 +#define CB_COLOR2_INFO 0x000280a8 +#define CB_COLOR3_INFO 0x000280ac +#define CB_COLOR4_INFO 0x000280b0 +#define CB_COLOR5_INFO 0x000280b4 +#define CB_COLOR6_INFO 0x000280b8 +#define CB_COLOR7_INFO 0x000280bc #define CB_COLOR0_TILE 0x000280c0 +#define CB_COLOR1_TILE 0x000280c4 +#define CB_COLOR2_TILE 0x000280c8 +#define CB_COLOR3_TILE 0x000280cc +#define CB_COLOR4_TILE 0x000280d0 +#define CB_COLOR5_TILE 0x000280d4 +#define CB_COLOR6_TILE 0x000280d8 +#define CB_COLOR7_TILE 0x000280dc #define CB_COLOR0_FRAG 0x000280e0 +#define CB_COLOR1_FRAG 0x000280e4 +#define CB_COLOR2_FRAG 0x000280e8 +#define CB_COLOR3_FRAG 0x000280ec +#define CB_COLOR4_FRAG 0x000280f0 +#define CB_COLOR5_FRAG 0x000280f4 +#define CB_COLOR6_FRAG 0x000280f8 +#define CB_COLOR7_FRAG 0x000280fc #define CB_COLOR0_MASK 0x00028100 #define CB_COLOR0_MASK__CMASK_BLOCK_MAX(x) (((x) & 0x00000fff) << 0) #define CB_COLOR0_MASK__FMASK_TILE_MAX(x) (((x) & 0x000fffff) << 12) @@ -1616,8 +1662,53 @@ #define SQ_PGM_CF_OFFSET_FS__PGM_CF_OFFSET(x) (((x) & 0x000fffff) << 0) #define SQ_VTX_SEMANTIC_CLEAR 0x000288e0 #define SQ_ALU_CONST_CACHE_PS_0 0x00028940 +#define SQ_ALU_CONST_CACHE_PS_1 0x00028944 +#define SQ_ALU_CONST_CACHE_PS_2 0x00028948 +#define SQ_ALU_CONST_CACHE_PS_3 0x0002894c +#define SQ_ALU_CONST_CACHE_PS_4 0x00028950 +#define SQ_ALU_CONST_CACHE_PS_5 0x00028954 +#define SQ_ALU_CONST_CACHE_PS_6 0x00028958 +#define SQ_ALU_CONST_CACHE_PS_7 0x0002895c +#define SQ_ALU_CONST_CACHE_PS_8 0x00028960 +#define SQ_ALU_CONST_CACHE_PS_9 0x00028964 +#define SQ_ALU_CONST_CACHE_PS_10 0x00028968 +#define SQ_ALU_CONST_CACHE_PS_11 0x0002896c +#define SQ_ALU_CONST_CACHE_PS_12 0x00028970 +#define SQ_ALU_CONST_CACHE_PS_13 0x00028974 +#define SQ_ALU_CONST_CACHE_PS_14 0x00028978 +#define SQ_ALU_CONST_CACHE_PS_15 0x0002897c #define SQ_ALU_CONST_CACHE_VS_0 0x00028980 +#define SQ_ALU_CONST_CACHE_VS_1 0x00028984 +#define SQ_ALU_CONST_CACHE_VS_2 0x00028988 +#define SQ_ALU_CONST_CACHE_VS_3 0x0002898c +#define SQ_ALU_CONST_CACHE_VS_4 0x00028990 +#define SQ_ALU_CONST_CACHE_VS_5 0x00028994 +#define SQ_ALU_CONST_CACHE_VS_6 0x00028998 +#define SQ_ALU_CONST_CACHE_VS_7 0x0002899c +#define SQ_ALU_CONST_CACHE_VS_8 0x000289a0 +#define SQ_ALU_CONST_CACHE_VS_9 0x000289a4 +#define SQ_ALU_CONST_CACHE_VS_10 0x000289a8 +#define SQ_ALU_CONST_CACHE_VS_11 0x000289ac +#define SQ_ALU_CONST_CACHE_VS_12 0x000289b0 +#define SQ_ALU_CONST_CACHE_VS_13 0x000289b4 +#define SQ_ALU_CONST_CACHE_VS_14 0x000289b8 +#define SQ_ALU_CONST_CACHE_VS_15 0x000289bc #define SQ_ALU_CONST_CACHE_GS_0 0x000289c0 +#define SQ_ALU_CONST_CACHE_GS_1 0x000289c4 +#define SQ_ALU_CONST_CACHE_GS_2 0x000289c8 +#define SQ_ALU_CONST_CACHE_GS_3 0x000289cc +#define SQ_ALU_CONST_CACHE_GS_4 0x000289d0 +#define SQ_ALU_CONST_CACHE_GS_5 0x000289d4 +#define SQ_ALU_CONST_CACHE_GS_6 0x000289d8 +#define SQ_ALU_CONST_CACHE_GS_7 0x000289dc +#define SQ_ALU_CONST_CACHE_GS_8 0x000289e0 +#define SQ_ALU_CONST_CACHE_GS_9 0x000289e4 +#define SQ_ALU_CONST_CACHE_GS_10 0x000289e8 +#define SQ_ALU_CONST_CACHE_GS_11 0x000289ec +#define SQ_ALU_CONST_CACHE_GS_12 0x000289f0 +#define SQ_ALU_CONST_CACHE_GS_13 0x000289f4 +#define SQ_ALU_CONST_CACHE_GS_14 0x000289f8 +#define SQ_ALU_CONST_CACHE_GS_15 0x000289fc #define PA_SU_POINT_SIZE 0x00028a00 #define PA_SU_POINT_SIZE__HEIGHT(x) (((x) & 0x0000ffff) << 0) #define PA_SU_POINT_SIZE__WIDTH(x) (((x) & 0x0000ffff) << 16) @@ -26,14 +26,15 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include <stdint.h> #include <unistd.h> #include <errno.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include "libdrm/radeon_drm.h" #include "replayx.h" #include "xf86drm.h" -#include "radeon_drm.h" static int ctx_start_visual(struct ctx *ctx) { @@ -226,8 +227,9 @@ static int ctx_start_dri2(struct ctx *ctx) ctx->fd = open(ctx->device_name, O_RDWR | O_CLOEXEC); if (ctx->fd == -1) { - fprintf(stderr, "%s dri2 could not open %s\n", __func__, - ctx->device_name); + perror(NULL); + fprintf(stderr, "%s dri2 could not open |%s| %d\n", __func__, + ctx->device_name, errno); return -EINVAL; } @@ -31,7 +31,8 @@ #include <xcb/dri2.h> #include <xcb/xfixes.h> #include <stdint.h> -#include <libdrm/rati_file.h> +#include <rati_file.h> +#include "r6xx.h" struct ctx; @@ -52,76 +53,31 @@ struct ctx_bo { unsigned hw_tile; }; -#pragma pack(1) struct radeon_cs_reloc { uint32_t handle; uint32_t read_domain; uint32_t write_domain; uint32_t flags; }; -#pragma pack() - -struct r6xx_sq_conf { - unsigned ps_prio; - unsigned vs_prio; - unsigned gs_prio; - unsigned es_prio; - unsigned num_ps_gprs; - unsigned num_vs_gprs; - unsigned num_gs_gprs; - unsigned num_es_gprs; - unsigned num_temp_gprs; - unsigned num_ps_threads; - unsigned num_vs_threads; - unsigned num_gs_threads; - unsigned num_es_threads; - unsigned num_ps_stack_entries; - unsigned num_vs_stack_entries; - unsigned num_gs_stack_entries; - unsigned num_es_stack_entries; - unsigned sq_config; -}; - -struct r6xx_vbo { - unsigned offset; - unsigned ndw; - unsigned stride; - unsigned data_format; - unsigned num_format_all; - unsigned format_comp_all; - unsigned srf_mode_all; - unsigned endian_swap; - unsigned mem_request_size; - struct ctx_bo *bo; -}; - -struct r6xx_draw { - unsigned primitive_type; - unsigned num_instances; - unsigned index_type; - unsigned num_indices; - unsigned vgt_draw_initiator; -}; struct r6xx_blit { - uint32_t *cs; - unsigned cdw; - struct ctx *ctx; - struct ctx_bo shader_bo; - struct radeon_cs_reloc relocs[3]; - struct r6xx_sq_conf sq_conf; - unsigned vs_offset; - unsigned ps_offset; - unsigned vbo_offset; - unsigned ps_size; - unsigned vs_size; + uint32_t *cs; + unsigned cdw; + struct ctx *ctx; + struct ctx_bo shader_bo; + struct radeon_cs_reloc relocs[3]; + struct r6xx_sq_conf sq_conf; + unsigned vs_offset; + unsigned ps_offset; + unsigned vbo_offset; + unsigned ps_size; + unsigned vs_size; }; union ctx_blit { - struct r6xx_blit r6xx; + struct r6xx_blit r6xx; }; - typedef int (*drv_compatible_t)(struct ctx *ctx); typedef int (*drv_blit_init_t)(struct ctx *ctx); typedef void (*drv_blit_fini_t)(struct ctx *ctx); @@ -154,7 +110,7 @@ struct ctx { struct ctx_drv drv; uint32_t pciid; union ctx_blit blit; - struct ctx_bo **target; + struct ctx_bo *target[64]; unsigned ntarget; unsigned ctarget; }; diff --git a/replayx_drv.c b/replayx_drv.c index 73a5061..e7b925b 100644 --- a/replayx_drv.c +++ b/replayx_drv.c @@ -27,6 +27,7 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include <stdint.h> #include <unistd.h> #include <errno.h> #include <sys/types.h> @@ -36,6 +37,7 @@ #include "replayx.h" #include "xf86drm.h" #include "radeon_drm.h" +#include "radeon_family.h" int ctx_bo(struct ctx *ctx, struct ctx_bo *bo, void *data) { @@ -146,7 +148,7 @@ int ctx_rati_load(struct ctx *ctx, const char *filename) } ctx->ntarget = 0; - ctx->nbos = ctx->rfile.header.v001.ndata_buffers; + ctx->nbos = ctx->rfile.header.ndata_buffers; ctx->bos = calloc(1, sizeof(*ctx->bos) * ctx->nbos); ctx->relocs = calloc(1, sizeof(*ctx->relocs) * ctx->nbos); ctx->target = calloc(1, sizeof(void*) * ctx->nbos); @@ -157,8 +159,8 @@ int ctx_rati_load(struct ctx *ctx, const char *filename) } for (i = 0; i < ctx->nbos; ++i) { - ctx->bos[i].size = ctx->rfile.data_buffer[i].v001.size; - ctx->bos[i].alignment = ctx->rfile.data_buffer[i].v001.alignment; + ctx->bos[i].size = ctx->rfile.data_buffer[i].size; + ctx->bos[i].alignment = ctx->rfile.data_buffer[i].alignment; r = ctx_bo(ctx, &ctx->bos[i], ctx->rfile.data_buffer_ptr[i]); if (r) { return r; @@ -198,7 +200,7 @@ int ctx_cs(struct ctx *ctx, void *cs, unsigned ndw, void *relocs, unsigned nrelo int ctx_cs_rati(struct ctx *ctx) { return ctx_cs(ctx, ctx->rfile.cmd_buffer_ptr[0], - ctx->rfile.cmd_buffer[0].v001.ndw, + ctx->rfile.cmd_buffer[0].ndw, ctx->relocs, ctx->nbos); } @@ -27,6 +27,7 @@ #include <string.h> #include <stdlib.h> #include <unistd.h> +#include <stdint.h> #include <errno.h> #include <sys/types.h> #include <sys/stat.h> @@ -39,11 +40,16 @@ void usage(const char *exename) exit(0); } +int r6xx_tati_cmd_buffer_write(struct rati_file *rfile, + unsigned idx, FILE *file); + int main(int argc, char *argv[]) { struct rati_file rfile; char dstname[256], *tmp; + tati_cmd_buffer_write_t write = NULL; + write = r6xx_tati_cmd_buffer_write; if (argc != 2) { usage(argv[0]); } @@ -60,5 +66,5 @@ int main(int argc, char *argv[]) fprintf(stderr, "failed reading %s\n", argv[1]); return -1; } - return tati_file_write(&rfile, dstname); + return tati_file_write(&rfile, dstname, write); } |