summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jerome Glisse <jglisse@redhat.com> 2012-05-22 17:01:41 -0400
committer Jerome Glisse <jglisse@redhat.com> 2012-05-22 17:01:41 -0400
commit 8568c748f530bf0bb3cb03e8e456f206f4ae6f0a (patch)
tree f7ed900388a87dbdbbe0a9d73bcbf4eab6901558
parent 98a26545fd041bff49186fb8439a7282d7df3ca3 (diff)
replayx: finish support for kernel cmd stream replaying
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
-rw-r--r-- Makefile 4
-rw-r--r-- r6xx.c 1239
-rw-r--r-- r6xx.h 123
-rw-r--r-- r6xx_rati.c 493
-rw-r--r-- r6xx_replayx.c 376
-rw-r--r-- r6xxd.h (renamed from replayx_r6xxd.h) 101
-rw-r--r-- replayx.c 8
-rw-r--r-- replayx.h 74
-rw-r--r-- replayx_drv.c 10
-rw-r--r-- tati.c 8
10 files changed, 2362 insertions, 74 deletions
diff --git a/Makefile b/Makefile
index 0978847..c716b6c 100644
--- a/Makefile
+++ b/Makefile
@@ -7,10 +7,10 @@ JOUJOU_OBJECTS = $(JOUJOU_SOURCES:.c=.o)
RDUMP_SOURCES = rdump.c radeon_pci.c reg.c
RDUMP_OBJECTS = $(RDUMP_SOURCES:.c=.o)
-TATI_SOURCES = tati.c
+TATI_SOURCES = tati.c r6xx_rati.c
TATI_OBJECTS = $(TATI_SOURCES:.c=.o)
-REPLAYX_SOURCES = replayx.c replayx_drv.c replayx_r6xx.c
+REPLAYX_SOURCES = replayx.c replayx_drv.c r6xx.c r6xx_rati.c r6xx_replayx.c
REPLAYX_OBJECTS = $(REPLAYX_SOURCES:.c=.o)
CDUMP_SOURCES = cdump.c radeon_pci.c reg.c
diff --git a/r6xx.c b/r6xx.c
new file mode 100644
index 0000000..ed4a238
--- /dev/null
+++ b/r6xx.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Alex Deucher <alexander.deucher@amd.com>
+ * Jerome Glisse
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "replayx.h"
+#include "xf86drm.h"
+#include "radeon_drm.h"
+#include "radeon_family.h"
+#include "r6xx.h"
+#include "r6xxd.h"
+
+/*
+ * Append the relocation packet that refers to @bo.
+ *
+ * The first three entries of blit->relocs are searched for an entry whose
+ * handle equals bo->handle.  On a match a two dword IT_NOP packet is
+ * appended to blit->cs; its payload is the index of the matching entry
+ * multiplied by four.
+ *
+ * When no entry matches, nothing is appended.  Callers that verify their
+ * dword count report that afterwards as a cdw mismatch, but callers that
+ * return void cannot, so the condition is also reported on stderr here.
+ */
+void r6xx_emit_reloc(struct r6xx_blit *blit, struct ctx_bo *bo)
+{
+	const unsigned nrelocs = 3;	/* number of relocs[] entries searched */
+	unsigned i;
+
+	for (i = 0; i < nrelocs; i++) {
+		if (blit->relocs[i].handle == bo->handle) {
+			blit->cs[blit->cdw++] = PKT3(IT_NOP, 1);
+			blit->cs[blit->cdw++] = i * 4;
+			return;
+		}
+	}
+
+	fprintf(stderr, "%s %d no relocation entry matches the buffer handle\n",
+		__func__, __LINE__);
+}
+
+/* Per-ASIC GPR, thread and stack limits picked by r6xx_sq_conf(). */
+struct r6xx_sq_limits {
+	unsigned ps_gprs, vs_gprs;
+	unsigned ps_threads, vs_threads, gs_threads, es_threads;
+	unsigned ps_stack, vs_stack, gs_stack, es_stack;
+};
+
+/*
+ * Fill blit->sq_conf for the ASIC family found in blit->ctx->family.
+ *
+ * Shader priorities are fixed (ps 0, vs 1, gs 2, es 3).  GPR, thread and
+ * stack limits come from a per-family table; the values are deliberately
+ * conservative, see r600_cp.c in the drm.  Families without a dedicated
+ * entry use the RV610 limits.  Every family gets 4 clause temporary GPRs
+ * and no GS/ES GPRs.
+ *
+ * sq_config has VC_ENABLE cleared for RV610, RV620, RS780, RS880 and RV710
+ * (no vertex cache) and set for every other family, combined with
+ * DX9_CONSTS, ALU_INST_PREFER_VECTOR and the four priorities.
+ *
+ * Always returns 0.
+ */
+int r6xx_sq_conf(struct r6xx_blit *blit)
+{
+	/*                                           gprs      threads           stack entries  */
+	/*                                           ps   vs   ps   vs  gs es   ps   vs  gs  es */
+	static const struct r6xx_sq_limits r600  = { 192, 56, 136, 48, 4, 4, 128, 128,  0,  0 };
+	static const struct r6xx_sq_limits rv630 = {  84, 36, 144, 40, 4, 4,  40,  40, 32, 16 };
+	static const struct r6xx_sq_limits rv610 = {  84, 36, 136, 48, 4, 4,  40,  40, 32, 16 };
+	static const struct r6xx_sq_limits rv670 = { 144, 40, 136, 48, 4, 4,  40,  40, 32, 16 };
+	static const struct r6xx_sq_limits rv770 = { 192, 56, 188, 60, 0, 0, 256, 256,  0,  0 };
+	static const struct r6xx_sq_limits rv730 = {  84, 36, 188, 60, 0, 0, 128, 128,  0,  0 };
+	static const struct r6xx_sq_limits rv710 = { 192, 56, 144, 48, 0, 0, 128, 128,  0,  0 };
+	struct r6xx_sq_conf *conf = &blit->sq_conf;
+	const struct r6xx_sq_limits *lim;
+	uint32_t config;
+
+	/* pick the limits for this ASIC */
+	switch (blit->ctx->family) {
+	case CHIP_R600:
+		lim = &r600;
+		break;
+	case CHIP_RV630:
+	case CHIP_RV635:
+		lim = &rv630;
+		break;
+	case CHIP_RV670:
+		lim = &rv670;
+		break;
+	case CHIP_RV770:
+		lim = &rv770;
+		break;
+	case CHIP_RV730:
+	case CHIP_RV740:
+		lim = &rv730;
+		break;
+	case CHIP_RV710:
+		lim = &rv710;
+		break;
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	default:
+		lim = &rv610;
+		break;
+	}
+
+	conf->ps_prio = 0;
+	conf->vs_prio = 1;
+	conf->gs_prio = 2;
+	conf->es_prio = 3;
+
+	conf->num_ps_gprs = lim->ps_gprs;
+	conf->num_vs_gprs = lim->vs_gprs;
+	conf->num_temp_gprs = 4;
+	conf->num_gs_gprs = 0;
+	conf->num_es_gprs = 0;
+	conf->num_ps_threads = lim->ps_threads;
+	conf->num_vs_threads = lim->vs_threads;
+	conf->num_gs_threads = lim->gs_threads;
+	conf->num_es_threads = lim->es_threads;
+	conf->num_ps_stack_entries = lim->ps_stack;
+	conf->num_vs_stack_entries = lim->vs_stack;
+	conf->num_gs_stack_entries = lim->gs_stack;
+	conf->num_es_stack_entries = lim->es_stack;
+
+	/* SQ setup: these parts have no vertex cache (VC) */
+	switch (blit->ctx->family) {
+	case CHIP_RV610:
+	case CHIP_RV620:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV710:
+		config = SQ_CONFIG__VC_ENABLE(0);
+		break;
+	default:
+		config = SQ_CONFIG__VC_ENABLE(1);
+		break;
+	}
+	config |= SQ_CONFIG__DX9_CONSTS(1) |
+		  SQ_CONFIG__ALU_INST_PREFER_VECTOR(1) |
+		  SQ_CONFIG__PS_PRIO(conf->ps_prio) |
+		  SQ_CONFIG__VS_PRIO(conf->vs_prio) |
+		  SQ_CONFIG__GS_PRIO(conf->gs_prio) |
+		  SQ_CONFIG__ES_PRIO(conf->es_prio);
+	conf->sq_config = config;
+
+	return 0;
+}
+
+/*
+ * Program viewport scissor number @id.
+ *
+ * Appends one IT_SET_CONTEXT_REG packet (4 dwords) writing the TL register
+ * of scissor @id from (x1, y1), with WINDOW_OFFSET_DISABLE set, followed by
+ * the BR register from (x2, y2).
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 4 dwords.
+ */
+int r6xx_set_vport_scissor(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2)
+{
+	const unsigned expected = blit->cdw + 4;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		((PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL__STRIDE) -
+		 SET_CONTEXT_REG__OFFSET) >> 2,
+		PA_SC_VPORT_SCISSOR_0_TL__TL_X(x1) |
+		PA_SC_VPORT_SCISSOR_0_TL__TL_Y(y1) |
+		PA_SC_VPORT_SCISSOR_0_TL__WINDOW_OFFSET_DISABLE(1),
+		PA_SC_VPORT_SCISSOR_0_BR__BR_X(x2) |
+		PA_SC_VPORT_SCISSOR_0_BR__BR_Y(y2),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program the generic scissor.
+ *
+ * Appends one IT_SET_CONTEXT_REG packet (4 dwords) writing
+ * PA_SC_GENERIC_SCISSOR_TL from (x1, y1), with WINDOW_OFFSET_DISABLE set,
+ * followed by the BR register from (x2, y2).
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 4 dwords.
+ */
+int r6xx_set_generic_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2)
+{
+	const unsigned expected = blit->cdw + 4;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		(PA_SC_GENERIC_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2,
+		PA_SC_GENERIC_SCISSOR_TL__TL_X(x1) |
+		PA_SC_GENERIC_SCISSOR_TL__TL_Y(y1) |
+		PA_SC_GENERIC_SCISSOR_TL__WINDOW_OFFSET_DISABLE(1),
+		PA_SC_GENERIC_SCISSOR_BR__BR_X(x2) |
+		PA_SC_GENERIC_SCISSOR_BR__BR_Y(y2),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program the window scissor.
+ *
+ * Appends one IT_SET_CONTEXT_REG packet (4 dwords) writing
+ * PA_SC_WINDOW_SCISSOR_TL from (x1, y1), with WINDOW_OFFSET_DISABLE set,
+ * followed by the BR register from (x2, y2).
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 4 dwords.
+ */
+int r6xx_set_window_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2)
+{
+	const unsigned expected = blit->cdw + 4;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		(PA_SC_WINDOW_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2,
+		PA_SC_WINDOW_SCISSOR_TL__TL_X(x1) |
+		PA_SC_WINDOW_SCISSOR_TL__TL_Y(y1) |
+		PA_SC_WINDOW_SCISSOR_TL__WINDOW_OFFSET_DISABLE(1),
+		PA_SC_WINDOW_SCISSOR_BR__BR_X(x2) |
+		PA_SC_WINDOW_SCISSOR_BR__BR_Y(y2),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program the screen scissor.
+ *
+ * Appends one IT_SET_CONTEXT_REG packet (4 dwords) writing
+ * PA_SC_SCREEN_SCISSOR_TL from (x1, y1) followed by the BR register from
+ * (x2, y2).  No WINDOW_OFFSET_DISABLE bit is written for this scissor.
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 4 dwords.
+ */
+int r6xx_set_screen_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2)
+{
+	const unsigned expected = blit->cdw + 4;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		(PA_SC_SCREEN_SCISSOR_TL - SET_CONTEXT_REG__OFFSET) >> 2,
+		PA_SC_SCREEN_SCISSOR_TL__TL_X(x1) |
+		PA_SC_SCREEN_SCISSOR_TL__TL_Y(y1),
+		PA_SC_SCREEN_SCISSOR_BR__BR_X(x2) |
+		PA_SC_SCREEN_SCISSOR_BR__BR_Y(y2),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program clip rectangle number @id.
+ *
+ * Appends one IT_SET_CONTEXT_REG packet (4 dwords) writing the TL register
+ * of clip rectangle @id from (x1, y1) followed by the BR register from
+ * (x2, y2).
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 4 dwords.
+ */
+int r6xx_set_clip_rect(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2)
+{
+	const unsigned expected = blit->cdw + 4;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		((PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL__STRIDE) -
+		 SET_CONTEXT_REG__OFFSET) >> 2,
+		PA_SC_CLIPRECT_0_TL__TL_X(x1) |
+		PA_SC_CLIPRECT_0_TL__TL_Y(y1),
+		PA_SC_CLIPRECT_0_BR__BR_X(x2) |
+		PA_SC_CLIPRECT_0_BR__BR_Y(y2),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Bind @bo as color buffer 0 and size the scissors to it.
+ *
+ * Emits, in order: CB_COLOR0_BASE (with relocation), on families strictly
+ * between CHIP_R600 and CHIP_RV770 an IT_SURFACE_BASE_UPDATE packet,
+ * CB_COLOR0_TILE and CB_COLOR0_FRAG (both relocated to @bo), the
+ * SIZE/VIEW/MASK registers, CB_COLOR0_INFO (with relocation),
+ * CB_TARGET_MASK, CB_COLOR_CONTROL, CB_BLEND_CONTROL and finally the
+ * generic, screen and window scissors set to (0, 0)-(bo->w, bo->h).
+ *
+ * Returns 0 on success, the error of a scissor helper if one fails, or
+ * -EFBIG when the number of emitted dwords differs from the expected 50
+ * (52 with the workaround packet), e.g. because no relocation entry
+ * matched @bo.
+ */
+int r6xx_set_render_target(struct r6xx_blit *blit, struct ctx_bo *bo)
+{
+	uint32_t cb_color_info;
+	unsigned pitch, slice;
+	unsigned cdw = blit->cdw + 50;
+	int r;
+
+	pitch = bo->pitch;
+	slice = bo->h * pitch;
+	cb_color_info = CB_COLOR0_INFO__FORMAT(bo->hw_format)|
+			CB_COLOR0_INFO__COMP_SWAP(SWAP_ALT);
+
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_BASE) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = 0;
+	r6xx_emit_reloc(blit, bo);
+
+	/* rv6xx workaround */
+	if ((blit->ctx->family > CHIP_R600) &&
+	    (blit->ctx->family < CHIP_RV770)) {
+		cdw += 2;
+		blit->cs[blit->cdw++] = PKT3(IT_SURFACE_BASE_UPDATE, 1);
+		blit->cs[blit->cdw++] = (1 << 1);
+	}
+	/* set CMASK & TILE buffer to the offset of color buffer as
+	 * we don't use those this shouldn't cause any issue and we
+	 * then have a valid cmd stream
+	 */
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_TILE) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = 0;
+	r6xx_emit_reloc(blit, bo);
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_FRAG) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = 0;
+	r6xx_emit_reloc(blit, bo);
+
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_SIZE) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = CB_COLOR0_SIZE__PITCH_TILE_MAX((pitch >> 3) - 1) |
+				CB_COLOR0_SIZE__SLICE_TILE_MAX((slice >> 6) - 1);
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_VIEW) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = CB_COLOR0_VIEW__SLICE_START(0) |
+				CB_COLOR0_VIEW__SLICE_MAX(0);
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_MASK) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = CB_COLOR0_MASK__CMASK_BLOCK_MAX(0) |
+				CB_COLOR0_MASK__FMASK_TILE_MAX(0);
+
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR0_INFO) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = cb_color_info;
+	r6xx_emit_reloc(blit, bo);
+
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_TARGET_MASK) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = CB_TARGET_MASK__TARGET0_ENABLE(0xf);
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_COLOR_CONTROL) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = CB_COLOR_CONTROL__ROP3(ROP3_COPY);
+	blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+	blit->cs[blit->cdw++] = ((CB_BLEND_CONTROL) - SET_CONTEXT_REG__OFFSET) >> 2;
+	blit->cs[blit->cdw++] = 0;
+
+	/* a failing scissor helper means the stream is already inconsistent */
+	r = r6xx_set_generic_scissor(blit, 0, 0, bo->w, bo->h);
+	if (r)
+		return r;
+	r = r6xx_set_screen_scissor(blit, 0, 0, bo->w, bo->h);
+	if (r)
+		return r;
+	r = r6xx_set_window_scissor(blit, 0, 0, bo->w, bo->h);
+	if (r)
+		return r;
+
+	if (blit->cdw != cdw) {
+		fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+			__func__, __LINE__, cdw, blit->cdw);
+		return -EFBIG;
+	}
+	return 0;
+}
+
+/*
+ * Program the FS shader registers (SQ_PGM_*_FS).
+ *
+ * Writes SQ_PGM_START_FS with @offset >> 8 followed by a relocation to
+ * @bo, then SQ_PGM_CF_OFFSET_FS = 0 and SQ_PGM_RESOURCES_FS built from
+ * @ngprs and @stack_size.
+ *
+ * Returns 0, or -EFBIG when the emitted dword count differs from the
+ * expected 11 (for instance when no relocation entry matched @bo).
+ */
+int r6xx_fs_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+		  unsigned offset, unsigned ngprs, unsigned stack_size)
+{
+	const unsigned expected = blit->cdw + 11;
+	/* program start address, relocated against bo */
+	const uint32_t start[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_START_FS - SET_CONTEXT_REG__OFFSET) >> 2,
+		offset >> 8,
+	};
+	/* everything emitted after the relocation */
+	const uint32_t state[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_CF_OFFSET_FS - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_RESOURCES_FS - SET_CONTEXT_REG__OFFSET) >> 2,
+		SQ_PGM_RESOURCES_FS__NUM_GPRS(ngprs) |
+		SQ_PGM_RESOURCES_FS__STACK_SIZE(stack_size),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(start) / sizeof(start[0]); n++)
+		blit->cs[blit->cdw++] = start[n];
+	r6xx_emit_reloc(blit, bo);
+	for (n = 0; n < sizeof(state) / sizeof(state[0]); n++)
+		blit->cs[blit->cdw++] = state[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program the VS shader registers (SQ_PGM_*_VS) and the VS export count.
+ *
+ * Writes SQ_PGM_START_VS with @offset >> 8 followed by a relocation to
+ * @bo, then SQ_PGM_CF_OFFSET_VS = 0, SQ_PGM_RESOURCES_VS built from @ngprs
+ * and @stack_size with UNCACHED_FIRST_INST set, SQ_PGM_CF_OFFSET_VS = 0 a
+ * second time, and SPI_VS_OUT_CONFIG with VS_EXPORT_COUNT set to
+ * @cs_export_count.
+ *
+ * Returns 0, or -EFBIG when the emitted dword count differs from the
+ * expected 17 (for instance when no relocation entry matched @bo).
+ */
+int r6xx_vs_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+		  unsigned offset, unsigned ngprs, unsigned stack_size,
+		  unsigned cs_export_count)
+{
+	const unsigned expected = blit->cdw + 17;
+	/* program start address, relocated against bo */
+	const uint32_t start[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_START_VS - SET_CONTEXT_REG__OFFSET) >> 2,
+		offset >> 8,
+	};
+	/* everything emitted after the relocation */
+	const uint32_t state[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_CF_OFFSET_VS - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_RESOURCES_VS - SET_CONTEXT_REG__OFFSET) >> 2,
+		SQ_PGM_RESOURCES_VS__NUM_GPRS(ngprs) |
+		SQ_PGM_RESOURCES_VS__STACK_SIZE(stack_size) |
+		SQ_PGM_RESOURCES_VS__UNCACHED_FIRST_INST(1),
+
+		/* SQ_PGM_CF_OFFSET_VS is written a second time here */
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_CF_OFFSET_VS - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		/* Interpolator setup */
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SPI_VS_OUT_CONFIG - SET_CONTEXT_REG__OFFSET) >> 2,
+		SPI_VS_OUT_CONFIG__VS_EXPORT_COUNT(cs_export_count),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(start) / sizeof(start[0]); n++)
+		blit->cs[blit->cdw++] = start[n];
+	r6xx_emit_reloc(blit, bo);
+	for (n = 0; n < sizeof(state) / sizeof(state[0]); n++)
+		blit->cs[blit->cdw++] = state[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program the PS shader registers (SQ_PGM_*_PS) and the PS input setup.
+ *
+ * Writes SQ_PGM_START_PS with @offset >> 8 followed by a relocation to
+ * @bo, then SQ_PGM_CF_OFFSET_PS = 0, SQ_PGM_RESOURCES_PS built from @ngprs
+ * and @stack_size with UNCACHED_FIRST_INST set, SQ_PGM_CF_OFFSET_PS = 0 a
+ * second time, SQ_PGM_EXPORTS_PS from @export_mode, three dwords starting
+ * at SPI_PS_IN_CONTROL_0 (NUM_INTERP = @num_interp, then two zeros) and
+ * two zero dwords starting at SPI_PS_INPUT_CNTL_0.
+ *
+ * Unlike its FS/VS counterparts this function does not verify the number
+ * of dwords it emitted.
+ */
+void r6xx_ps_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+		   unsigned offset, unsigned ngprs, unsigned stack_size,
+		   unsigned export_mode, unsigned num_interp)
+{
+	/* program start address, relocated against bo */
+	const uint32_t start[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_START_PS - SET_CONTEXT_REG__OFFSET) >> 2,
+		offset >> 8,
+	};
+	/* everything emitted after the relocation */
+	const uint32_t state[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_CF_OFFSET_PS - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_RESOURCES_PS - SET_CONTEXT_REG__OFFSET) >> 2,
+		SQ_PGM_RESOURCES_PS__NUM_GPRS(ngprs) |
+		SQ_PGM_RESOURCES_PS__STACK_SIZE(stack_size) |
+		SQ_PGM_RESOURCES_PS__UNCACHED_FIRST_INST(1),
+
+		/* SQ_PGM_CF_OFFSET_PS is written a second time here */
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_CF_OFFSET_PS - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(SQ_PGM_EXPORTS_PS - SET_CONTEXT_REG__OFFSET) >> 2,
+		SQ_PGM_EXPORTS_PS__EXPORT_MODE(export_mode),
+
+		PKT3(IT_SET_CONTEXT_REG, 4),
+		(SPI_PS_IN_CONTROL_0 - SET_CONTEXT_REG__OFFSET) >> 2,
+		SPI_PS_IN_CONTROL_0__NUM_INTERP(num_interp),
+		0,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 3),
+		(SPI_PS_INPUT_CNTL_0 - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+		0,
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(start) / sizeof(start[0]); n++)
+		blit->cs[blit->cdw++] = start[n];
+	r6xx_emit_reloc(blit, bo);
+	for (n = 0; n < sizeof(state) / sizeof(state[0]); n++)
+		blit->cs[blit->cdw++] = state[n];
+}
+
+/*
+ * Upload @count ALU constants starting at constant slot @id.
+ *
+ * Each constant consumes four floats from @cst, so @cst must provide
+ * @count * 4 values.  One IT_SET_ALU_CONST packet is appended: header,
+ * start offset (@id * 4), then every float converted with fui().
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly
+ * @count * 4 + 2 dwords.
+ */
+int r6xx_set_alu_consts(struct r6xx_blit *blit, unsigned id,
+			unsigned count, float *cst)
+{
+	const unsigned ndw = count * 4;
+	const unsigned expected = blit->cdw + ndw + 2;
+	const float *src = cst;
+	unsigned left = ndw;
+
+	blit->cs[blit->cdw++] = PKT3(IT_SET_ALU_CONST, ndw + 1);
+	blit->cs[blit->cdw++] = id * 4;
+	while (left--)
+		blit->cs[blit->cdw++] = fui(*src++);
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Program sampler @id with the default state used here.
+ *
+ * Appends one IT_SET_SAMPLER packet (5 dwords) at sampler offset @id * 3:
+ *   word0: X/Y clamp to last texel, Z wrap, point XY mag/min filters,
+ *          no Z filter, MIP filter given SQ_TEX_Z_FILTER_NONE,
+ *          border color type 0
+ *   word1: MIN_LOD, MAX_LOD and LOD_BIAS all 0
+ *   word2: MC_COORD_TRUNCATE set, every other field 0
+ *
+ * The dword count is not verified by this function.
+ */
+void r6xx_set_default_sampler(struct r6xx_blit *blit, unsigned id)
+{
+	blit->cs[blit->cdw++] = PKT3(IT_SET_SAMPLER, 4);
+	blit->cs[blit->cdw++] = id * 3;
+	blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD0_0__CLAMP_X(SQ_TEX_CLAMP_LAST_TEXEL) |
+				SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y(SQ_TEX_CLAMP_LAST_TEXEL) |
+				SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z(SQ_TEX_WRAP) |
+				SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER(SQ_TEX_XY_FILTER_POINT) |
+				SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER(SQ_TEX_XY_FILTER_POINT) |
+				SQ_TEX_SAMPLER_WORD0_0__Z_FILTER(SQ_TEX_Z_FILTER_NONE) |
+				SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER(SQ_TEX_Z_FILTER_NONE) |
+				SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE(0);
+	/* MAX_LOD used to be OR-ed in twice; once is sufficient */
+	blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD1_0__MIN_LOD(0) |
+				SQ_TEX_SAMPLER_WORD1_0__MAX_LOD(0) |
+				SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS(0);
+	blit->cs[blit->cdw++] = SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC(0) |
+				SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE(1) |
+				SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA(0) |
+				SQ_TEX_SAMPLER_WORD2_0__HIGH_PRECISION_FILTER(0) |
+				SQ_TEX_SAMPLER_WORD2_0__TYPE(0);
+}
+
+/*
+ * Describe @bo as a 2D texture in resource slot @id.
+ *
+ * Appends one IT_SET_RESOURCE packet (9 dwords) at resource offset
+ * @id * 7.  Tile mode, pitch, width, height and data format are taken
+ * from @bo, the destination selects are X/Y/Z/W in order, and the type is
+ * SQ_TEX_VTX_VALID_TEXTURE.  The packet is followed by two relocations,
+ * both against @bo.
+ *
+ * The dword count is not verified by this function.
+ */
+void r6xx_set_tex_resource(struct r6xx_blit *blit, unsigned id, struct ctx_bo *bo)
+{
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_RESOURCE, 8),
+		id * 7,
+		/* word 0 */
+		SQ_TEX_RESOURCE_WORD0_0__DIM(SQ_TEX_DIM_2D) |
+		SQ_TEX_RESOURCE_WORD0_0__TILE_MODE(bo->hw_tile) |
+		SQ_TEX_RESOURCE_WORD0_0__PITCH((bo->pitch >> 3) - 1) |
+		SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH(bo->w - 1),
+		/* word 1 */
+		SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT(bo->hw_format) |
+		SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT(bo->h - 1),
+		/* words 2 and 3 */
+		0,
+		0,
+		/* word 4 */
+		SQ_TEX_RESOURCE_WORD4_0__REQUEST_SIZE(1) |
+		SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X(SQ_SEL_X) |
+		SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y(SQ_SEL_Y) |
+		SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z(SQ_SEL_Z) |
+		SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W(SQ_SEL_W),
+		/* word 5 */
+		0,
+		/* word 6 */
+		SQ_VTX_CONSTANT_WORD6_0__TYPE(SQ_TEX_VTX_VALID_TEXTURE),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	/* the packet is followed by two relocations against the same bo */
+	for (n = 0; n < 2; n++)
+		r6xx_emit_reloc(blit, bo);
+}
+
+/*
+ * Describe @vbo as a vertex buffer in resource slot @id.
+ *
+ * Appends one IT_SET_RESOURCE packet (9 dwords) at resource offset
+ * @id * 7 carrying vbo->offset, vbo->ndw * 4, the stride/format fields of
+ * @vbo, its memory request size and the type SQ_TEX_VTX_VALID_BUFFER,
+ * followed by one relocation against vbo->bo.
+ *
+ * Returns 0, or -EFBIG when the emitted dword count differs from the
+ * expected 11 (for instance when no relocation entry matched vbo->bo).
+ */
+int r6xx_set_vtx_resource(struct r6xx_blit *blit, unsigned id,
+			  struct r6xx_vbo *vbo)
+{
+	const unsigned expected = blit->cdw + 11;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_RESOURCE, 8),
+		id * 7,
+		/* words 0 and 1 */
+		vbo->offset,
+		vbo->ndw * 4,
+		/* word 2 */
+		SQ_VTX_CONSTANT_WORD2_0__STRIDE(vbo->stride) |
+		SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT(vbo->data_format) |
+		SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL(vbo->num_format_all) |
+		SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL(vbo->format_comp_all) |
+		SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL(vbo->srf_mode_all) |
+		SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP(vbo->endian_swap),
+		/* word 3 */
+		SQ_VTX_CONSTANT_WORD3_0__MEM_REQUEST_SIZE(vbo->mem_request_size),
+		/* words 4 and 5 */
+		0,
+		0,
+		/* word 6 */
+		SQ_VTX_CONSTANT_WORD6_0__TYPE(SQ_TEX_VTX_VALID_BUFFER),
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+	r6xx_emit_reloc(blit, vbo->bo);
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Emit an auto-indexed draw described by @draw.
+ *
+ * Appends four packets (10 dwords in total): VGT_PRIMITIVE_TYPE through
+ * IT_SET_CONFIG_REG, IT_INDEX_TYPE, IT_NUM_INSTANCES and
+ * IT_DRAW_INDEX_AUTO carrying the index count and the draw initiator.
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 10 dwords.
+ */
+int r6xx_draw_auto(struct r6xx_blit *blit, struct r6xx_draw *draw)
+{
+	const unsigned expected = blit->cdw + 10;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONFIG_REG, 2),
+		(VGT_PRIMITIVE_TYPE - SET_CONFIG_REG__OFFSET) >> 2,
+		draw->primitive_type,
+
+		PKT3(IT_INDEX_TYPE, 1),
+		draw->index_type,
+
+		PKT3(IT_NUM_INSTANCES, 1),
+		draw->num_instances,
+
+		PKT3(IT_DRAW_INDEX_AUTO, 2),
+		draw->num_indices,
+		draw->vgt_draw_initiator,
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Clear the depth buffer state.
+ *
+ * Appends two IT_SET_CONTEXT_REG packets (6 dwords in total) writing 0 to
+ * DB_DEPTH_INFO and to DB_DEPTH_CONTROL.
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 6 dwords.
+ */
+int r6xx_disable_depth(struct r6xx_blit *blit)
+{
+	const unsigned expected = blit->cdw + 6;
+	const uint32_t pkt[] = {
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(DB_DEPTH_INFO - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+
+		PKT3(IT_SET_CONTEXT_REG, 2),
+		(DB_DEPTH_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2,
+		0,
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Emit an IT_SURFACE_SYNC packet of type @sync_type.
+ *
+ * With a @bo the size dword is bo->size rounded up to a multiple of 256
+ * and expressed in 256 byte units, and the packet is followed by a
+ * relocation against @bo.  Without a @bo (NULL) the size dword is
+ * 0xffffffff and no relocation is emitted.  The two remaining payload
+ * dwords are 0 and 10.
+ *
+ * Returns 0, or -EFBIG when the emitted dword count differs from the
+ * expected 5 (7 with a @bo).
+ */
+int r6xx_surface_sync(struct r6xx_blit *blit,
+		      struct ctx_bo *bo,
+		      unsigned sync_type)
+{
+	unsigned expected = blit->cdw + 5;
+	unsigned size;
+
+	if (!bo) {
+		size = 0xffffffff;
+	} else {
+		size = (bo->size + 255) >> 8;
+		expected += 2;	/* room for the relocation packet */
+	}
+
+	blit->cs[blit->cdw++] = PKT3(IT_SURFACE_SYNC, 4);
+	blit->cs[blit->cdw++] = sync_type;
+	blit->cs[blit->cdw++] = size;
+	blit->cs[blit->cdw++] = 0;
+	blit->cs[blit->cdw++] = 10;
+	if (bo)
+		r6xx_emit_reloc(blit, bo);
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+/*
+ * Emit an IT_EVENT_WRITE packet (2 dwords) whose payload is
+ * @event_iniator.
+ *
+ * Returns 0, or -EFBIG when blit->cdw did not advance by exactly 2 dwords.
+ */
+int r6xx_event(struct r6xx_blit *blit, unsigned event_iniator)
+{
+	const unsigned expected = blit->cdw + 2;
+	const uint32_t pkt[] = {
+		PKT3(IT_EVENT_WRITE, 1),
+		event_iniator,
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(pkt) / sizeof(pkt[0]); n++)
+		blit->cs[blit->cdw++] = pkt[n];
+
+	if (blit->cdw == expected)
+		return 0;
+
+	fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+		__func__, __LINE__, expected, blit->cdw);
+	return -EFBIG;
+}
+
+int r6xx_set_default_state(struct r6xx_blit *blit)
+{
+ struct r6xx_sq_conf *sq_conf = &blit->sq_conf;
+ unsigned cdw = blit->cdw + 234, i;
+
+ if (blit->ctx->family < CHIP_RV770) {
+ blit->cs[blit->cdw++] = PKT3(IT_START_3D_CMDBUF, 1);
+ blit->cs[blit->cdw++] = 0;
+ }
+ blit->cs[blit->cdw++] = PKT3(IT_CONTEXT_CONTROL, 2);
+ blit->cs[blit->cdw++] = 0x80000000;
+ blit->cs[blit->cdw++] = 0x80000000;
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 7);
+ blit->cs[blit->cdw++] = (SQ_CONFIG - SET_CONFIG_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = sq_conf->sq_config;
+ blit->cs[blit->cdw++] = SQ_GPR_RESOURCE_MGMT_1__NUM_PS_GPRS(sq_conf->num_ps_gprs) |
+ SQ_GPR_RESOURCE_MGMT_1__NUM_VS_GPRS(sq_conf->num_vs_gprs) |
+ SQ_GPR_RESOURCE_MGMT_1__NUM_CLAUSE_TEMP_GPRS(sq_conf->num_temp_gprs);
+ blit->cs[blit->cdw++] = SQ_GPR_RESOURCE_MGMT_2__NUM_GS_GPRS(sq_conf->num_gs_gprs) |
+ SQ_GPR_RESOURCE_MGMT_2__NUM_ES_GPRS(sq_conf->num_es_gprs);
+ blit->cs[blit->cdw++] = SQ_THREAD_RESOURCE_MGMT__NUM_PS_THREADS(sq_conf->num_ps_threads) |
+ SQ_THREAD_RESOURCE_MGMT__NUM_VS_THREADS(sq_conf->num_vs_threads) |
+ SQ_THREAD_RESOURCE_MGMT__NUM_GS_THREADS(sq_conf->num_gs_threads) |
+ SQ_THREAD_RESOURCE_MGMT__NUM_ES_THREADS(sq_conf->num_es_threads);
+ blit->cs[blit->cdw++] = SQ_STACK_RESOURCE_MGMT_1__NUM_PS_STACK_ENTRIES(sq_conf->num_ps_stack_entries) |
+ SQ_STACK_RESOURCE_MGMT_1__NUM_VS_STACK_ENTRIES(sq_conf->num_vs_stack_entries);
+ blit->cs[blit->cdw++] = SQ_STACK_RESOURCE_MGMT_2__NUM_GS_STACK_ENTRIES(sq_conf->num_gs_stack_entries) |
+ SQ_STACK_RESOURCE_MGMT_2__NUM_ES_STACK_ENTRIES(sq_conf->num_es_stack_entries);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2);
+ blit->cs[blit->cdw++] = (VC_ENHANCE - SET_CONFIG_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2);
+ blit->cs[blit->cdw++] = (DB_DEBUG - SET_CONFIG_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0x80000000;
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONFIG_REG, 2);
+ blit->cs[blit->cdw++] = (DB_WATERMARKS - SET_CONFIG_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = DB_WATERMARKS__DEPTH_FREE(4) |
+ DB_WATERMARKS__DEPTH_FLUSH(16) |
+ DB_WATERMARKS__FORCE_SUMMARIZE(0) |
+ DB_WATERMARKS__DEPTH_PENDING_FREE(4) |
+ DB_WATERMARKS__DEPTH_CACHELINE_FREE(16);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CTL_CONST, 3);
+ blit->cs[blit->cdw++] = (SQ_VTX_BASE_VTX_LOC - SET_CTL_CONST__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+ blit->cs[blit->cdw++] = 0;
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 10);
+ blit->cs[blit->cdw++] = (SQ_ESGS_RING_ITEMSIZE - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // SQ_ESGS_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_GSVS_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_ESTMP_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_GSTMP_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_VSTMP_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_PSTMP_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_FBUF_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_REDUC_RING_ITEMSIZE
+ blit->cs[blit->cdw++] = 0; // SQ_GS_VERT_ITEMSIZE
+
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5);
+ blit->cs[blit->cdw++] = (CB_CLRCMP_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = CB_CLRCMP_CONTROL__CLRCMP_FCN_SEL(CLRCMP_SEL_SRC);
+ blit->cs[blit->cdw++] = 0; // CB_CLRCMP_SRC
+ blit->cs[blit->cdw++] = 0; // CB_CLRCMP_DST
+ blit->cs[blit->cdw++] = 0; // CB_CLRCMP_MSK
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (CB_SHADER_MASK - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = CB_SHADER_MASK__OUTPUT0_ENABLE(0xf);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 6);
+ blit->cs[blit->cdw++] = (SX_ALPHA_TEST_CONTROL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // SX_ALPHA_TEST_CONTROL
+ blit->cs[blit->cdw++] = 0; // CB_BLEND_RED
+ blit->cs[blit->cdw++] = 0; // CB_BLEND_GREEN
+ blit->cs[blit->cdw++] = 0; // CB_BLEND_BLUE
+ blit->cs[blit->cdw++] = 0; // CB_BLEND_ALPHA
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (PA_SC_WINDOW_OFFSET - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET(0) |
+ PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET(0);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (PA_SC_CLIPRECT_RULE - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = PA_SC_CLIPRECT_RULE__CLIP_RULE(0xfffff);
+
+ /* clip boolean is set to always visible -> doesn't matter */
+ for (i = 0; i < PA_SC_CLIPRECT_0_TL__NUM; i++) {
+ r6xx_set_clip_rect(blit, i, 0, 0, 8192, 8192);
+ }
+
+ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL__NUM; i++) {
+ r6xx_set_vport_scissor(blit, i, 0, 0, 8192, 8192);
+ }
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3);
+ blit->cs[blit->cdw++] = (PA_SC_MPASS_PS_CNTL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+ blit->cs[blit->cdw++] = PA_SC_MODE_CNTL__FORCE_EOV_CNTDWN_ENABLE(1) |
+ PA_SC_MODE_CNTL__FORCE_EOV_REZ_ENABLE(1) |
+ PA_SC_MODE_CNTL__TILE_COVER_DISABLE(0) |
+ PA_SC_MODE_CNTL__TILE_COVER_NO_SCISSOR(1) |
+ 0x00500000;
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 10);
+ blit->cs[blit->cdw++] = (PA_SC_LINE_CNTL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // PA_SC_LINE_CNTL
+ blit->cs[blit->cdw++] = 0; // PA_SC_AA_CONFIG
+ blit->cs[blit->cdw++] = PA_SU_VTX_CNTL__ROUND_MODE(X_TRUNCATE) |
+ PA_SU_VTX_CNTL__PIX_CENTER(1) |
+ /* round to even, fixed point 1/256 */
+ PA_SU_VTX_CNTL__QUANT_MODE(X_1_256TH);
+ blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_VERT_CLIP_ADJ
+ blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_VERT_DISC_ADJ
+ blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+ blit->cs[blit->cdw++] = fui(1.0); // PA_CL_GB_HORZ_DISC_ADJ
+ blit->cs[blit->cdw++] = 0; // PA_SC_AA_SAMPLE_LOCS_MCTX
+ blit->cs[blit->cdw++] = 0; // PA_SC_AA_SAMPLE_LOCS_8S_WD1_M
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (PA_SC_AA_MASK - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0xFFFFFFFF;
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 6);
+ blit->cs[blit->cdw++] = (PA_CL_CLIP_CNTL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = PA_CL_CLIP_CNTL__CLIP_DISABLE(1) |
+ PA_CL_CLIP_CNTL__ZCLIP_NEAR_DISABLE(0) |
+ PA_CL_CLIP_CNTL__ZCLIP_FAR_DISABLE(0);
+ blit->cs[blit->cdw++] = PA_SU_SC_MODE_CNTL__FACE(1);
+ blit->cs[blit->cdw++] = PA_CL_VTE_CNTL__VTX_XY_FMT(1) |
+ PA_CL_VTE_CNTL__VTX_Z_FMT(0) |
+ PA_CL_VTE_CNTL__VPORT_Z_SCALE_ENA(1) |
+ PA_CL_VTE_CNTL__VPORT_Z_OFFSET_ENA(1);
+ blit->cs[blit->cdw++] = 0; // PA_CL_VS_OUT_CNTL
+ blit->cs[blit->cdw++] = 0; // PA_CL_NANINF_CNTL
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (PA_CL_VPORT_ZSCALE_0 - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = fui(1.0);
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (PA_CL_VPORT_ZOFFSET_0 - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = fui(0.0);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 7);
+ blit->cs[blit->cdw++] = (PA_SU_POLY_OFFSET_DB_FMT_CNTL - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0xe8; // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_CLAMP
+ blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_FRONT_SCALE
+ blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_BACK_SCALE
+ blit->cs[blit->cdw++] = 0; // PA_SU_POLY_OFFSET_BACK_OFFSET
+
+ /* default Interpolator setup */
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (SPI_VS_OUT_ID_0 - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = SPI_VS_OUT_ID_0__SEMANTIC_0(0) |
+ SPI_VS_OUT_ID_0__SEMANTIC_1(1);
+ /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3);
+ blit->cs[blit->cdw++] = (SPI_PS_INPUT_CNTL_0 - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = SPI_PS_INPUT_CNTL_0__SEMANTIC(0) |
+ SPI_PS_INPUT_CNTL_0__DEFAULT_VAL(0x01) |
+ SPI_PS_INPUT_CNTL_0__SEL_CENTROID(1);
+ /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
+ blit->cs[blit->cdw++] = SPI_PS_INPUT_CNTL_0__SEMANTIC(1) |
+ SPI_PS_INPUT_CNTL_0__DEFAULT_VAL(0x01) |
+ SPI_PS_INPUT_CNTL_0__SEL_CENTROID(1);
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5);
+ blit->cs[blit->cdw++] = (SPI_INPUT_Z - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // SPI_INPUT_Z
+ blit->cs[blit->cdw++] = 0; // SPI_FOG_CNTL
+ blit->cs[blit->cdw++] = 0; // SPI_FOG_FUNC_SCALE
+ blit->cs[blit->cdw++] = 0; // SPI_FOG_FUNC_BIAS
+
+ /* VGT */
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 5);
+ blit->cs[blit->cdw++] = (VGT_MAX_VTX_INDX - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0xffffff; // VGT_MAX_VTX_INDX
+ blit->cs[blit->cdw++] = 0; // VGT_MIN_VTX_INDX
+ blit->cs[blit->cdw++] = 0; // VGT_INDX_OFFSET
+ blit->cs[blit->cdw++] = 0; // VGT_MULTI_PRIM_IB_RESET_INDX
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (VGT_PRIMITIVEID_EN - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (VGT_MULTI_PRIM_IB_RESET_EN - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 3);
+ blit->cs[blit->cdw++] = (VGT_INSTANCE_STEP_RATE_0 - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // VGT_INSTANCE_STEP_RATE_0
+ blit->cs[blit->cdw++] = 0; // VGT_INSTANCE_STEP_RATE_1
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 18);
+ blit->cs[blit->cdw++] = (PA_SU_POINT_SIZE - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // PA_SU_POINT_SIZE
+ blit->cs[blit->cdw++] = 0; // PA_SU_POINT_MINMAX
+ blit->cs[blit->cdw++] = PA_SU_LINE_CNTL__WIDTH(8);
+ blit->cs[blit->cdw++] = 0; // PA_SC_LINE_STIPPLE
+ blit->cs[blit->cdw++] = 0; // VGT_OUTPUT_PATH_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_HOS_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_HOS_MAX_TESS_LEVEL
+ blit->cs[blit->cdw++] = 0; // VGT_HOS_MIN_TESS_LEVEL
+ blit->cs[blit->cdw++] = 0; // VGT_HOS_REUSE_DEPTH
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_PRIM_TYPE
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_FIRST_DECR
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_DECR
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_0_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_1_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_0_FMT_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_GROUP_VECT_1_FMT_CNTL
+ blit->cs[blit->cdw++] = 0; // VGT_GS_MODE
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 4);
+ blit->cs[blit->cdw++] = (VGT_STRMOUT_EN - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0; // VGT_STRMOUT_EN
+ blit->cs[blit->cdw++] = VGT_REUSE_OFF__REUSE_OFF(1); // VGT_REUSE_OFF
+ blit->cs[blit->cdw++] = 0; // VGT_VTX_CNT_EN
+
+ blit->cs[blit->cdw++] = PKT3(IT_SET_CONTEXT_REG, 2);
+ blit->cs[blit->cdw++] = (VGT_STRMOUT_BUFFER_EN - SET_CONTEXT_REG__OFFSET) >> 2;
+ blit->cs[blit->cdw++] = 0;
+
+ if (blit->cdw != cdw) {
+ fprintf(stderr, "%s %d cdw missmatch expected %d got %d\n",
+ __func__, __LINE__, cdw, blit->cdw);
+ return -EFBIG;
+ }
+ return 0;
+}
+
+
+/*
+ * Store the vertex shader used for solid fills into @shader.
+ *
+ * The program is 12 dwords long: three control-flow words (vertex fetch
+ * clause at slot 4, position export, parameter 0 export), one zero padding
+ * slot, then the single vertex fetch instruction.
+ *
+ * Returns the number of dwords written; @shader must have room for them.
+ */
+unsigned r6xx_solid_vs(uint32_t *shader)
+{
+	const uint32_t prog[] = {
+		/* 0: CF, run the fetch clause located at slot 4 */
+		SQ_CF_WORD0__ADDR(4),
+		SQ_CF_WORD1__CF_INST(SQ_CF_INST_VTX) |
+			SQ_CF_WORD1__POP_COUNT(0) |
+			SQ_CF_WORD1__CF_CONST(0) |
+			SQ_CF_WORD1__COND(SQ_CF_COND_ACTIVE) |
+			SQ_CF_WORD1__COUNT(0) |
+			SQ_CF_WORD1__VALID_PIXEL_MODE(0) |
+			SQ_CF_WORD1__END_OF_PROGRAM (0) |
+			SQ_CF_WORD1__WHOLE_QUAD_MODE(0) |
+			SQ_CF_WORD1__BARRIER(0),
+		/* 1: CF, export GPR0.xy01 as position */
+		SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_POS0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_POS) |
+			SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(3),
+		SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_1) |
+			SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1),
+		/* 2: CF, export GPR0.zw00 as parameter 0 and end the program */
+		SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_PARAM0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PARAM) |
+			SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) |
+			SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0),
+		SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_Z) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_W) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_0) |
+			SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) |
+			SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) |
+			SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(0),
+		/* 3: padding so the fetch clause starts at slot 4 */
+		0x00000000,
+		0x00000000,
+		/* 4/5: fetch one 4 x 32-bit float vertex into GPR0 */
+		SQ_VTX_WORD0__VTX_INST(SQ_VTX_INST_FETCH) |
+			SQ_VTX_WORD0__FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA) |
+			SQ_VTX_WORD0__FETCH_WHOLE_QUAD(0) |
+			SQ_VTX_WORD0__BUFFER_ID(0) |
+			SQ_VTX_WORD0__SRC_GPR(0) |
+			SQ_VTX_WORD0__SRC_REL(0) |
+			SQ_VTX_WORD0__SRC_SEL_X(SQ_SEL_X) |
+			SQ_VTX_WORD0__MEGA_FETCH_COUNT(16),
+		SQ_VTX_WORD1_GPR__DST_GPR(0) |
+			SQ_VTX_WORD1_GPR__DST_REL(0) |
+			SQ_VTX_WORD1__DST_SEL_X(SQ_SEL_X) |
+			SQ_VTX_WORD1__DST_SEL_Y(SQ_SEL_Y) |
+			SQ_VTX_WORD1__DST_SEL_Z(SQ_SEL_Z) |
+			SQ_VTX_WORD1__DST_SEL_W(SQ_SEL_W) |
+			SQ_VTX_WORD1__USE_CONST_FIELDS(1) |
+			SQ_VTX_WORD1__DATA_FORMAT(FMT_32_32_32_32_FLOAT) |
+			SQ_VTX_WORD1__NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED) |
+			SQ_VTX_WORD1__FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED) |
+			SQ_VTX_WORD1__SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE),
+		SQ_VTX_WORD2__OFFSET(0) |
+			SQ_VTX_WORD2__ENDIAN_SWAP(SQ_ENDIAN_NONE) |
+			SQ_VTX_WORD2__CONST_BUF_NO_STRIDE(0) |
+			SQ_VTX_WORD2__MEGA_FETCH(1),
+		0x0,
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(prog) / sizeof(prog[0]); n++) {
+		shader[n] = prog[n];
+	}
+	return n;
+}
+
+/*
+ * Store the vertex shader used for copy blits into @shader.
+ *
+ * The copy program is word-for-word the same as the solid fill vertex
+ * shader (fetch one 4-float vertex into GPR0, export xy as position and zw
+ * as parameter 0), so it is emitted by r6xx_solid_vs() instead of keeping a
+ * second copy of the same microcode. If the two programs ever need to
+ * differ, give this function its own body again.
+ *
+ * Returns the number of dwords written; @shader must have room for them.
+ */
+unsigned r6xx_copy_vs(uint32_t *shader)
+{
+	return r6xx_solid_vs(shader);
+}
+
+/*
+ * Store the pixel shader used for solid fills into @shader.
+ *
+ * The program is 12 dwords long: an ALU clause word pointing at slot 2, a
+ * pixel export of GPR0 to colour buffer 0 that ends the program, then four
+ * MOV instructions (one per channel) that load GPR0 from SQ_ALU_CFILE_0
+ * with clamping enabled.
+ *
+ * Returns the number of dwords written; @shader must have room for them.
+ */
+unsigned r6xx_solid_ps(uint32_t* shader)
+{
+	const unsigned chan[4] = { SQ_CHAN_X, SQ_CHAN_Y, SQ_CHAN_Z, SQ_CHAN_W };
+	unsigned n = 0;
+	unsigned c;
+
+	/* 0: CF, ALU clause of 4 instructions (COUNT is 3) at slot 2 */
+	shader[n++] = SQ_CF_ALU_WORD0__ADDR(2) |
+		SQ_CF_ALU_WORD0__KCACHE_BANK0(0) |
+		SQ_CF_ALU_WORD0__KCACHE_BANK1(0) |
+		SQ_CF_ALU_WORD0__KCACHE_MODE0(SQ_CF_KCACHE_NOP);
+	shader[n++] = SQ_CF_ALU_WORD1__CF_INST(SQ_CF_INST_ALU) |
+		SQ_CF_ALU_WORD1__COUNT(3) |
+		SQ_CF_ALU_WORD1__KCACHE_MODE1(SQ_CF_KCACHE_NOP) |
+		SQ_CF_ALU_WORD1__KCACHE_ADDR0(0) |
+		SQ_CF_ALU_WORD1__KCACHE_ADDR1(0) |
+		SQ_CF_ALU_WORD1__WHOLE_QUAD_MODE(0) |
+		SQ_CF_ALU_WORD1__BARRIER(0);
+
+	/* 1: CF, export GPR0.xyzw to CB0 and end the program */
+	shader[n++] = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_CB0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PIXEL) |
+		SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0);
+	shader[n++] = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_Z) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_W) |
+		SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) |
+		SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) |
+		SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) |
+		SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1);
+
+	/*
+	 * 2..5: one MOV per channel, X first. The instructions only differ by
+	 * the channel they read/write; the W one carries the LAST bit.
+	 */
+	for (c = 0; c < 4; c++) {
+		shader[n++] = SQ_ALU_WORD0__SRC0_SEL(SQ_ALU_CFILE_0) |
+			SQ_ALU_WORD0__SRC0_REL(0) |
+			SQ_ALU_WORD0__SRC0_CHAN(chan[c]) |
+			SQ_ALU_WORD0__SRC0_NEG(0) |
+			SQ_ALU_WORD0__SRC1_SEL(0) |
+			SQ_ALU_WORD0__SRC1_REL(0) |
+			SQ_ALU_WORD0__SRC1_CHAN(chan[c]) |
+			SQ_ALU_WORD0__SRC1_NEG(0) |
+			SQ_ALU_WORD0__INDEX_MODE(0) |
+			SQ_ALU_WORD0__PRED_SEL(SQ_PRED_SEL_OFF) |
+			SQ_ALU_WORD0__LAST(c == 3);
+		shader[n++] = SQ_ALU_WORD1_OP2__ALU_INST(SQ_OP2_INST_MOV) |
+			SQ_ALU_WORD1_OP2__SRC0_ABS(0) |
+			SQ_ALU_WORD1_OP2__SRC1_ABS(0) |
+			SQ_ALU_WORD1_OP2__UPDATE_EXECUTE_MASK(0) |
+			SQ_ALU_WORD1_OP2__UPDATE_PRED(0) |
+			SQ_ALU_WORD1_OP2__WRITE_MASK(1) |
+			SQ_ALU_WORD1_OP2__OMOD(SQ_ALU_OMOD_OFF) |
+			SQ_ALU_WORD1__BANK_SWIZZLE(SQ_ALU_VEC_012) |
+			SQ_ALU_WORD1__DST_GPR(0) |
+			SQ_ALU_WORD1__DST_REL(0) |
+			SQ_ALU_WORD1__DST_CHAN(chan[c]) |
+			SQ_ALU_WORD1__CLAMP(1);
+	}
+
+	return n;
+}
+
+/*
+ * Store the pixel shader used for copy blits into @shader.
+ *
+ * The program is 8 dwords long: a texture clause word pointing at slot 2,
+ * a pixel export of GPR0 to colour buffer 0 that ends the program, then a
+ * single SAMPLE instruction using resource 0 / sampler 0 with GPR0 as both
+ * coordinate source and destination.
+ *
+ * Returns the number of dwords written; @shader must have room for them.
+ */
+unsigned r6xx_copy_ps(uint32_t *shader)
+{
+	uint32_t *out = shader;
+
+	/* 0: CF, texture clause (one instruction) at slot 2 */
+	*out++ = SQ_CF_WORD0__ADDR(2);
+	*out++ = SQ_CF_WORD1__CF_INST(SQ_CF_INST_TEX) |
+		SQ_CF_WORD1__COUNT(0) |
+		SQ_CF_WORD1__WHOLE_QUAD_MODE(0) |
+		SQ_CF_WORD1__BARRIER(1);
+
+	/* 1: CF, export GPR0.xyzw to CB0 and end the program */
+	*out++ = SQ_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE(SQ_EXPORT_CB0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__TYPE(SQ_EXPORT_PIXEL) |
+		SQ_CF_ALLOC_EXPORT_WORD0__RW_GPR(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__RW_REL(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__INDEX_GPR(0) |
+		SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE(0);
+	*out++ = SQ_CF_ALLOC_EXPORT_WORD1__CF_INST(SQ_CF_INST_EXPORT_DONE) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X(SQ_SEL_X) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y(SQ_SEL_Y) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z(SQ_SEL_Z) |
+		SQ_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W(SQ_SEL_W) |
+		SQ_CF_ALLOC_EXPORT_WORD1__BURST_COUNT(0) |
+		SQ_CF_ALLOC_EXPORT_WORD1__END_OF_PROGRAM(1) |
+		SQ_CF_ALLOC_EXPORT_WORD1__WHOLE_QUAD_MODE(0) |
+		SQ_CF_ALLOC_EXPORT_WORD1__BARRIER(1);
+
+	/* 2/3: TEX instruction 0, sample with GPR0.xy01 into GPR0.xyzw */
+	*out++ = SQ_TEX_WORD0__TEX_INST(SQ_TEX_INST_SAMPLE) |
+		SQ_TEX_WORD0__BC_FRAC_MODE(0) |
+		SQ_TEX_WORD0__FETCH_WHOLE_QUAD(0) |
+		SQ_TEX_WORD0__RESOURCE_ID(0) |
+		SQ_TEX_WORD0__SRC_GPR(0) |
+		SQ_TEX_WORD0__SRC_REL(0);
+	*out++ = SQ_TEX_WORD1__DST_GPR(0) |
+		SQ_TEX_WORD1__DST_REL(0) |
+		SQ_TEX_WORD1__DST_SEL_X(SQ_SEL_X) |
+		SQ_TEX_WORD1__DST_SEL_Y(SQ_SEL_Y) |
+		SQ_TEX_WORD1__DST_SEL_Z(SQ_SEL_Z) |
+		SQ_TEX_WORD1__DST_SEL_W(SQ_SEL_W) |
+		SQ_TEX_WORD1__LOD_BIAS(0) |
+		SQ_TEX_WORD1__COORD_TYPE_X(0) |
+		SQ_TEX_WORD1__COORD_TYPE_Y(0) |
+		SQ_TEX_WORD1__COORD_TYPE_Z(0) |
+		SQ_TEX_WORD1__COORD_TYPE_W(0);
+	*out++ = SQ_TEX_WORD2__OFFSET_X(0) |
+		SQ_TEX_WORD2__OFFSET_Y(0) |
+		SQ_TEX_WORD2__OFFSET_Z(0) |
+		SQ_TEX_WORD2__SAMPLER_ID(0) |
+		SQ_TEX_WORD2__SRC_SEL_X(SQ_SEL_X) |
+		SQ_TEX_WORD2__SRC_SEL_Y(SQ_SEL_Y) |
+		SQ_TEX_WORD2__SRC_SEL_Z(SQ_SEL_0) |
+		SQ_TEX_WORD2__SRC_SEL_W(SQ_SEL_1);
+	*out++ = 0;
+
+	return (unsigned)(out - shader);
+}
diff --git a/r6xx.h b/r6xx.h
new file mode 100644
index 0000000..7465717
--- /dev/null
+++ b/r6xx.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Alex Deucher <alexander.deucher@amd.com>
+ * Jerome Glisse
+ */
+#ifndef R6XX_H
+#define R6XX_H
+
+#include "rati_file.h"
+
+/*
+ * Shader sequencer (SQ) configuration: one priority, GPR count, thread
+ * count and stack entry count per shader stage (PS, VS, GS, ES), plus a
+ * temporary GPR count and a raw sq_config value.
+ *
+ * NOTE(review): the per-field meanings above are read off the field names;
+ * the code that fills and emits this structure is not part of this header,
+ * confirm against the r6xx_sq_conf() implementation.
+ */
+struct r6xx_sq_conf {
+ unsigned ps_prio;
+ unsigned vs_prio;
+ unsigned gs_prio;
+ unsigned es_prio;
+ unsigned num_ps_gprs;
+ unsigned num_vs_gprs;
+ unsigned num_gs_gprs;
+ unsigned num_es_gprs;
+ unsigned num_temp_gprs;
+ unsigned num_ps_threads;
+ unsigned num_vs_threads;
+ unsigned num_gs_threads;
+ unsigned num_es_threads;
+ unsigned num_ps_stack_entries;
+ unsigned num_vs_stack_entries;
+ unsigned num_gs_stack_entries;
+ unsigned num_es_stack_entries;
+ unsigned sq_config;
+};
+
+/*
+ * Description of a vertex buffer, handed to r6xx_set_vtx_resource().
+ *
+ * NOTE(review): presumably offset locates the data inside @bo, ndw is its
+ * size in dwords and the remaining fields are the raw values of the vertex
+ * resource/fetch fields of the same name; the consumer is not visible from
+ * this header, confirm units against r6xx_set_vtx_resource().
+ */
+struct r6xx_vbo {
+ unsigned offset;
+ unsigned ndw;
+ unsigned stride;
+ unsigned data_format;
+ unsigned num_format_all;
+ unsigned format_comp_all;
+ unsigned srf_mode_all;
+ unsigned endian_swap;
+ unsigned mem_request_size;
+ struct ctx_bo *bo; /* buffer object backing the vertex data */
+};
+
+/*
+ * Parameters of one draw, handed to r6xx_draw_auto().
+ *
+ * NOTE(review): the fields look like raw VGT register/packet values
+ * (primitive type, instance count, index type, index count, draw
+ * initiator); confirm the expected encodings against r6xx_draw_auto().
+ */
+struct r6xx_draw {
+ unsigned primitive_type;
+ unsigned num_instances;
+ unsigned index_type;
+ unsigned num_indices;
+ unsigned vgt_draw_initiator;
+};
+
+/* Blit context; only used through pointers here, defined elsewhere. */
+struct r6xx_blit;
+
+/*
+ * Command stream emission helpers operating on a blit context.
+ * NOTE(review): the int-returning ones presumably return 0 on success and a
+ * negative errno value on failure, like the rest of this code base; their
+ * definitions are not visible from this header, confirm.
+ */
+void r6xx_emit_reloc(struct r6xx_blit *blit, struct ctx_bo *bo);
+int r6xx_sq_conf(struct r6xx_blit *blit);
+int r6xx_set_vport_scissor(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2);
+int r6xx_set_generic_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2);
+int r6xx_set_window_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2);
+int r6xx_set_screen_scissor(struct r6xx_blit *blit, int x1, int y1, int x2, int y2);
+int r6xx_set_clip_rect(struct r6xx_blit *blit, unsigned id, int x1, int y1, int x2, int y2);
+int r6xx_set_render_target(struct r6xx_blit *blit, struct ctx_bo *bo);
+int r6xx_fs_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+ unsigned offset, unsigned ngprs, unsigned stack_size);
+int r6xx_vs_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+ unsigned offset, unsigned ngprs, unsigned stack_size,
+ unsigned cs_export_count);
+void r6xx_ps_setup(struct r6xx_blit *blit, struct ctx_bo *bo,
+ unsigned offset, unsigned ngprs, unsigned stack_size,
+ unsigned export_mode, unsigned num_interp);
+int r6xx_set_alu_consts(struct r6xx_blit *blit, unsigned id,
+ unsigned count, float *cst);
+void r6xx_set_default_sampler(struct r6xx_blit *blit, unsigned id);
+void r6xx_set_tex_resource(struct r6xx_blit *blit, unsigned id, struct ctx_bo *bo);
+int r6xx_set_vtx_resource(struct r6xx_blit *blit, unsigned id,
+ struct r6xx_vbo *vbo);
+int r6xx_draw_auto(struct r6xx_blit *blit, struct r6xx_draw *draw);
+int r6xx_disable_depth(struct r6xx_blit *blit);
+int r6xx_surface_sync(struct r6xx_blit *blit,
+ struct ctx_bo *bo,
+ unsigned sync_type);
+int r6xx_event(struct r6xx_blit *blit, unsigned event_iniator);
+int r6xx_set_default_state(struct r6xx_blit *blit);
+/*
+ * Shader builders: each one stores its microcode into shader[] and returns
+ * the number of dwords written; the caller provides the storage.
+ */
+unsigned r6xx_solid_vs(uint32_t *shader);
+unsigned r6xx_copy_vs(uint32_t *shader);
+unsigned r6xx_solid_ps(uint32_t* shader);
+unsigned r6xx_copy_ps(uint32_t *shader);
+
+/*
+ * rfile helper
+ *
+ * r6xx_rfile_legalize() runs the per command buffer legalization on every
+ * command buffer of the file and stops at the first error.
+ * r6xx_rfile_clear_offset() processes only the command buffers flagged
+ * RATI_CMD_CLEAR_OFFSET. Both return 0 on success or a negative errno value.
+ */
+int r6xx_rfile_legalize(struct rati_file *rfile);
+int r6xx_rfile_clear_offset(struct rati_file *rfile);
+
+/*
+ * tati helpers
+ *
+ * r6xx_tati_cmd_buffer_write() prints command buffer idx to file as one
+ * hexadecimal dword per line, annotating register writes and PKT3 opcodes.
+ * Returns 0 on success, -EINVAL on a write error or unknown packet type.
+ */
+int r6xx_tati_cmd_buffer_write(struct rati_file *rfile,
+ unsigned idx, FILE *file);
+
+#endif
diff --git a/r6xx_rati.c b/r6xx_rati.c
new file mode 100644
index 0000000..d6949ee
--- /dev/null
+++ b/r6xx_rati.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Alex Deucher <alexander.deucher@amd.com>
+ * Jerome Glisse
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "xf86drm.h"
+#include "radeon_drm.h"
+#include "radeon_family.h"
+#include "r6xx.h"
+#include "r6xxd.h"
+
+/*
+ * Consume the relocation packet expected at *next_idx in command buffer idx.
+ *
+ * A relocation is a type 3 NOP packet whose first body dword, divided by 4,
+ * is the index of the data buffer being referenced. On success *offset is
+ * set to that data buffer's offset and *next_idx is advanced past the NOP.
+ *
+ * Returns 0 on success, -EINVAL when the command buffer ends before the
+ * relocation, when the packet is not a NOP, or when it refers to a data
+ * buffer that does not exist.
+ */
+static int r6xx_next_reloc_offset(struct rati_file *rfile, unsigned idx,
+				  unsigned *next_idx, uint64_t *offset)
+{
+	unsigned type, it, count, ridx;
+	uint32_t *pm4 = rfile->cmd_buffer_ptr[idx];
+
+	/*
+	 * Both the NOP header and its first body dword are read below; refuse
+	 * a truncated stream instead of reading past the end of the buffer.
+	 */
+	if (rfile->cmd_buffer[idx].ndw < 2 ||
+	    *next_idx > rfile->cmd_buffer[idx].ndw - 2) {
+		/* missing relocation */
+		return -EINVAL;
+	}
+
+	type = PKTx_TYPE(pm4[*next_idx]);
+	count = PKTx_COUNT(pm4[*next_idx]);
+	it = PKT3_IT(pm4[*next_idx]);
+	if (type != 3 || it != IT_NOP) {
+		/* missing relocation */
+		return -EINVAL;
+	}
+	ridx = pm4[(*next_idx) + 1] / 4;
+	if (ridx >= rfile->header.ndata_buffers) {
+		/* relocation out of range */
+		return -EINVAL;
+	}
+	*offset = rfile->data_buffer[ridx].offset;
+	/* skip the header plus the count body dwords */
+	*next_idx += count + 1;
+	return 0;
+}
+
+/* Return non-zero when a write to reg is followed by a relocation packet. */
+static int r6xx_reg_is_relocated(unsigned reg)
+{
+	switch (reg) {
+	case VGT_STRMOUT_BUFFER_BASE_0: case VGT_STRMOUT_BUFFER_BASE_1:
+	case VGT_STRMOUT_BUFFER_BASE_2: case VGT_STRMOUT_BUFFER_BASE_3:
+	case CP_COHER_BASE:
+	case CB_COLOR0_FRAG: case CB_COLOR1_FRAG:
+	case CB_COLOR2_FRAG: case CB_COLOR3_FRAG:
+	case CB_COLOR4_FRAG: case CB_COLOR5_FRAG:
+	case CB_COLOR6_FRAG: case CB_COLOR7_FRAG:
+	case CB_COLOR0_TILE: case CB_COLOR1_TILE:
+	case CB_COLOR2_TILE: case CB_COLOR3_TILE:
+	case CB_COLOR4_TILE: case CB_COLOR5_TILE:
+	case CB_COLOR6_TILE: case CB_COLOR7_TILE:
+	case CB_COLOR0_BASE: case CB_COLOR1_BASE:
+	case CB_COLOR2_BASE: case CB_COLOR3_BASE:
+	case CB_COLOR4_BASE: case CB_COLOR5_BASE:
+	case CB_COLOR6_BASE: case CB_COLOR7_BASE:
+	case DB_DEPTH_BASE:
+	case DB_HTILE_DATA_BASE:
+	case SQ_PGM_START_FS: case SQ_PGM_START_ES:
+	case SQ_PGM_START_VS: case SQ_PGM_START_GS:
+	case SQ_PGM_START_PS:
+	case SQ_ALU_CONST_CACHE_GS_0: case SQ_ALU_CONST_CACHE_GS_1:
+	case SQ_ALU_CONST_CACHE_GS_2: case SQ_ALU_CONST_CACHE_GS_3:
+	case SQ_ALU_CONST_CACHE_GS_4: case SQ_ALU_CONST_CACHE_GS_5:
+	case SQ_ALU_CONST_CACHE_GS_6: case SQ_ALU_CONST_CACHE_GS_7:
+	case SQ_ALU_CONST_CACHE_GS_8: case SQ_ALU_CONST_CACHE_GS_9:
+	case SQ_ALU_CONST_CACHE_GS_10: case SQ_ALU_CONST_CACHE_GS_11:
+	case SQ_ALU_CONST_CACHE_GS_12: case SQ_ALU_CONST_CACHE_GS_13:
+	case SQ_ALU_CONST_CACHE_GS_14: case SQ_ALU_CONST_CACHE_GS_15:
+	case SQ_ALU_CONST_CACHE_PS_0: case SQ_ALU_CONST_CACHE_PS_1:
+	case SQ_ALU_CONST_CACHE_PS_2: case SQ_ALU_CONST_CACHE_PS_3:
+	case SQ_ALU_CONST_CACHE_PS_4: case SQ_ALU_CONST_CACHE_PS_5:
+	case SQ_ALU_CONST_CACHE_PS_6: case SQ_ALU_CONST_CACHE_PS_7:
+	case SQ_ALU_CONST_CACHE_PS_8: case SQ_ALU_CONST_CACHE_PS_9:
+	case SQ_ALU_CONST_CACHE_PS_10: case SQ_ALU_CONST_CACHE_PS_11:
+	case SQ_ALU_CONST_CACHE_PS_12: case SQ_ALU_CONST_CACHE_PS_13:
+	case SQ_ALU_CONST_CACHE_PS_14: case SQ_ALU_CONST_CACHE_PS_15:
+	case SQ_ALU_CONST_CACHE_VS_0: case SQ_ALU_CONST_CACHE_VS_1:
+	case SQ_ALU_CONST_CACHE_VS_2: case SQ_ALU_CONST_CACHE_VS_3:
+	case SQ_ALU_CONST_CACHE_VS_4: case SQ_ALU_CONST_CACHE_VS_5:
+	case SQ_ALU_CONST_CACHE_VS_6: case SQ_ALU_CONST_CACHE_VS_7:
+	case SQ_ALU_CONST_CACHE_VS_8: case SQ_ALU_CONST_CACHE_VS_9:
+	case SQ_ALU_CONST_CACHE_VS_10: case SQ_ALU_CONST_CACHE_VS_11:
+	case SQ_ALU_CONST_CACHE_VS_12: case SQ_ALU_CONST_CACHE_VS_13:
+	case SQ_ALU_CONST_CACHE_VS_14: case SQ_ALU_CONST_CACHE_VS_15:
+	case SX_MEMORY_EXPORT_BASE:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Strip the buffer base address from the value written to register reg.
+ *
+ * pm4_idx is the index, inside command buffer idx, of the dword holding the
+ * register value. For registers that carry a 256-byte aligned GPU address
+ * the next relocation (at *next_idx) is consumed and the referenced data
+ * buffer offset, shifted right by 8, is XORed into the value, which leaves
+ * only the offset relative to the start of the buffer. Other registers are
+ * left untouched and no relocation is consumed.
+ *
+ * Returns 0 on success or the error reported by the relocation lookup.
+ */
+static int r6xx_reg_clear_offset(struct rati_file *rfile, unsigned idx,
+				 unsigned reg, unsigned pm4_idx,
+				 unsigned *next_idx)
+{
+	uint32_t *pm4;
+	uint64_t bo_offset;
+	int err;
+
+	if (!r6xx_reg_is_relocated(reg)) {
+		return 0;
+	}
+
+	err = r6xx_next_reloc_offset(rfile, idx, next_idx, &bo_offset);
+	if (err) {
+		return err;
+	}
+
+	/* xor the offset, this will keep offset from start of bo intact */
+	pm4 = rfile->cmd_buffer_ptr[idx];
+	pm4[pm4_idx] ^= ((bo_offset >> 8) & 0xffffffff);
+	return 0;
+}
+
+/*
+ * Legalize command buffer idx of rfile.
+ *
+ * Not implemented yet: both arguments are ignored and -EINVAL is always
+ * returned, so every caller sees a failure.
+ */
+static int r6xx_rfile_cmd_buffer_legalize(struct rati_file *rfile, unsigned idx)
+{
+ return -EINVAL;
+}
+
+/*
+ * Remove the buffer base addresses from command buffer idx of rfile.
+ *
+ * The PM4 stream is walked packet by packet. Every packet that embeds a
+ * GPU address is expected to be followed by relocation NOP packet(s); each
+ * relocation is looked up and the offset of the data buffer it refers to is
+ * XORed out of the address field(s) in place, which leaves only the offset
+ * relative to the start of the buffer.
+ *
+ * Returns 0 on success, -EINVAL on an unknown packet type, on a packet
+ * that does not fit inside the buffer, or on a missing/invalid relocation.
+ */
+static int r6xx_cmd_buffer_clear_offset(struct rati_file *rfile, unsigned idx)
+{
+	unsigned i, j, count, reg, it, header, next_idx, tmp;
+	uint32_t *pm4 = rfile->cmd_buffer_ptr[idx];
+	uint64_t offset;
+	int r;
+
+	for (i = 0; i < rfile->cmd_buffer[idx].ndw;) {
+		header = pm4[i];
+		count = PKTx_COUNT(header);
+		switch (PKTx_TYPE(header)) {
+		case 0:
+			reg = PKT0_REG(header);
+			next_idx = i + count + 1;
+			if (next_idx > rfile->cmd_buffer[idx].ndw) {
+				/* truncated packet */
+				return -EINVAL;
+			}
+			/* relocations, if any, follow the register values */
+			for (j = 0, i++; j < count; j++, reg += 4) {
+				r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx);
+				if (r) {
+					return r;
+				}
+			}
+			break;
+		case 1:
+		case 2:
+			/*
+			 * Nothing to patch; step over the header dword (same
+			 * walk as r6xx_tati_cmd_buffer_write) so that the loop
+			 * makes progress instead of spinning on this packet.
+			 */
+			i++;
+			break;
+		case 3:
+			next_idx = i + count + 1;
+			if (next_idx > rfile->cmd_buffer[idx].ndw) {
+				/* truncated packet */
+				return -EINVAL;
+			}
+			it = PKT3_IT(pm4[i]);
+			switch (it) {
+			case IT_SET_CONFIG_REG:
+				/* first body dword is the dword register offset */
+				reg = (pm4[++i] << 2) + SET_CONFIG_REG__OFFSET;
+				for (j = 0, i++; j < (count - 1); j++, reg += 4) {
+					r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx);
+					if (r) {
+						return r;
+					}
+				}
+				break;
+			case IT_SET_CONTEXT_REG:
+				reg = (pm4[++i] << 2) + SET_CONTEXT_REG__OFFSET;
+				for (j = 0, i++; j < (count - 1); j++, reg += 4) {
+					r = r6xx_reg_clear_offset(rfile, idx, reg, i++, &next_idx);
+					if (r) {
+						return r;
+					}
+				}
+				break;
+			case IT_SET_PREDICATION:
+				/* an address is only present for a non zero operation */
+				tmp = (pm4[i + 2] >> 16) & 0x7;
+				if (tmp) {
+					r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+					if (r) {
+						return r;
+					}
+					pm4[i + 1] ^= (offset & 0xffffffff);
+					pm4[i + 2] ^= ((offset >> 32ULL) & 0xff);
+				}
+				break;
+			case IT_DRAW_INDEX:
+				r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+				if (r) {
+					return r;
+				}
+				pm4[i + 1] ^= (offset & 0xffffffff);
+				pm4[i + 2] ^= ((offset >> 32ULL) & 0xff);
+				break;
+			case IT_WAIT_REG_MEM:
+				/* bit 4 set: the packet polls memory, not a register */
+				if (pm4[i + 1] & 0x10) {
+					r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+					if (r) {
+						return r;
+					}
+					pm4[i + 2] ^= (offset & 0xffffffff);
+					pm4[i + 3] ^= ((offset >> 32ULL) & 0xff);
+				}
+				break;
+			case IT_SURFACE_SYNC:
+				/* size 0xffffffff with base 0 carries no relocation */
+				if (pm4[i + 2] != 0xffffffff || pm4[i + 3]) {
+					r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+					if (r) {
+						return r;
+					}
+					pm4[i + 3] ^= ((offset >> 8ULL) & 0xffffffff);
+				}
+				break;
+			case IT_EVENT_WRITE:
+			case IT_EVENT_WRITE_EOP:
+				/* only the long form of the packet carries an address */
+				if (count > 1) {
+					r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+					if (r) {
+						return r;
+					}
+					pm4[i + 2] ^= (offset & 0xfffffffc);
+					pm4[i + 3] ^= ((offset >> 32ULL) & 0xff);
+				}
+				break;
+			case IT_SET_RESOURCE:
+				/* body: register offset then 7 dwords per resource */
+				for (j = 0, i +=2; j < ((count - 1) / 7); j++, i += 7) {
+					tmp = pm4[i + 6];
+					/* top two bits of word 6 give the resource type */
+					switch ((tmp >> 30) & 3) {
+					case SQ_TEX_VTX_VALID_TEXTURE:
+						/* two relocations: words 2 and 3 */
+						r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+						if (r) {
+							return r;
+						}
+						pm4[i + 2] ^= ((offset >> 8ULL) & 0xffffffff);
+						r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+						if (r) {
+							return r;
+						}
+						pm4[i + 3] ^= ((offset >> 8ULL) & 0xffffffff);
+						break;
+					case SQ_TEX_VTX_VALID_BUFFER:
+						r = r6xx_next_reloc_offset(rfile, idx, &next_idx, &offset);
+						if (r) {
+							return r;
+						}
+						pm4[i + 0] ^= (offset & 0xffffffffULL);
+						pm4[i + 2] ^= ((offset >> 32ULL) & 0xff);
+						break;
+					default:
+						break;
+					}
+				}
+				break;
+			/*
+			 * TODO: STRMOUT_BUFFER_UPDATE and COPY_DW packets also
+			 * carry relocated addresses (see the radeon kernel
+			 * command stream checker) but are not handled yet; they
+			 * currently go through the default case unmodified.
+			 */
+			default:
+				break;
+			}
+			/* continue after the packet and the relocations consumed */
+			i = next_idx;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Legalize every command buffer of rfile, in order.
+ *
+ * Stops at the first command buffer that fails and returns its error code;
+ * returns 0 when all of them succeed (or when there is none).
+ */
+int r6xx_rfile_legalize(struct rati_file *rfile)
+{
+	unsigned n = 0;
+
+	while (n < rfile->header.ncmd_buffers) {
+		int err = r6xx_rfile_cmd_buffer_legalize(rfile, n++);
+
+		if (err != 0) {
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Clear the buffer offsets of every command buffer of rfile that is
+ * flagged RATI_CMD_CLEAR_OFFSET; other command buffers are skipped.
+ *
+ * Stops at the first failure and returns its error code, 0 otherwise.
+ */
+int r6xx_rfile_clear_offset(struct rati_file *rfile)
+{
+	unsigned n;
+
+	for (n = 0; n < rfile->header.ncmd_buffers; n++) {
+		int err;
+
+		if (!(rfile->cmd_buffer[n].flags & RATI_CMD_CLEAR_OFFSET)) {
+			continue;
+		}
+		err = r6xx_cmd_buffer_clear_offset(rfile, n);
+		if (err) {
+			return err;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * tati helpers
+ */
+/* Print one plain dword line; returns 0, or -EINVAL when the write fails. */
+static int r6xx_tati_emit_dw(FILE *file, uint32_t dw)
+{
+	return (fprintf(file, " 0x%08x\n", dw) < 0) ? -EINVAL : 0;
+}
+
+/* Print one register value line annotated with the register address. */
+static int r6xx_tati_emit_reg_dw(FILE *file, uint32_t dw, unsigned reg)
+{
+	return (fprintf(file, " 0x%08x // reg 0x%08x\n", dw, reg) < 0) ? -EINVAL : 0;
+}
+
+/*
+ * Dump command buffer idx of rfile to file, one hexadecimal dword per line.
+ *
+ * Type 0 packets and type 3 SET_CONFIG_REG / SET_CONTEXT_REG packets get
+ * each value annotated with the register it targets; type 3 headers are
+ * annotated with their opcode; everything else is printed as plain dwords.
+ *
+ * Returns 0 on success, -EINVAL when writing fails or when a packet type
+ * other than 0..3 is found.
+ */
+int r6xx_tati_cmd_buffer_write(struct rati_file *rfile,
+			       unsigned idx, FILE *file)
+{
+	uint32_t *pm4 = rfile->cmd_buffer_ptr[idx];
+	unsigned pos = 0;
+
+	while (pos < rfile->cmd_buffer[idx].ndw) {
+		const uint32_t header = pm4[pos];
+		const unsigned count = PKTx_COUNT(header);
+		const unsigned type = PKTx_TYPE(header);
+		unsigned reg, opcode, k;
+
+		if (type > 3) {
+			return -EINVAL;
+		}
+		pos++;
+
+		if (type == 1 || type == 2) {
+			/* only the header dword is printed for these */
+			if (r6xx_tati_emit_dw(file, header)) {
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		if (type == 0) {
+			if (r6xx_tati_emit_dw(file, header)) {
+				return -EINVAL;
+			}
+			reg = PKT0_REG(header);
+			for (k = 0; k < count; k++, reg += 4) {
+				if (r6xx_tati_emit_reg_dw(file, pm4[pos++], reg)) {
+					return -EINVAL;
+				}
+			}
+			continue;
+		}
+
+		/* type 3 */
+		opcode = PKT3_IT(header);
+		if (fprintf(file, " 0x%08x // pkt3 0x%02x\n", header, opcode) < 0) {
+			return -EINVAL;
+		}
+
+		if (opcode != IT_SET_CONFIG_REG && opcode != IT_SET_CONTEXT_REG) {
+			for (k = 0; k < count; k++) {
+				if (r6xx_tati_emit_dw(file, pm4[pos++])) {
+					return -EINVAL;
+				}
+			}
+			continue;
+		}
+
+		/* first body dword is the dword offset inside the register range */
+		if (opcode == IT_SET_CONFIG_REG) {
+			reg = (pm4[pos] << 2) + SET_CONFIG_REG__OFFSET;
+		} else {
+			reg = (pm4[pos] << 2) + SET_CONTEXT_REG__OFFSET;
+		}
+		if (r6xx_tati_emit_dw(file, pm4[pos++])) {
+			return -EINVAL;
+		}
+		for (k = 0; k < (count - 1); k++, reg += 4) {
+			if (r6xx_tati_emit_reg_dw(file, pm4[pos++], reg)) {
+				return -EINVAL;
+			}
+		}
+	}
+	return 0;
+}
diff --git a/r6xx_replayx.c b/r6xx_replayx.c
new file mode 100644
index 0000000..e5a1615
--- /dev/null
+++ b/r6xx_replayx.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Alex Deucher <alexander.deucher@amd.com>
+ * Jerome Glisse
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "replayx.h"
+#include "xf86drm.h"
+#include "radeon_drm.h"
+#include "radeon_family.h"
+#include "r6xx.h"
+#include "r6xxd.h"
+
+/*
+ * Prepare the r6xx blit state stored in @ctx: compute the SQ configuration,
+ * allocate the 16KB command stream scratch buffer, create and map the 4096
+ * byte shader bo, copy the vertex and pixel shaders into it and pre-fill the
+ * three relocation entries used by r6xx_blit().
+ *
+ * Layout of the shader bo: vertex data at offset 0, vertex shader at 512,
+ * pixel shader at 1024.
+ *
+ * Returns 0 on success, -ENOMEM when the command stream buffer cannot be
+ * allocated, or the error reported by r6xx_sq_conf(), ctx_bo() or
+ * ctx_bo_map(). Whatever was acquired here is released again on failure.
+ */
+static int r6xx_blit_init(struct ctx *ctx)
+{
+	struct r6xx_blit *blit = &ctx->blit.r6xx;
+	int r;
+
+	blit->ctx = ctx;
+	blit->vs_offset = 512;
+	blit->ps_offset = 1024;
+	blit->vbo_offset = 0;
+	r = r6xx_sq_conf(blit);
+	if (r) {
+		return r;
+	}
+	blit->cdw = 0;
+	blit->cs = malloc(16 << 10);
+	if (blit->cs == NULL) {
+		return -ENOMEM;
+	}
+	blit->shader_bo.size = 4096;
+	blit->shader_bo.alignment = 4096;
+	blit->shader_bo.flags = 0;
+	r = ctx_bo(ctx, &blit->shader_bo, NULL);
+	if (r) {
+		goto err_free_cs;
+	}
+
+	r = ctx_bo_map(ctx, &blit->shader_bo);
+	if (r) {
+		goto err_free_bo;
+	}
+
+	blit->vs_size = r6xx_copy_vs(blit->shader_bo.data + blit->vs_offset);
+	blit->ps_size = r6xx_copy_ps(blit->shader_bo.data + blit->ps_offset);
+
+	/* relocs[0]: written by the blit, handle filled in by r6xx_blit() */
+	blit->relocs[0].handle = 0;
+	blit->relocs[0].read_domain = 0;
+	blit->relocs[0].write_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+	blit->relocs[0].flags = 0;
+	/* relocs[1]: the shader bo, read only */
+	blit->relocs[1].handle = blit->shader_bo.handle;
+	blit->relocs[1].read_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+	blit->relocs[1].write_domain = 0;
+	blit->relocs[1].flags = 0;
+	/* relocs[2]: read by the blit, handle filled in by r6xx_blit() */
+	blit->relocs[2].handle = 0;
+	blit->relocs[2].read_domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+	blit->relocs[2].write_domain = 0;
+	blit->relocs[2].flags = 0;
+	return 0;
+
+err_free_bo:
+	ctx_bo_free(ctx, &blit->shader_bo);
+err_free_cs:
+	free(blit->cs);
+	blit->cs = NULL;
+	return r;
+}
+
+/*
+ * Release what r6xx_blit_init() acquired: the shader bo and the command
+ * stream scratch buffer. The cs pointer is cleared so a repeated call does
+ * not free it twice.
+ */
+static void r6xx_blit_fini(struct ctx *ctx)
+{
+	struct r6xx_blit *blit = &ctx->blit.r6xx;
+
+	ctx_bo_free(blit->ctx, &blit->shader_bo);
+	free(blit->cs);
+	blit->cs = NULL;
+}
+
+/*
+ * Build and submit a command stream that draws @bo as a texture onto
+ * ctx->front with a single RECTLIST primitive.
+ *
+ * The command stream is rebuilt from scratch on every call (cdw reset to 0)
+ * and submitted with the three relocation entries of the blit state.
+ * Returns the value of ctx_cs().
+ */
+static int r6xx_blit(struct ctx *ctx, struct ctx_bo *bo)
+{
+ struct r6xx_blit *blit = &ctx->blit.r6xx;
+ unsigned x0, y0, x1, y1;
+ float *vtx;
+ struct r6xx_vbo vbo;
+ struct r6xx_draw draw;
+
+ ctx->front.hw_format = COLOR_8_8_8_8;
+ ctx->front.hw_tile = ARRAY_1D_TILED_THIN1;
+ /* relocs[0] is the destination (front), relocs[2] the source bo */
+ blit->relocs[0].handle = ctx->front.handle;
+ blit->relocs[2].handle = bo->handle;
+ blit->cdw = 0;
+ blit->ctx = ctx;
+
+ r6xx_set_default_state(blit);
+ r6xx_disable_depth(blit);
+ r6xx_set_render_target(blit, &ctx->front);
+
+ /* vbo */
+ /* vertex data lives in the shader bo at vbo_offset, 4 floats per vertex */
+ vbo.bo = &blit->shader_bo;
+ vbo.offset = blit->vbo_offset;
+ vbo.stride = 4 * 4;
+ vbo.num_format_all = SQ_NUM_FORMAT_NORM;
+ vbo.data_format = FMT_32_32_32_32_FLOAT;
+ vbo.srf_mode_all = 0;
+ vbo.format_comp_all = 0;
+ vbo.endian_swap = SQ_ENDIAN_NONE;
+ /* NOTE(review): ndw is 16 while only 12 floats are written below - confirm */
+ vbo.ndw = 4 * 4;
+ vbo.mem_request_size = 1;
+ vtx = blit->shader_bo.data + blit->vbo_offset;
+
+ /*
+ * Three vertices covering (0,0)-(bo->w,bo->h). The last two floats of
+ * each vertex mirror the first two; presumably they are the texture
+ * coordinates - TODO confirm against the shaders.
+ */
+ x0 = 0;
+ y0 = 0;
+ x1 = bo->w;
+ y1 = bo->h;
+ vtx[ 0] = x0;
+ vtx[ 1] = y0;
+ vtx[ 2] = 0.0;
+ vtx[ 3] = 0.0;
+ vtx[ 4] = x0;
+ vtx[ 5] = y1;
+ vtx[ 6] = 0.0;
+ vtx[ 7] = bo->h;
+ vtx[ 8] = x1;
+ vtx[ 9] = y1;
+ vtx[10] = bo->w;
+ vtx[11] = bo->h;
+
+ /* NOTE(review): the fetch shader setup is given vs_offset too - confirm intended */
+ r6xx_fs_setup(blit, &blit->shader_bo, blit->vs_offset, 0, 0);
+ r6xx_vs_setup(blit, &blit->shader_bo, blit->vs_offset, 2, 0, 0);
+ r6xx_ps_setup(blit, &blit->shader_bo, blit->ps_offset, 1, 0, 2, 1);
+
+ r6xx_set_tex_resource(blit, 0, bo);
+ r6xx_set_default_sampler(blit, 0);
+ r6xx_set_vtx_resource(blit, 160, &vbo);
+ r6xx_surface_sync(blit, NULL,
+ CP_COHER_CNTL__SH_ACTION_ENA(1) |
+ CP_COHER_CNTL__TC_ACTION_ENA(1) |
+ CP_COHER_CNTL__VC_ACTION_ENA(1));
+
+ /* one auto-indexed rectangle built from the 3 vertices above */
+ draw.primitive_type = DI_PT_RECTLIST;
+ draw.num_indices = 3;
+
+ draw.num_instances = 1;
+ draw.index_type = DI_INDEX_SIZE_16_BIT;
+ draw.vgt_draw_initiator = VGT_DRAW_INITIATOR__SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX);
+ r6xx_draw_auto(blit, &draw);
+ r6xx_event(blit, VGT_EVENT_INITIATOR__EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT));
+
+ return ctx_cs(ctx, blit->cs, blit->cdw, blit->relocs, 3);
+}
+
+/*
+ * Colour buffer state collected for one CB_COLORn slot while a command
+ * stream is scanned (filled in by r6xx_reg_target()).
+ */
+struct r6xx_target {
+ unsigned w; /* set to the same value as pitch */
+ unsigned h; /* slice size divided by pitch */
+ unsigned hw_format; /* bits 2..7 of CB_COLORn_INFO */
+ unsigned hw_tile; /* bits 8..11 of CB_COLORn_INFO */
+ unsigned pitch; /* (low 10 bits of CB_COLORn_SIZE + 1) * 8 */
+ int reloc_id; /* relocation index following CB_COLORn_BASE, -1 when unset */
+};
+
+/*
+ * Decode the relocation packet expected at pm4[*next_idx].
+ *
+ * The packet has to be a type 3 NOP; its first payload dword divided by 4 is
+ * stored in *reloc_id and *next_idx is moved past the packet.
+ *
+ * Returns 0 on success, -EINVAL when the packet is not a type 3 NOP (the
+ * relocation is missing); nothing is modified in that case.
+ */
+static int r6xx_next_reloc_id(uint32_t *pm4, unsigned *next_idx,
+			      int *reloc_id)
+{
+	const unsigned at = *next_idx;
+	const uint32_t header = pm4[at];
+
+	if (PKTx_TYPE(header) == 3 && PKT3_IT(header) == IT_NOP) {
+		*reloc_id = pm4[at + 1] / 4;
+		*next_idx = at + PKTx_COUNT(header) + 1;
+		return 0;
+	}
+	/* missing relocation */
+	return -EINVAL;
+}
+
+/*
+ * Update the colour buffer tracking array @target for a write of @value to
+ * register @reg.
+ *
+ *  - CB_COLORn_BASE: the relocation packet at pm4[*next_idx] is consumed and
+ *    its id stored in target[n].reloc_id (*next_idx moves past it).
+ *  - CB_COLORn_INFO: hw_format and hw_tile are extracted from @value.
+ *  - CB_COLORn_SIZE: pitch, w and h are derived from @value.
+ *  - any other register is ignored.
+ *
+ * Returns 0, or the error of r6xx_next_reloc_id() for a BASE register.
+ */
+static int r6xx_reg_target(uint32_t *pm4, unsigned reg, unsigned value,
+			   struct r6xx_target *target, unsigned *next_idx)
+{
+	struct r6xx_target *cb;
+	unsigned slice;
+
+	if (reg & 3) {
+		/* CB_COLORn registers are 4 bytes apart, anything else never matches */
+		return 0;
+	}
+	if (reg >= CB_COLOR0_BASE && reg <= CB_COLOR7_BASE) {
+		cb = &target[(reg - CB_COLOR0_BASE) / 4];
+		return r6xx_next_reloc_id(pm4, next_idx, &cb->reloc_id);
+	}
+	if (reg >= CB_COLOR0_INFO && reg <= CB_COLOR7_INFO) {
+		cb = &target[(reg - CB_COLOR0_INFO) / 4];
+		cb->hw_format = (value >> 2) & 0x3f;
+		cb->hw_tile = (value >> 8) & 0xf;
+		return 0;
+	}
+	if (reg >= CB_COLOR0_SIZE && reg <= CB_COLOR7_SIZE) {
+		cb = &target[(reg - CB_COLOR0_SIZE) / 4];
+		cb->pitch = (((value >> 0) & 0x3ff) + 1) * 8;
+		cb->w = cb->pitch;
+		slice = (((value >> 10) & 0x000fffff) + 1) * 64;
+		cb->h = slice / cb->pitch;
+		return 0;
+	}
+	return 0;
+}
+
+/*
+ * Scan command buffer @idx of the loaded rati file and record in
+ * ctx->target[] every colour buffer that is bound when a draw packet
+ * (DRAW_INDEX, DRAW_INDEX_AUTO, DRAW_INDEX_IMMD) is met. The geometry and
+ * format tracked from the CB_COLORn_* registers are copied into the
+ * matching ctx->bos[] entry.
+ *
+ * ctx->ntarget and ctx->ctarget are reset to 0 on entry. Targets that do not
+ * fit in ctx->target[] are dropped with a warning on stderr.
+ *
+ * Returns 0 on success or the error of r6xx_reg_target() (missing
+ * relocation after a CB_COLORn_BASE write).
+ */
+static int r6xx_cmd_buffer_target(struct ctx *ctx, unsigned idx)
+{
+	/* ctx->target is the fixed size array declared in replayx.h */
+	const unsigned max_target = sizeof(ctx->target) / sizeof(ctx->target[0]);
+	uint32_t *pm4 = ctx->rfile.cmd_buffer_ptr[idx];
+	unsigned i, j, header, count, next_idx, reg, it;
+	struct r6xx_target target[8];
+	struct ctx_bo *bo;
+	int dropped = 0;
+	int id;
+	int r;
+
+	/* a draw may happen before SIZE/INFO were seen, never copy garbage */
+	memset(target, 0, sizeof(target));
+	for (i = 0; i < 8; i++) {
+		target[i].reloc_id = -1;
+	}
+	ctx->ntarget = 0;
+	ctx->ctarget = 0;
+
+	for (i = 0; i < ctx->rfile.cmd_buffer[idx].ndw;) {
+		header = pm4[i];
+		count = PKTx_COUNT(header);
+		/* by default the next packet follows this packet's payload */
+		next_idx = i + count + 1;
+		switch (PKTx_TYPE(header)) {
+		case 0:
+			reg = PKT0_REG(header);
+			for (j = 0, i++; j < count; j++, reg += 4, i++) {
+				r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx);
+				if (r) {
+					return r;
+				}
+			}
+			break;
+		case 1:
+		case 2:
+			/* header only packet, just step over it */
+			next_idx = i + 1;
+			break;
+		case 3:
+			it = PKT3_IT(header);
+			switch (it) {
+			case IT_SET_CONFIG_REG:
+				reg = (pm4[++i] << 2) + SET_CONFIG_REG__OFFSET;
+				for (j = 0, i++; j < (count - 1); j++, reg += 4, i++) {
+					r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx);
+					if (r) {
+						return r;
+					}
+				}
+				break;
+			case IT_SET_CONTEXT_REG:
+				reg = (pm4[++i] << 2) + SET_CONTEXT_REG__OFFSET;
+				for (j = 0, i++; j < (count - 1); j++, reg += 4, i++) {
+					r = r6xx_reg_target(pm4, reg, pm4[i], target, &next_idx);
+					if (r) {
+						return r;
+					}
+				}
+				break;
+			case IT_DRAW_INDEX:
+			case IT_DRAW_INDEX_AUTO:
+			case IT_DRAW_INDEX_IMMD:
+				/* every colour buffer bound at draw time is a target */
+				for (j = 0; j < 8; j++) {
+					id = target[j].reloc_id;
+					if (id >= 0 && (unsigned)id < ctx->rfile.header.ndata_buffers) {
+						if (ctx->ntarget < max_target) {
+							bo = &ctx->bos[id];
+							bo->w = target[j].w;
+							bo->h = target[j].h;
+							bo->pitch = target[j].pitch;
+							bo->hw_format = target[j].hw_format;
+							bo->hw_tile = target[j].hw_tile;
+							ctx->target[ctx->ntarget++] = bo;
+						} else if (!dropped) {
+							fprintf(stderr, "%s too many targets, ignoring extra ones\n", __func__);
+							dropped = 1;
+						}
+					}
+					target[j].reloc_id = -1;
+				}
+				break;
+			default:
+				break;
+			}
+			break;
+		}
+		i = next_idx;
+	}
+	return 0;
+}
+
+/*
+ * Run the render target scan on each command buffer of the loaded rati
+ * file, in order, stopping at the first failure.
+ *
+ * Returns 0 when every buffer was scanned, otherwise the first non-zero
+ * value returned by r6xx_cmd_buffer_target().
+ */
+static int r6xx_target(struct ctx *ctx)
+{
+	unsigned idx = 0;
+	int err = 0;
+
+	while (!err && idx < ctx->rfile.header.ncmd_buffers) {
+		err = r6xx_cmd_buffer_target(ctx, idx);
+		idx++;
+	}
+	return err;
+}
+
+/*
+ * Check that the loaded trace was captured on a chip of the CHIP_R600 ..
+ * CHIP_RS880 range and prepare it for replay: set the front buffer format
+ * and tiling, print the size of every bo, clear the command buffer offsets
+ * and collect the render targets.
+ *
+ * Returns 0 on success, -EINVAL for an unknown pci id or a family outside
+ * the supported range, or the error of r6xx_rfile_clear_offset() /
+ * r6xx_target().
+ */
+static int r6xx_compatible(struct ctx *ctx)
+{
+	const struct radeon_chipinfo *chip;
+	unsigned family;
+	unsigned n;
+	int err;
+
+	/* check we can replay */
+	chip = radeon_chipinfo_from_pciid(ctx->rfile.header.pciid);
+	if (!chip) {
+		fprintf(stderr, "%s unknown gpu pci id 0x%08x\n", __func__, ctx->rfile.header.pciid);
+		return -EINVAL;
+	}
+	family = chip->family;
+	if (!(family >= CHIP_R600 && family <= CHIP_RS880)) {
+		fprintf(stderr, "%s can't replay 0x%02x trace on 0x%02x hw\n", __func__, family, ctx->family);
+		return -EINVAL;
+	}
+
+	/* set front format & tile */
+	ctx->front.hw_format = COLOR_8_8_8_8;
+	ctx->front.hw_tile = ARRAY_1D_TILED_THIN1;
+
+	n = 0;
+	while (n < ctx->nbos) {
+		printf("bo[%d] %dkb\n", n, ctx->bos[n].size >> 10);
+		n++;
+	}
+
+	err = r6xx_rfile_clear_offset(&ctx->rfile);
+	return err ? err : r6xx_target(ctx);
+}
+
+/*
+ * Driver entry points for r6xx class hardware.
+ * Positional initializer: the entries must stay in the member order of
+ * struct ctx_drv (declared in replayx.h).
+ */
+const struct ctx_drv _r6xx_drv = {
+ r6xx_compatible, /* trace compatibility check and preparation */
+ r6xx_blit_init, /* blit state setup */
+ r6xx_blit_fini, /* blit state teardown */
+ r6xx_blit, /* draw a bo onto the front buffer */
+};
diff --git a/replayx_r6xxd.h b/r6xxd.h
index 4284503..c2b67d4 100644
--- a/replayx_r6xxd.h
+++ b/r6xxd.h
@@ -23,17 +23,21 @@
*
* Authors:
* Alex Deucher <alexander.deucher@amd.com>
- * Jerome Glisse
+ * Jerome Glisse
*/
-#ifndef REPLAYX_R6XXD_H
-#define REPLAYX_R6XXD_H
+#ifndef R6XXD_H
+#define R6XXD_H
/*****************************************************************************
* PM4
*/
-#define PKT3(it, n) ((3 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((it) & 0xff) << 8))
-#define PKT0(reg, n) ((0 << 30) | ((((n) - 1) & 0x3fff) << 16) | (((reg) >> 2) & 0xffff))
+/*
+ * Packet header builders. The type field sits in bits 30..31, so the
+ * constant is unsigned: shifting a signed 3 by 30 overflows a 32 bit int.
+ * @n is the number of payload dwords (stored as n - 1).
+ */
+#define PKT3(it, n) ((3u << 30) | ((((n) - 1) & 0x3fff) << 16) | (((it) & 0xff) << 8))
+#define PKT0(reg, n) ((0u << 30) | ((((n) - 1) & 0x3fff) << 16) | (((reg) >> 2) & 0xffff))
+/* Packet header decoders, inverse of the builders above. */
+#define PKTx_TYPE(v) (((v) >> 30) & 3)
+#define PKTx_COUNT(v) ((((v) >> 16) & 0x3fff) + 1)
+#define PKT3_IT(v) (((v) >> 8) & 0xff)
+#define PKT0_REG(v) (((v) & 0xffff) << 2)
/* packet3 commands */
#define IT_NOP 0x10
@@ -915,12 +919,33 @@
#define PA_SC_SCREEN_SCISSOR_BR__BR_X(x) (((x) & 0x00007fff) << 0)
#define PA_SC_SCREEN_SCISSOR_BR__BR_Y(x) (((x) & 0x00007fff) << 16)
#define CB_COLOR0_BASE 0x00028040
+#define CB_COLOR1_BASE 0x00028044
+#define CB_COLOR2_BASE 0x00028048
+#define CB_COLOR3_BASE 0x0002804c
+#define CB_COLOR4_BASE 0x00028050
+#define CB_COLOR5_BASE 0x00028054
+#define CB_COLOR6_BASE 0x00028058
+#define CB_COLOR7_BASE 0x0002805c
#define CB_COLOR0_SIZE 0x00028060
#define CB_COLOR0_SIZE__PITCH_TILE_MAX(x) (((x) & 0x000003ff) << 0)
#define CB_COLOR0_SIZE__SLICE_TILE_MAX(x) (((x) & 0x000fffff) << 10)
+#define CB_COLOR1_SIZE 0x00028064
+#define CB_COLOR2_SIZE 0x00028068
+#define CB_COLOR3_SIZE 0x0002806c
+#define CB_COLOR4_SIZE 0x00028070
+#define CB_COLOR5_SIZE 0x00028074
+#define CB_COLOR6_SIZE 0x00028078
+#define CB_COLOR7_SIZE 0x0002807c
#define CB_COLOR0_VIEW 0x00028080
#define CB_COLOR0_VIEW__SLICE_START(x) (((x) & 0x000007ff) << 0)
#define CB_COLOR0_VIEW__SLICE_MAX(x) (((x) & 0x000007ff) << 13)
+#define CB_COLOR1_VIEW 0x00028084
+#define CB_COLOR2_VIEW 0x00028088
+#define CB_COLOR3_VIEW 0x0002808c
+#define CB_COLOR4_VIEW 0x00028090
+#define CB_COLOR5_VIEW 0x00028094
+#define CB_COLOR6_VIEW 0x00028098
+#define CB_COLOR7_VIEW 0x0002809c
#define CB_COLOR0_INFO 0x000280a0
#define CB_COLOR0_INFO__ENDIAN(x) (((x) & 0x00000003) << 0)
#define ENDIAN_NONE 0
@@ -993,8 +1018,29 @@
#define CB_COLOR0_INFO__ROUND_MODE(x) (((x) & 0x00000001) << 25)
#define CB_COLOR0_INFO__TILE_COMPACT(x) (((x) & 0x00000001) << 26)
#define CB_COLOR0_INFO__SOURCE_FORMAT(x) (((x) & 0x00000001) << 27)
+#define CB_COLOR1_INFO 0x000280a4
+#define CB_COLOR2_INFO 0x000280a8
+#define CB_COLOR3_INFO 0x000280ac
+#define CB_COLOR4_INFO 0x000280b0
+#define CB_COLOR5_INFO 0x000280b4
+#define CB_COLOR6_INFO 0x000280b8
+#define CB_COLOR7_INFO 0x000280bc
#define CB_COLOR0_TILE 0x000280c0
+#define CB_COLOR1_TILE 0x000280c4
+#define CB_COLOR2_TILE 0x000280c8
+#define CB_COLOR3_TILE 0x000280cc
+#define CB_COLOR4_TILE 0x000280d0
+#define CB_COLOR5_TILE 0x000280d4
+#define CB_COLOR6_TILE 0x000280d8
+#define CB_COLOR7_TILE 0x000280dc
#define CB_COLOR0_FRAG 0x000280e0
+#define CB_COLOR1_FRAG 0x000280e4
+#define CB_COLOR2_FRAG 0x000280e8
+#define CB_COLOR3_FRAG 0x000280ec
+#define CB_COLOR4_FRAG 0x000280f0
+#define CB_COLOR5_FRAG 0x000280f4
+#define CB_COLOR6_FRAG 0x000280f8
+#define CB_COLOR7_FRAG 0x000280fc
#define CB_COLOR0_MASK 0x00028100
#define CB_COLOR0_MASK__CMASK_BLOCK_MAX(x) (((x) & 0x00000fff) << 0)
#define CB_COLOR0_MASK__FMASK_TILE_MAX(x) (((x) & 0x000fffff) << 12)
@@ -1616,8 +1662,53 @@
#define SQ_PGM_CF_OFFSET_FS__PGM_CF_OFFSET(x) (((x) & 0x000fffff) << 0)
#define SQ_VTX_SEMANTIC_CLEAR 0x000288e0
#define SQ_ALU_CONST_CACHE_PS_0 0x00028940
+#define SQ_ALU_CONST_CACHE_PS_1 0x00028944
+#define SQ_ALU_CONST_CACHE_PS_2 0x00028948
+#define SQ_ALU_CONST_CACHE_PS_3 0x0002894c
+#define SQ_ALU_CONST_CACHE_PS_4 0x00028950
+#define SQ_ALU_CONST_CACHE_PS_5 0x00028954
+#define SQ_ALU_CONST_CACHE_PS_6 0x00028958
+#define SQ_ALU_CONST_CACHE_PS_7 0x0002895c
+#define SQ_ALU_CONST_CACHE_PS_8 0x00028960
+#define SQ_ALU_CONST_CACHE_PS_9 0x00028964
+#define SQ_ALU_CONST_CACHE_PS_10 0x00028968
+#define SQ_ALU_CONST_CACHE_PS_11 0x0002896c
+#define SQ_ALU_CONST_CACHE_PS_12 0x00028970
+#define SQ_ALU_CONST_CACHE_PS_13 0x00028974
+#define SQ_ALU_CONST_CACHE_PS_14 0x00028978
+#define SQ_ALU_CONST_CACHE_PS_15 0x0002897c
#define SQ_ALU_CONST_CACHE_VS_0 0x00028980
+#define SQ_ALU_CONST_CACHE_VS_1 0x00028984
+#define SQ_ALU_CONST_CACHE_VS_2 0x00028988
+#define SQ_ALU_CONST_CACHE_VS_3 0x0002898c
+#define SQ_ALU_CONST_CACHE_VS_4 0x00028990
+#define SQ_ALU_CONST_CACHE_VS_5 0x00028994
+#define SQ_ALU_CONST_CACHE_VS_6 0x00028998
+#define SQ_ALU_CONST_CACHE_VS_7 0x0002899c
+#define SQ_ALU_CONST_CACHE_VS_8 0x000289a0
+#define SQ_ALU_CONST_CACHE_VS_9 0x000289a4
+#define SQ_ALU_CONST_CACHE_VS_10 0x000289a8
+#define SQ_ALU_CONST_CACHE_VS_11 0x000289ac
+#define SQ_ALU_CONST_CACHE_VS_12 0x000289b0
+#define SQ_ALU_CONST_CACHE_VS_13 0x000289b4
+#define SQ_ALU_CONST_CACHE_VS_14 0x000289b8
+#define SQ_ALU_CONST_CACHE_VS_15 0x000289bc
#define SQ_ALU_CONST_CACHE_GS_0 0x000289c0
+#define SQ_ALU_CONST_CACHE_GS_1 0x000289c4
+#define SQ_ALU_CONST_CACHE_GS_2 0x000289c8
+#define SQ_ALU_CONST_CACHE_GS_3 0x000289cc
+#define SQ_ALU_CONST_CACHE_GS_4 0x000289d0
+#define SQ_ALU_CONST_CACHE_GS_5 0x000289d4
+#define SQ_ALU_CONST_CACHE_GS_6 0x000289d8
+#define SQ_ALU_CONST_CACHE_GS_7 0x000289dc
+#define SQ_ALU_CONST_CACHE_GS_8 0x000289e0
+#define SQ_ALU_CONST_CACHE_GS_9 0x000289e4
+#define SQ_ALU_CONST_CACHE_GS_10 0x000289e8
+#define SQ_ALU_CONST_CACHE_GS_11 0x000289ec
+#define SQ_ALU_CONST_CACHE_GS_12 0x000289f0
+#define SQ_ALU_CONST_CACHE_GS_13 0x000289f4
+#define SQ_ALU_CONST_CACHE_GS_14 0x000289f8
+#define SQ_ALU_CONST_CACHE_GS_15 0x000289fc
#define PA_SU_POINT_SIZE 0x00028a00
#define PA_SU_POINT_SIZE__HEIGHT(x) (((x) & 0x0000ffff) << 0)
#define PA_SU_POINT_SIZE__WIDTH(x) (((x) & 0x0000ffff) << 16)
diff --git a/replayx.c b/replayx.c
index 0ba6003..fffa3fd 100644
--- a/replayx.c
+++ b/replayx.c
@@ -26,14 +26,15 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include "libdrm/radeon_drm.h"
#include "replayx.h"
#include "xf86drm.h"
-#include "radeon_drm.h"
static int ctx_start_visual(struct ctx *ctx)
{
@@ -226,8 +227,9 @@ static int ctx_start_dri2(struct ctx *ctx)
ctx->fd = open(ctx->device_name, O_RDWR | O_CLOEXEC);
if (ctx->fd == -1) {
- fprintf(stderr, "%s dri2 could not open %s\n", __func__,
- ctx->device_name);
+ perror(NULL);
+ fprintf(stderr, "%s dri2 could not open |%s| %d\n", __func__,
+ ctx->device_name, errno);
return -EINVAL;
}
diff --git a/replayx.h b/replayx.h
index 20347da..dda6d92 100644
--- a/replayx.h
+++ b/replayx.h
@@ -31,7 +31,8 @@
#include <xcb/dri2.h>
#include <xcb/xfixes.h>
#include <stdint.h>
-#include <libdrm/rati_file.h>
+#include <rati_file.h>
+#include "r6xx.h"
struct ctx;
@@ -52,76 +53,31 @@ struct ctx_bo {
unsigned hw_tile;
};
-#pragma pack(1)
struct radeon_cs_reloc {
uint32_t handle;
uint32_t read_domain;
uint32_t write_domain;
uint32_t flags;
};
-#pragma pack()
-
-struct r6xx_sq_conf {
- unsigned ps_prio;
- unsigned vs_prio;
- unsigned gs_prio;
- unsigned es_prio;
- unsigned num_ps_gprs;
- unsigned num_vs_gprs;
- unsigned num_gs_gprs;
- unsigned num_es_gprs;
- unsigned num_temp_gprs;
- unsigned num_ps_threads;
- unsigned num_vs_threads;
- unsigned num_gs_threads;
- unsigned num_es_threads;
- unsigned num_ps_stack_entries;
- unsigned num_vs_stack_entries;
- unsigned num_gs_stack_entries;
- unsigned num_es_stack_entries;
- unsigned sq_config;
-};
-
-struct r6xx_vbo {
- unsigned offset;
- unsigned ndw;
- unsigned stride;
- unsigned data_format;
- unsigned num_format_all;
- unsigned format_comp_all;
- unsigned srf_mode_all;
- unsigned endian_swap;
- unsigned mem_request_size;
- struct ctx_bo *bo;
-};
-
-struct r6xx_draw {
- unsigned primitive_type;
- unsigned num_instances;
- unsigned index_type;
- unsigned num_indices;
- unsigned vgt_draw_initiator;
-};
struct r6xx_blit {
- uint32_t *cs;
- unsigned cdw;
- struct ctx *ctx;
- struct ctx_bo shader_bo;
- struct radeon_cs_reloc relocs[3];
- struct r6xx_sq_conf sq_conf;
- unsigned vs_offset;
- unsigned ps_offset;
- unsigned vbo_offset;
- unsigned ps_size;
- unsigned vs_size;
+ uint32_t *cs;
+ unsigned cdw;
+ struct ctx *ctx;
+ struct ctx_bo shader_bo;
+ struct radeon_cs_reloc relocs[3];
+ struct r6xx_sq_conf sq_conf;
+ unsigned vs_offset;
+ unsigned ps_offset;
+ unsigned vbo_offset;
+ unsigned ps_size;
+ unsigned vs_size;
};
union ctx_blit {
- struct r6xx_blit r6xx;
+ struct r6xx_blit r6xx;
};
-
typedef int (*drv_compatible_t)(struct ctx *ctx);
typedef int (*drv_blit_init_t)(struct ctx *ctx);
typedef void (*drv_blit_fini_t)(struct ctx *ctx);
@@ -154,7 +110,7 @@ struct ctx {
struct ctx_drv drv;
uint32_t pciid;
union ctx_blit blit;
- struct ctx_bo **target;
+ struct ctx_bo *target[64];
unsigned ntarget;
unsigned ctarget;
};
diff --git a/replayx_drv.c b/replayx_drv.c
index 73a5061..e7b925b 100644
--- a/replayx_drv.c
+++ b/replayx_drv.c
@@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
@@ -36,6 +37,7 @@
#include "replayx.h"
#include "xf86drm.h"
#include "radeon_drm.h"
+#include "radeon_family.h"
int ctx_bo(struct ctx *ctx, struct ctx_bo *bo, void *data)
{
@@ -146,7 +148,7 @@ int ctx_rati_load(struct ctx *ctx, const char *filename)
}
ctx->ntarget = 0;
- ctx->nbos = ctx->rfile.header.v001.ndata_buffers;
+ ctx->nbos = ctx->rfile.header.ndata_buffers;
ctx->bos = calloc(1, sizeof(*ctx->bos) * ctx->nbos);
ctx->relocs = calloc(1, sizeof(*ctx->relocs) * ctx->nbos);
ctx->target = calloc(1, sizeof(void*) * ctx->nbos);
@@ -157,8 +159,8 @@ int ctx_rati_load(struct ctx *ctx, const char *filename)
}
for (i = 0; i < ctx->nbos; ++i) {
- ctx->bos[i].size = ctx->rfile.data_buffer[i].v001.size;
- ctx->bos[i].alignment = ctx->rfile.data_buffer[i].v001.alignment;
+ ctx->bos[i].size = ctx->rfile.data_buffer[i].size;
+ ctx->bos[i].alignment = ctx->rfile.data_buffer[i].alignment;
r = ctx_bo(ctx, &ctx->bos[i], ctx->rfile.data_buffer_ptr[i]);
if (r) {
return r;
@@ -198,7 +200,7 @@ int ctx_cs(struct ctx *ctx, void *cs, unsigned ndw, void *relocs, unsigned nrelo
int ctx_cs_rati(struct ctx *ctx)
{
return ctx_cs(ctx, ctx->rfile.cmd_buffer_ptr[0],
- ctx->rfile.cmd_buffer[0].v001.ndw,
+ ctx->rfile.cmd_buffer[0].ndw,
ctx->relocs, ctx->nbos);
}
diff --git a/tati.c b/tati.c
index b2a4a03..6ea70f4 100644
--- a/tati.c
+++ b/tati.c
@@ -27,6 +27,7 @@
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
+#include <stdint.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -39,11 +40,16 @@ void usage(const char *exename)
exit(0);
}
+int r6xx_tati_cmd_buffer_write(struct rati_file *rfile,
+ unsigned idx, FILE *file);
+
int main(int argc, char *argv[])
{
struct rati_file rfile;
char dstname[256], *tmp;
+ tati_cmd_buffer_write_t write = NULL;
+ write = r6xx_tati_cmd_buffer_write;
if (argc != 2) {
usage(argv[0]);
}
@@ -60,5 +66,5 @@ int main(int argc, char *argv[])
fprintf(stderr, "failed reading %s\n", argv[1]);
return -1;
}
- return tati_file_write(&rfile, dstname);
+ return tati_file_write(&rfile, dstname, write);
}