diff options
author | Matthias Hopf <mhopf@suse.de> | 2009-01-27 17:47:50 +0100 |
---|---|---|
committer | Matthias Hopf <mhopf@suse.de> | 2009-01-27 17:47:50 +0100 |
commit | 3c8f65f13aee27e63141f087ef1f77c69834ed2e (patch) | |
tree | a75ca6899e2e2de7f691eed673706840040bbef4 | |
parent | 596202e201d9839a6c51a11130123c8d5a53cd13 (diff) |
Add textured triangle performance test 'P'.
-rw-r--r-- | Makefile | 23 |
-rw-r--r-- | r600_demo.c | 7 |
-rw-r--r-- | r600_lib.c | 44 |
-rw-r--r-- | r600_lib.h | 4 |
-rw-r--r-- | r600_perf.c | 397 |
5 files changed, 464 insertions, 11 deletions
@@ -4,7 +4,7 @@ INCLUDES=-I/usr/include/drm LIBS=-ldrm CFLAGS:=-Wall -O0 -g $(INCLUDES) $(VERSION_FLAGS) $(CFLAGS) CC=gcc -CFILES=r600_demo.c r600_lib.c r600_basic.c r600_init.c r600_triangles.c r600_texture.c r600_pm4.c r600_exa.c +CFILES=r600_demo.c r600_lib.c r600_basic.c r600_init.c r600_triangles.c r600_texture.c r600_pm4.c r600_exa.c r600_perf.c all: r600_demo convert_shader @@ -48,25 +48,30 @@ tarball: # DO NOT DELETE r600_basic.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h -r600_basic.o: r600_hwapi.h r600_emit.h r600_lib.h +r600_basic.o: r600_emit.h r600_hwapi.h r600_lib.h r600_broken.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h -r600_broken.o: r600_reg_r7xx.h r600_lib.h r600_shader.h radeon_drm.h +r600_broken.o: r600_reg_r7xx.h r600_emit.h r600_hwapi.h r600_lib.h +r600_broken.o: r600_shader.h r600_demo.o: radeon_drm.h r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_demo.o: r600_reg_r7xx.h r600_lib.h r600_hwapi.h +r600_emit.o: r600_hwapi.h r600_exa.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h -r600_exa.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h r600_init.h +r600_exa.o: r600_emit.h r600_hwapi.h r600_lib.h r600_state.h r600_init.h r600_exa.o: r600_shader.h r600_init.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h -r600_init.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h +r600_init.o: r600_emit.h r600_hwapi.h r600_lib.h r600_state.h r600_lib.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h -r600_lib.o: r600_hwapi.h r600_emit.h r600_lib.h r600_shader.h radeon_drm.h +r600_lib.o: r600_emit.h r600_hwapi.h r600_lib.h r600_shader.h radeon_drm.h +r600_perf.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h +r600_perf.o: r600_emit.h r600_hwapi.h r600_lib.h r600_state.h r600_init.h +r600_perf.o: r600_shader.h r600_pm4.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h -r600_pm4.o: r600_hwapi.h r600_emit.h r600_lib.h r600_state.h 
r600_init.h +r600_pm4.o: r600_emit.h r600_hwapi.h r600_lib.h r600_state.h r600_init.h r600_pm4.o: r600_shader.h r600_reg.o: r600_reg_auto_r6xx.h r600_reg_r6xx.h r600_reg_r7xx.h r600_texture.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h -r600_texture.o: r600_reg_r7xx.h r600_hwapi.h r600_emit.h r600_lib.h +r600_texture.o: r600_reg_r7xx.h r600_emit.h r600_hwapi.h r600_lib.h r600_texture.o: r600_state.h r600_init.h r600_shader.h r600_triangles.o: r600_reg.h r600_reg_auto_r6xx.h r600_reg_r6xx.h -r600_triangles.o: r600_reg_r7xx.h r600_hwapi.h r600_emit.h r600_lib.h +r600_triangles.o: r600_reg_r7xx.h r600_emit.h r600_hwapi.h r600_lib.h r600_triangles.o: r600_state.h r600_init.h r600_shader.h diff --git a/r600_demo.c b/r600_demo.c index e192a58..c3577b1 100644 --- a/r600_demo.c +++ b/r600_demo.c @@ -509,6 +509,8 @@ void usage (char *argv[]) { "e\tEXA solid test\n" "E\tEXA copy test\n" "\n" + "P\ttextured pixel performance test\n" + "\n" "[reg]s are dumped (also ranges) or written to, register addresses in hex\n" "\n", argv[0]); @@ -596,7 +598,7 @@ int main(int argc, char *argv[]) adapter.color_gpu = display_gpu; adapter.color_pitch = display_width; - adapter.color_height = 480; + adapter.color_height = display_height; adapter.depth_gpu = display_gpu + display_width*4*500; adapter.depth_pitch = display_width; @@ -690,6 +692,9 @@ int main(int argc, char *argv[]) case 'E': test_copy (&adapter); break; + case 'P': + test_tex_quad_perf (&adapter); + break; default: fprintf (stderr, "***** Don't know '%c' test\n\n", argv[optind][i]); exit (1); @@ -34,6 +34,7 @@ #include <unistd.h> #include <math.h> #include <assert.h> +#include <sys/time.h> #include "r600_reg.h" #include "r600_emit.h" @@ -67,6 +68,33 @@ void wait_reg (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when) fprintf (stderr, "%s: set correctly after %d loops: 0x%x\n", when, i, v); } +float wait_reg_time (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when, float maxtime) +{ + struct timeval 
start, end; + int i; + float diff; + + gettimeofday (&start, NULL); + do { + for (i = 0; i < 1e6; i++) + if (reg_read32 (reg) == v) + break; + gettimeofday (&end, NULL); + diff = end.tv_sec - start.tv_sec + ((float)end.tv_usec - start.tv_usec) / 1e6; + } while (i == 1e6 && diff < maxtime); + + if (i == 1e6) { + fprintf (stderr, "***** %s: still not set after %f seconds: 0x%x, should be 0x%x\n", + when, diff, reg_read32 (reg), v); + show_state (adapt); + fprintf (stderr, "***** FAILED\n\n"); + exit (1); + } else if (verbose >= 2) + fprintf (stderr, "%s: set correctly after %f seconds: 0x%x\n", when, diff, v); + + return diff; +} + void wait_3d_idle_clean() { //flush caches, don't generate timestamp @@ -221,7 +249,7 @@ uint64_t upload (adapter_t *adapt, void *shader, int size, int offset) #endif if (verbose >= 2) { int i; - printf ("Upload %d dwords to offset 0x%x:\n", size/4, offset); + printf ("Upload %d dwords to offset 0x%x -> 0x"PRINTF_UINT64_HEX"\n", size/4, offset, addr); for (i = 0; i < size/4; i++) printf (" %08x%s", ((uint32_t *)shader)[i], (i & 7) == 7 ? 
"\n":""); if ((i & 7) != 0) @@ -698,3 +726,17 @@ uint32_t *create_sample_texture (int width, int height, int pitch) } +float time_flush_cmds (adapter_t *adapt, float maxtime) +{ + reg_write32 (SCRATCH_REG5, 0xdeadbeef); + wait_reg (adapt, SCRATCH_REG5, 0xdeadbeef, "time_flush_cmds: init"); + + wait_3d_idle_clean(); + pack0 (SCRATCH_REG5, 1); + e32 (0xcafebabe); + + flush_cmds (); + + return wait_reg_time (adapt, SCRATCH_REG5, 0xcafebabe, "time_flush_cmds: fence", maxtime); +} + @@ -106,6 +106,8 @@ void dump_shader (adapter_t *adapt, uint32_t *shader, int size, char *what); /* FIXME: naming: this waits on the CPU, while the others let the CP wait only */ void wait_reg (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when); +float wait_reg_time (adapter_t *adapt, uint32_t reg, uint32_t v, const char *when, float maxtime); +float time_flush_cmds (adapter_t *adapt, float maxtime); void wait_3d_idle_clean(void); void wait_3d_idle(void); @@ -137,6 +139,8 @@ void tmp_test (adapter_t *); /* r600_exa.c : */ void test_solid(adapter_t *adapt); void test_copy(adapter_t *adapt); +/* r600_perf.c : */ +void test_tex_quad_perf (adapter_t *); #endif diff --git a/r600_perf.c b/r600_perf.c new file mode 100644 index 0000000..745fe2a --- /dev/null +++ b/r600_perf.c @@ -0,0 +1,397 @@ +/* + * r600_demo + * + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Performance tests + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include "r600_reg.h" +#include "r600_emit.h" +#include "r600_lib.h" +#include "r600_state.h" +#include "r600_init.h" +#include "r600_shader.h" + + +#define MAX_NUM_QUADS 32768 +#define RENDER_QUAD_WIDTH 480 // Not 1:1 by intention +#define RENDER_QUAD_HEIGHT 600 + +/* + * Test rasterization performance with one texture + */ + +void test_tex_quad_perf(adapter_t *adapt) +{ + static uint32_t vs[] = { + // CF INST 0 + CF_DWORD0(ADDR(4)), + CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + // CF INST 1 + CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + // CF INST 2 + CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + 
END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)), + // padding vtx/tex inst are 128 bit aligned + 0x00000000, + 0x00000000, + // VTX INST 0 + VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)), + VTX_DWORD1_GPR(DST_GPR(1), DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)), + VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)), + VTX_DWORD_PAD, + // VTX INST 1 + VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)), + VTX_DWORD1_GPR(DST_GPR(0), DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_UNSIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)), + VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)), + VTX_DWORD_PAD, + } ; + + static uint32_t ps[] = { + // CF INST 0 + CF_DWORD0(ADDR(2)), + CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + // CF INST 1 + CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)), + CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), 
+ SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)), + // TEX INST 0 + TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)), + TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)), + TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)), + } ; + + float *vb, *v; + + draw_config_t draw_conf; + cb_config_t cb_conf; + vtx_resource_t vtx_res; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + + uint64_t vb_addr, vs_addr, ps_addr; + + int i, render_num; + float render_time; + + CLEAR (draw_conf); + CLEAR (cb_conf); + CLEAR (vtx_res); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + + + printf ("\n* Textured Quad Performance Test\n\n"); + + if (verbose) { + dump_shader (adapt, vs, sizeof(vs), "vertex"); + dump_shader (adapt, ps, sizeof(ps), "pixel"); + printf ("\n"); + } + + + /* Init */ + start_3d(adapt); + set_default_state(adapt); + + + /* Scissor / viewport */ + ereg (PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + /* Not necessary due to PA_CL_VTE_CNTL */ +// pack0 (PA_CL_VPORT_XSCALE_0, 4); +// efloat (1.0); +// efloat (0.0); +// efloat (1.0); +// efloat (0.0); + ereg (PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + + + /* Create vertex buffer */ + vb = v = calloc (MAX_NUM_QUADS, 4 * 4 * sizeof (float)); + for (i = 0; i < MAX_NUM_QUADS; i++) { + *v++ = 514; *v++ = 2; + *v++ = rand() / (6.0 * RAND_MAX); *v++ = rand() / (6.0 * RAND_MAX); + *v++ = 
514 + RENDER_QUAD_WIDTH; *v++ = 2; + *v++ = 1.0 - rand() / (6.0 * RAND_MAX); *v++ = rand() / (6.0 * RAND_MAX); + *v++ = 514 + RENDER_QUAD_WIDTH; *v++ = 2 + RENDER_QUAD_HEIGHT; + *v++ = 1.0 - rand() / (6.0 * RAND_MAX); *v++ = 1.0 - rand() / (6.0 * RAND_MAX); + *v++ = 514; *v++ = 2 + RENDER_QUAD_HEIGHT; + *v++ = rand() / (6.0 * RAND_MAX); *v++ = 1.0 - rand() / (6.0 * RAND_MAX); + } + + /* Upload */ + vs_addr = upload (adapt, vs, sizeof(vs), 0); + ps_addr = upload (adapt, ps, sizeof(ps), 4096); + vb_addr = upload (adapt, vb, MAX_NUM_QUADS * 4 * 4 * sizeof(float), 8192); + free (vb); + + + /* Shader */ + vs_conf.shader_addr = vs_addr; + vs_conf.num_gprs = 4; + vs_conf.stack_size = 1; + vs_setup (adapt, &vs_conf); + + ps_conf.shader_addr = ps_addr; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 0; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 1; + ps_conf.export_mode = 2; + ps_setup (adapt, &ps_conf); + + + /* Texture */ + tex_res.id = 0; + tex_res.w = 512; + tex_res.h = 512; + tex_res.pitch = adapt->display_pitch; + tex_res.depth = 0; /* ? */ + tex_res.dim = 1; //2D + tex_res.base = adapt->display_gpu; + tex_res.mip_base = adapt->display_gpu; + tex_res.format = FMT_8_8_8_8; + tex_res.request_size = 0; /* 2 ? */ + tex_res.dst_sel_x = 0; + tex_res.dst_sel_y = 1; + tex_res.dst_sel_z = 2; + tex_res.dst_sel_w = 3; + tex_res.base_level = 0; + tex_res.last_level = 0; /* 1 test */ + tex_res.perf_modulation = 0; /* 1 ? 
*/ + set_tex_resource (adapt, &tex_res); + + tex_samp.id = 0; + tex_samp.clamp_x = 0; + tex_samp.clamp_y = 0; + tex_samp.clamp_z = 0; + tex_samp.xy_mag_filter = 1; /* 0: point 1:bilinear 2:bicubic */ + tex_samp.xy_min_filter = 1; /* 0: point 1:bilinear 2:bicubic */ + tex_samp.z_filter = 0; /* 0: none 1: point 2: linear */ + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (adapt, &tex_samp); + + + /* Render setup */ + ereg (CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); + ereg (R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + ereg (CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ + + cb_conf.id = 0; + cb_conf.w = adapt->color_pitch; + cb_conf.h = adapt->color_height; + cb_conf.base = adapt->color_gpu; + cb_conf.format = FMT_8_8_8_8; + cb_conf.comp_swap = 0; + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(adapt, &cb_conf); + + ereg (PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + ereg (DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + + /* Interpolator setup */ + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + ereg (SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); + ereg (SPI_PS_IN_CONTROL_1, 0); + ereg (SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift) | + FLAT_SHADE_bit | + SEL_CENTROID_bit)); + ereg (SPI_INTERP_CONTROL_0, /* FLAT_SHADE_ENA_bit | */ 0); + + + /* Vertex buffer setup */ + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 4; + vtx_res.vtx_num_entries = MAX_NUM_QUADS * 4 * 4; /* Can overcommit if necessary */ + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = vb_addr; + set_vtx_resource (adapt, &vtx_res); + + + ereg (VGT_INSTANCE_STEP_RATE_0, 0); /* ? 
*/ + ereg (VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (VGT_MAX_VTX_INDX, vtx_res.vtx_num_entries / vtx_res.vtx_size_dw); + ereg (VGT_MIN_VTX_INDX, 0); + ereg (VGT_INDX_OFFSET, 0); + + flush_cmds (); + + /* Loop: Start with few quads only, if rendering less than a second, increase */ + for (render_num = 32; render_num < MAX_NUM_QUADS; render_num *= 2) + { + /* Draw */ + draw_conf.prim_type = DI_PT_QUADLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = render_num * 4; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + draw_auto (adapt, &draw_conf); + + render_time = time_flush_cmds (adapt, 5); + printf (" Rendering %d textured quads: %.3f ms\n", render_num, render_time * 1000); + if (render_time >= 1) + break; + } + printf ("\n Rendering speed: %.1f textured Megapixels per second\n\n", + (float) render_num * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e6) / render_time); +} + |