diff options
author | Matthias Hopf <mhopf@suse.de> | 2009-01-29 17:24:21 +0100 |
---|---|---|
committer | Matthias Hopf <mhopf@suse.de> | 2009-01-29 17:24:21 +0100 |
commit | 5d79809fa5c471c283363e4746b965cf755c6606 (patch) | |
tree | ea87fb46587cf01bff452768db55c0922f626d36 | |
parent | 3d13d84733d767b3b6b65771dfbf10705022cbad (diff) |
Use multiply-and-add (MULADD) instead of MUL for the ALU performance tests.
The measured throughput now actually gets close to the marketing values.
-rw-r--r-- | r600_perf.c | 73 |
1 files changed, 27 insertions, 46 deletions
diff --git a/r600_perf.c b/r600_perf.c index cb0308e..c386d92 100644 --- a/r600_perf.c +++ b/r600_perf.c @@ -553,15 +553,11 @@ void test_alu_quad_perf(adapter_t *adapt) INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(127), DST_REL(ABSOLUTE), @@ -578,15 +574,11 @@ void test_alu_quad_perf(adapter_t *adapt) INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(127), DST_REL(ABSOLUTE), @@ -603,15 +595,11 @@ void test_alu_quad_perf(adapter_t *adapt) INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(127), DST_REL(ABSOLUTE), @@ -628,15 +616,11 @@ void test_alu_quad_perf(adapter_t *adapt) INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), 
- OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(127), DST_REL(ABSOLUTE), @@ -653,15 +637,11 @@ void test_alu_quad_perf(adapter_t *adapt) INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(126), DST_REL(ABSOLUTE), @@ -813,10 +793,11 @@ void test_alu_quad_perf(adapter_t *adapt) break; } + /* GFLOPS: MULADD is 2 FLOPs, 5 components per ALU instruction, alu_num instrs per clause, NUM_ALU_CLAUSES clauses */ printf ("\n ALU speed: %d clauses, %d ALUs -> %.1f Megapixels/s -> %.2f GigaFLOPS\n\n", NUM_ALU_CLAUSES, alu_num, (float) render_num * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e6) / render_time, - (float) 5 * render_num * alu_num * NUM_ALU_CLAUSES * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e9) / render_time); + (float) 2 * 5 * render_num * alu_num * NUM_ALU_CLAUSES * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e9) / render_time); } } |