summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthias Hopf <mhopf@suse.de>, 2009-01-29 17:24:21 +0100
committer: Matthias Hopf <mhopf@suse.de>, 2009-01-29 17:24:21 +0100
commit: 5d79809fa5c471c283363e4746b965cf755c6606 (patch)
tree: ea87fb46587cf01bff452768db55c0922f626d36
parent: 3d13d84733d767b3b6b65771dfbf10705022cbad (diff)
Use multiply-and-add MULADD instead of MUL for performance tests.
Getting actually close to marketing values.
-rw-r--r-- r600_perf.c 73
1 files changed, 27 insertions, 46 deletions
diff --git a/r600_perf.c b/r600_perf.c
index cb0308e..c386d92 100644
--- a/r600_perf.c
+++ b/r600_perf.c
@@ -553,15 +553,11 @@ void test_alu_quad_perf(adapter_t *adapt)
INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(127),
DST_REL(ABSOLUTE),
@@ -578,15 +574,11 @@ void test_alu_quad_perf(adapter_t *adapt)
INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(127),
DST_REL(ABSOLUTE),
@@ -603,15 +595,11 @@ void test_alu_quad_perf(adapter_t *adapt)
INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(127),
DST_REL(ABSOLUTE),
@@ -628,15 +616,11 @@ void test_alu_quad_perf(adapter_t *adapt)
INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(127),
DST_REL(ABSOLUTE),
@@ -653,15 +637,11 @@ void test_alu_quad_perf(adapter_t *adapt)
INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- ps[ps_size++] = ALU_DWORD1_OP2(adapt->chipset,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ps[ps_size++] = ALU_DWORD1_OP3(SRC2_SEL(2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(126),
DST_REL(ABSOLUTE),
@@ -813,10 +793,11 @@ void test_alu_quad_perf(adapter_t *adapt)
break;
}
+ /* GFLOPS: MULADD is 2 FLOPs, 5 components per ALU instruction, alu_num instrs per clause, NUM_ALU_CLAUSES clauses */
printf ("\n ALU speed: %d clauses, %d ALUs -> %.1f Megapixels/s -> %.2f GigaFLOPS\n\n",
NUM_ALU_CLAUSES, alu_num,
(float) render_num * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e6) / render_time,
- (float) 5 * render_num * alu_num * NUM_ALU_CLAUSES * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e9) / render_time);
+ (float) 2 * 5 * render_num * alu_num * NUM_ALU_CLAUSES * (RENDER_QUAD_WIDTH * RENDER_QUAD_HEIGHT / 1e9) / render_time);
}
}