diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-03-19 16:01:50 +0100 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-03-19 17:39:42 +0100 |
commit | e56f0f784d229880394800780f85237214715aa5 (patch) | |
tree | c8e4db8dacff0cabfc5e091fce8ad32368b3524c | |
parent | deee7e84f92b8612d70f53b43fbcb8c8317ad7aa (diff) |
r600g: Add get/set to handle ALLOC_EXPORT_RAT_WORD0
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_sq.h | 59 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 119 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 34 |
5 files changed, 248 insertions, 10 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fffc436e82..cacb82ff37 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -106,6 +106,22 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); id++; + } else if (cfop->flags & CF_MEM) { + /* MEM_RAT_CACHELESS instructions */ + bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(cf->output.gpr) | + S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(cf->output.elem_size) | + S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(cf->output.rat_id) | + S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(cf->output.rat_inst) | + S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(cf->output.index_gpr) | + S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); + bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size); + if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + id++; } else { /* branch, loop, call, return instructions */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); @@ -118,6 +134,28 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) return 0; } +void eg_bytecode_export_rat_read(struct r600_bytecode *bc, + struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) { + output->rat_id = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(word0); + output->rat_inst = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(word0); + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(word0); + output->gpr = 
G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(word0); + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(word0); + output->index_gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(word0); + + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); + output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); + output->op = r600_isa_cf_by_opcode(bc->isa, + G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), /* is_cf_alu = */ 0 ); + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); + output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); + output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); +} + void eg_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) { diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h index b534872f06..83588de035 100644 --- a/src/gallium/drivers/r600/eg_sq.h +++ b/src/gallium/drivers/r600/eg_sq.h @@ -176,6 +176,65 @@ #define G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) >> 30) & 0x3) #define C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE 0x3FFFFFFF /* done */ +#define P_SQ_CF_ALLOC_EXPORT_WORD0_RAT +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x) (((x) & 0xF) << 0) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x) (((x) >> 0) & 0xF) +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x) & 0x3F) << 4) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x) >> 4) & 0x3F) +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP 0x00000000 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_TYPED 0x00000001 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPX_INT 0x00000004 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD 0x00000007 
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB 0x00000008 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB 0x00000009 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT 0x0000000A +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT 0x0000000B +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT 0x0000000C +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT 0x0000000D +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND 0x0000000E +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR 0x0000000F +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR 0x00000010 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT 0x00000012 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT 0x00000013 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_DWORD 0x00000014 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_SHORT 0x00000015 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_BYTE 0x00000016 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP_RTN 0x00000020 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XCHG_RTN 0x00000022 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPXCHG_INT_RTN 0x00000024 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD_RTN 0x00000027 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB_RTN 0x00000028 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB_RTN 0x00000029 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT_RTN 0x0000002A +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT_RTN 0x0000002B +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT_RTN 0x0000002C +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT_RTN 0x0000002D +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND_RTN 0x0000002E +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR_RTN 0x0000002F +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR_RTN 0x00000030 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT_RTN 0x00000032 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT_RTN 
0x00000033 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x) (((x) & 0x3) << 11) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x) (((x) >> 11) & 0x3) +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_NONE 0x00000000 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_0 0x00000001 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_1 0x00000002 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_INVALID 0x00000003 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x) (((x) & 0x3) << 13) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x) (((x) >> 13) & 0x3) +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE 0x00000000 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND 0x00000001 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_ACK 0x00000002 +#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND_ACK 0x00000003 +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x) (((x) & 0x7F) << 15) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x) (((x) >> 15) & 0x7F) +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x) (((x) & 0x1) << 22) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x) (((x) >> 22) & 0x1) +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x) (((x) & 0x7F) << 23) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x) (((x) >> 23) & 0x7F) +#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x) (((x) & 0x3) << 30) +#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x) (((x) >> 30) & 0x3) +/* done */ #define P_SQ_CF_ALLOC_EXPORT_WORD1 #define S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) & 0xF) << 16) #define G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) >> 16) & 0xF) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 0d570ca250..b4ace0d0ff 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1927,6 +1927,125 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) if (cf->output.end_of_program) fprintf(stderr, "EOP "); fprintf(stderr, "\n"); + } else if 
(r600_isa_cf(cf->op)->flags & CF_MEM) { + int o = 0; + const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", + "WRITE_IND_ACK"}; + const char *eg_rat_inst[] = { + "NOP", // 0 + "STORE_TYPED", + "STORE_RAW", + "STORE_RAW_FDENORM", + "CMPXCHG_INT", + "CMPXCHG_FLT", + "CMPXCHG_FDENORM", + "ADD", + "SUB", + "RSUB", + "MIN_INT", + "MIN_UINT", + "MAX_INT", + "MAX_UINT", + "AND", + "OR", + "XOR", + "MSKOR", + "INC_UINT", + "DEC_UINT", + "NOP_RTN", + "XCHG_RTN", + "XCHG_FDENORM_RTN", + "CMPXCHG_INT_RTN", + "CMPXCHG_FLT_RTN", + "CMPXCHG_FDENORM_RTN", + "ADD_RTN", + "SUB_RTN", + "SUBR_RTN", + "MIN_INT_RTN", + "MIN_UINT_RTN", + "MAX_INT_RTN", + "MAX_UINT_RTN", + "AND_RTN", + "OR_RTN", + "XOR_RTN", + "MSKOR_RTN", + "INC_UINT_RTN", + "DEC_UINT_RTN" + }; + + const char *cm_rat_inst[] = { + "NOP", // 0 + "STORE_TYPED", + "?", + "?", + "CMPX_INT", //4 + "?", "?", + "ADD", //7 + "SUB", + "RSUB", + "MIN_INT", + "MIN_UINT", + "MAX_INT", + "MAX_UINT", + "AND", + "OR", + "XOR", + "?", + "INC_UINT", //12 + "DEC_UINT", + "STORE_DWORD", + "STORE_SHORT", + "STORE_BYTE", + "?", "?", "?", "?", "?", "?", "?", "?", "?", + "NOP_RTN", //20 + "?", + "XCHG_RTN", //22 + "?", + "CMPXCHG_INT_RTN", //24 + "?", "?", + "ADD_RTN", //27 + "SUB_RTN", + "SUBR_RTN", + "MIN_INT_RTN", + "MIN_UINT_RTN", + "MAX_INT_RTN", + "MAX_UINT_RTN", + "AND_RTN", + "OR_RTN", + "XOR_RTN", + "?", + "INC_UINT_RTN", //32 + "DEC_UINT_RTN" + }; + o += fprintf(stderr, "%04d %08X %08X %s ", id, + bc->bytecode[id], bc->bytecode[id + 1], cfop->name); + o += print_indent(o, 35); + if (bc->chip_class < CAYMAN) + o += fprintf(stderr, "%s ", eg_rat_inst[cf->output.rat_inst]); + else + o += fprintf(stderr, "%s ", cm_rat_inst[cf->output.rat_inst]); + o += fprintf(stderr, "%s ", exp_type[cf->output.type]); + o += print_indent(o, 55); + o += fprintf(stderr, "R%d.", cf->output.gpr); + for (i = 0; i < 4; ++i) { + if (cf->output.comp_mask & (1 << i)) + o += print_swizzle(i); + else + o += print_swizzle(7); + } + o += fprintf(stderr, ", 
R%d, ", cf->output.index_gpr); + o += fprintf(stderr, "UAV:%d ", cf->output.rat_id); + + o += print_indent(o, 67); + + fprintf(stderr, " ES:%i ", cf->output.elem_size); + if (cf->output.array_size != 0xFFF) + fprintf(stderr, "AS:%i ", cf->output.array_size); + if (!cf->output.barrier) + fprintf(stderr, "NO_BARRIER "); + if (cf->output.end_of_program) + fprintf(stderr, "EOP "); + fprintf(stderr, "\n"); } else { fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id], bc->bytecode[id + 1], cfop->name); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 1465c313b7..38e6845b80 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -127,6 +127,11 @@ struct r600_bytecode_output { unsigned swizzle_w; unsigned burst_count; unsigned barrier; + + unsigned rat_id; + unsigned rat_inst; + unsigned index_gpr; + }; struct r600_bytecode_kcache { @@ -254,7 +259,8 @@ void r600_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); void eg_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); - +void eg_bytecode_export_rat_read(struct r600_bytecode *bc, + struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp, unsigned *endian); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 29facf70ab..44c7c1271c 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -568,6 +568,17 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, return bytes_read; } +static +void r600_export_rat_from_byte_stream(struct r600_shader_ctx *ctx, + uint32_t word0, uint32_t word1) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct 
r600_bytecode_output)); + eg_bytecode_export_rat_read(ctx->bc, &output, word0, word1); + r600_bytecode_add_output(ctx->bc, &output); + return; +} + static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned bytes_read) { @@ -588,7 +599,7 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned num_bytes) { unsigned bytes_read = 0; - unsigned i, byte; + unsigned byte; while (bytes_read < num_bytes) { char inst_type = bytes[bytes_read++]; switch (inst_type) { @@ -604,16 +615,21 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_fc_from_byte_stream(ctx, bytes, bytes_read); break; - case 3: - r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); - for (i = 0; i < 2; i++) { - for (byte = 0 ; byte < 4; byte++) { - ctx->bc->cf_last->isa[i] |= - (bytes[bytes_read++] << (byte * 8)); - } + case 3: { + uint32_t word0 = 0, word1 = 0; + for (byte = 0 ; byte < 4; byte++) + word0 |= (bytes[bytes_read++] << (byte * 8)); + for (byte = 0 ; byte < 4; byte++) + word1 |= (bytes[bytes_read++] << (byte * 8)); + if (((word1 >> 22) & 0xFF) == 0x57) { //CF_INST_MEM_RAT_CACHELESS + r600_export_rat_from_byte_stream(ctx, word0, word1); + } else { + r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); + ctx->bc->cf_last->isa[0] |= word0; + ctx->bc->cf_last->isa[1] |= word1; } break; - + } case 4: bytes_read = r600_vtx_from_byte_stream(ctx, bytes, bytes_read); |