diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-02-28 18:38:11 -0500 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-02-28 18:38:11 -0500 |
commit | c7198168d1bf1bdaa13afc451fe5bd9ab00802f1 (patch) | |
tree | 3512de326b237cbb9b91bd7f36e656fdbc3cbc7e | |
parent | aa1c734b3ca445b5af743b9bad6a48ca7ba21f3c (diff) |
XXX: Initial lds work (lds)
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_sq.h | 94 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_isa.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_isa.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 8 |
8 files changed, 168 insertions, 5 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fffc436e823..6a484a67730 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -138,3 +138,9 @@ void eg_bytecode_export_read(struct r600_bytecode *bc, output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } + +void eg_bytecode_lds_read(struct r600_bytecode *bc, + struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) +{ + +} diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h index b534872f062..f381924581c 100644 --- a/src/gallium/drivers/r600/eg_sq.h +++ b/src/gallium/drivers/r600/eg_sq.h @@ -514,6 +514,100 @@ #define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) #define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF +#define R_008DFC_SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO 0x008DFC +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A(x) (((x) & 0x1FFF) << 0) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A(x) (((x) >> 0) & 0x1FFF) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A 0xFFFFE000 +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A(x) (((x) & 0x7F) << 13) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A(x) (((x) >> 13) & 0x7F) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A 0xFFF01FFF +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A(x) (((x) & 0x1) << 22) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A(x) (((x) >> 22) & 0x1) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A 0xFFBFFFFF +#define R_008DFC_SQ_ALU_WORD1_LDS_IDX_OP 0x008DFC +#define S_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_WORD1_LDS_IDX_OP_SRC2_SEL 0xFFFFFE00 +#define S_SQ_WORD1_LDS_IDX_OP_SRC2_REL(x) (((x) & 0x1) << 9) +#define G_SQ_WORD1_LDS_IDX_OP_SRC2_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_WORD1_LDS_IDX_OP_SRC2_REL 0xFFFFFDFF +#define 
S_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(x) (((x) & 0x03) << 10) +#define G_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(x) (((x) >> 10) & 0x03) +#define C_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN 0xFFFFF3FF +#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x) (((x) & 0x1) << 12) +#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x) (((x) >> 12) & 0x1) +#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1 0xFFFFEFFF +#define S_SQ_WORD1_LDS_IDX_OP_ALU_INST(x) (((x) & 0x1F) << 13) +#define G_SQ_WORD1_LDS_IDX_OP_ALU_INST(x) (((x) >> 13) & 0x1F) +#define C_SQ_WORD1_LDS_IDX_OP_ALU_INST 0xFFFC1FFF +#define S_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(x) (((x) & 0x07) << 18) +#define G_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(x) (((x) >> 18) & 0x07) +#define C_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE 0xFFE3FFFF +#define S_SQ_WORD1_LDS_IDX_OP_LDS_OP(x) (((x) & 0x3F) << 21) +#define G_SQ_WORD1_LDS_IDX_OP_LDS_OP(x) (((x) >> 21) & 0x3F) +#define C_SQ_WORD1_LDS_IDX_OP_LDS_OP 0xF81FFFFF +#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x) (((x) & 0x1) << 27) +#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x) (((x) >> 27) & 0x1) +#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0 0xF7FFFFFF +#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x) (((x) & 0x1) << 28) +#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x) (((x) >> 28) & 0x1) +#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2 0xEFFFFFFF +#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x) (((x) & 0x1) << 31) +#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x) (((x) >> 31) & 0x1) +#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3 0x7FFFFFFF + +#define R_008DFC_SQ_ALU_WORD0_LDS_IDX_OP 0x008DFC +#define S_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(x) (((x) & 0x1FF) << 0) +#define G_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(x) (((x) >> 0) & 0x1FF) +#define C_SQ_WORD0_LDS_IDX_OP_SRC0_SEL 0xFFFFFE00 +#define S_SQ_WORD0_LDS_IDX_OP_SRC0_REL(x) (((x) & 0x1) << 9) +#define G_SQ_WORD0_LDS_IDX_OP_SRC0_REL(x) (((x) >> 9) & 0x1) +#define C_SQ_WORD0_LDS_IDX_OP_SRC0_REL 0xFFFFFDFF +#define S_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(x) (((x) & 0x03) << 10) +#define G_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(x) (((x) >> 
10) & 0x03) +#define C_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN 0xFFFFF3FF +#define S_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x) (((x) & 0x1) << 12) +#define G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x) (((x) >> 12) & 0x1) +#define C_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4 0xFFFFEFFF +#define S_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(x) (((x) & 0x1FF) << 13) +#define G_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(x) (((x) >> 13) & 0x1FF) +#define C_SQ_WORD0_LDS_IDX_OP_SRC1_SEL 0xFFC01FFF +#define S_SQ_WORD0_LDS_IDX_OP_SRC1_REL(x) (((x) & 0x1) << 22) +#define G_SQ_WORD0_LDS_IDX_OP_SRC1_REL(x) (((x) >> 22) & 0x1) +#define C_SQ_WORD0_LDS_IDX_OP_SRC1_REL 0xFFBFFFFF +#define S_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(x) (((x) & 0x03) << 23) +#define G_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(x) (((x) >> 23) & 0x03) +#define C_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN 0xFE7FFFFF +#define S_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x) (((x) & 0x1) << 25) +#define G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x) (((x) >> 25) & 0x1) +#define C_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5 0xFDFFFFFF +#define S_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(x) (((x) & 0x07) << 26) +#define G_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(x) (((x) >> 26) & 0x07) +#define C_SQ_WORD0_LDS_IDX_OP_INDEX_MODE 0xE3FFFFFF +#define S_SQ_WORD0_LDS_IDX_OP_PRED_SEL(x) (((x) & 0x03) << 29) +#define G_SQ_WORD0_LDS_IDX_OP_PRED_SEL(x) (((x) >> 29) & 0x03) +#define C_SQ_WORD0_LDS_IDX_OP_PRED_SEL 0x9FFFFFFF +#define S_SQ_WORD0_LDS_IDX_OP_LAST(x) (((x) & 0x1) << 31) +#define G_SQ_WORD0_LDS_IDX_OP_LAST(x) (((x) >> 31) & 0x1) +#define C_SQ_WORD0_LDS_IDX_OP_LAST 0x7FFFFFFF + +#define R_008DFC_SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI 0x008DFC +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B(x) (((x) & 0x1FFF) << 0) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B(x) (((x) >> 0) & 0x1FFF) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B 0xFFFFE000 +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B(x) (((x) & 0x7F) << 13) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B(x) (((x) >> 13) & 0x7F) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B 
0xFFF01FFF +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B(x) (((x) & 0x1) << 22) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B(x) (((x) >> 22) & 0x1) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B 0xFFBFFFFF +#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32(x) (((x) & 0x1) << 31) +#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32(x) (((x) >> 31) & 0x1) +#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32 0x7FFFFFFF + +#endif /* _8XX_LDS_H */ + + #define V_SQ_CF_COND_ACTIVE 0x00 #define V_SQ_CF_COND_FALSE 0x01 #define V_SQ_CF_COND_BOOL 0x02 diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 128464e101a..4c49e3fd620 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -257,7 +257,7 @@ static void evergreen_emit_direct_dispatch( int group_size = 1; int grid_size = 1; /* XXX: Enable lds and get size from cs_shader_state */ - unsigned lds_size = 0; + unsigned lds_size = 64; /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3632aa5763c..0c09c2a48cc 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2690,3 +2690,45 @@ void r600_bytecode_export_read(struct r600_bytecode *bc, output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } + +void r600_bytecode_lds_read(struct r600_bytecode *bc, + struct r600_bytecode_alu *lds, uint32_t word0, uint32_t word1) +{ + /* WORD0 */ + alu->src[0].sel = G_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(word0); + alu->src[0].rel = G_SQ_WORD0_LDS_IDX_OP_SRC0_REL(word0); + alu->src[0].chan = G_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(word0); + alu->src[1].sel = G_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(word0); + alu->src[1].rel = G_SQ_WORD0_LDS_IDX_OP_SRC1_REL(word0); + alu->src[1].chan = 
G_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(word0); + alu->index_mode = G_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(word0); + alu->pred_sel = G_SQ_WORD0_LDS_IDX_OP_PRED_SEL(word0); + alut->last = G_SQ_WORD0_LDS_IDX_OP_LAST(word0); + + /* WORD1 */ + alu->src[2].sel = G_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_WORD1_LDS_IDX_OP_SRC2_REL(word1); + alu->src[2].chan = G_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(word1); + alu->op = r600_isa_lds_by_opcode(bc->isa, + G_SQ_WORD1_LDS_IDX_OP_ALU_INST(word1)); + alu->bank_swizzle = G_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(word1); + if (alu->bank_swizzle) { + alu->bank_swizzle_forc = alu->bank_swizzle; + } + + alu->lds_offset = + (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(word1) << 0) | + (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(word1) << 1) | + (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(word1) << 2) | + (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(word1) << 3) | + (G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(word0) << 4) | + (G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(word0) << 5) + ; + + if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/ + { + alu->is_op3 = 1; + alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1); + alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1); + +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 03cd2382140..0c8caf820be 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -58,6 +58,7 @@ struct r600_bytecode_alu { unsigned bank_swizzle_force; unsigned omod; unsigned index_mode; + unsigned lds_offset; }; struct r600_bytecode_tex { diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c index 20b17594f49..28cba24eb10 100644 --- a/src/gallium/drivers/r600/r600_isa.c +++ b/src/gallium/drivers/r600/r600_isa.c @@ -54,15 +54,20 @@ int r600_isa_init(struct r600_context *ctx, struct r600_isa *isa) { isa->cf_map = calloc(256, sizeof(unsigned)); if (!isa->cf_map) return -1; + isa->lds_map = calloc(256, sizeof(unsigned)); + if (!isa->lds_map) + return -1; 
for (i = 0; i < TABLE_SIZE(alu_op_table); ++i) { const struct alu_op_info *op = &alu_op_table[i]; unsigned opc; - if (op->flags & AF_LDS || op->slots[isa->hw_class] == 0) + if (op->slots[isa->hw_class] == 0) continue; opc = op->opcode[isa->hw_class >> 1]; assert(opc != -1); - if (op->src_count == 3) + if (op->flags & AF_LDS) + isa->lds_map[(opc >> 8)] = i + 1; + else if (op->src_count == 3) isa->alu_op3_map[opc] = i + 1; else isa->alu_op2_map[opc] = i + 1; @@ -105,6 +110,8 @@ int r600_isa_destroy(struct r600_isa *isa) { free(isa->fetch_map); if (isa->cf_map) free(isa->cf_map); + if (isa->lds_map) + free(isa->lds_map); free(isa); return 0; diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h index c6f115fc070..e792e23870b 100644 --- a/src/gallium/drivers/r600/r600_isa.h +++ b/src/gallium/drivers/r600/r600_isa.h @@ -1132,6 +1132,7 @@ struct r600_isa { unsigned *alu_op3_map; unsigned *fetch_map; unsigned *cf_map; + unsigned *lds_map; }; struct r600_context; @@ -1202,6 +1203,14 @@ r600_isa_alu_by_opcode(struct r600_isa* isa, unsigned opcode, unsigned is_op3) { } static inline unsigned +r600_isa_lds_by_opcode(struct r600_isa* isa, unsigned opcode) { + unsigned op; + op = isa->lds_map[opcode]; + assert(op); + return op - 1; +} + +static inline unsigned r600_isa_fetch_by_opcode(struct r600_isa* isa, unsigned opcode) { unsigned op; assert(isa->fetch_map); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 86424634d0f..8683247d7d8 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -317,7 +317,8 @@ static unsigned r600_src_from_byte_stream(unsigned char * bytes, } static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, - unsigned char * bytes, unsigned bytes_read) + unsigned char * bytes, unsigned bytes_read, + unsigned is_lds) { unsigned src_idx, src_num; struct r600_bytecode_alu alu; @@ -587,7 +588,7 @@ static void 
r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, switch (inst_type) { case 0: bytes_read = r600_alu_from_byte_stream(ctx, bytes, - bytes_read); + bytes_read, 0); break; case 1: bytes_read = r600_tex_from_byte_stream(ctx, bytes, @@ -614,6 +615,9 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, case 5: bytes_read = r600_export_from_byte_stream(ctx, bytes, bytes_read); + case 6: + bytes_read = r600_alu_from_byte_stream(ctx, bytes, + bytes_read, 1); break; default: /* XXX: Error here */ |