summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Stellard <thomas.stellard@amd.com> 2013-02-28 18:38:11 -0500
committer Tom Stellard <thomas.stellard@amd.com> 2013-02-28 18:38:11 -0500
commit c7198168d1bf1bdaa13afc451fe5bd9ab00802f1 (patch)
tree 3512de326b237cbb9b91bd7f36e656fdbc3cbc7e
parent aa1c734b3ca445b5af743b9bad6a48ca7ba21f3c (diff)
XXX: Initial lds work (lds)
-rw-r--r-- src/gallium/drivers/r600/eg_asm.c 6
-rw-r--r-- src/gallium/drivers/r600/eg_sq.h 94
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c 2
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 42
-rw-r--r-- src/gallium/drivers/r600/r600_asm.h 1
-rw-r--r-- src/gallium/drivers/r600/r600_isa.c 11
-rw-r--r-- src/gallium/drivers/r600/r600_isa.h 9
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 8
8 files changed, 168 insertions, 5 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index fffc436e823..6a484a67730 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -138,3 +138,9 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+
+void eg_bytecode_lds_read(struct r600_bytecode *bc,
+ struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
+{
+ /* XXX: stub -- the body is currently empty; bc, alu, word0 and word1 are unused. */
+}
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872f062..f381924581c 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -514,6 +514,100 @@
#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7)
#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF
+#define R_008DFC_SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO 0x008DFC /* fields: OFFSET_A [12:0], STRIDE_A [19:13], THREAD_REL_A [22] */
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A(x) (((x) & 0x1FFF) << 0) /* S_: place value x into the field */
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A(x) (((x) >> 0) & 0x1FFF) /* G_: extract the field from word x */
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_OFFSET_A 0xFFFFE000 /* C_: AND-mask that clears the field */
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A(x) (((x) & 0x7F) << 13) /* bits [19:13] */
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A(x) (((x) >> 13) & 0x7F)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_STRIDE_A 0xFFF01FFF
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A(x) (((x) & 0x1) << 22) /* bit [22] */
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A(x) (((x) >> 22) & 0x1)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_LO_THREAD_REL_A 0xFFBFFFFF
+#define R_008DFC_SQ_ALU_WORD1_LDS_IDX_OP 0x008DFC /* S_ packs a value into a field, G_ extracts it, C_ is the AND-mask clearing it */
+#define S_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(x) (((x) & 0x1FF) << 0) /* bits [8:0] */
+#define G_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(x) (((x) >> 0) & 0x1FF)
+#define C_SQ_WORD1_LDS_IDX_OP_SRC2_SEL 0xFFFFFE00
+#define S_SQ_WORD1_LDS_IDX_OP_SRC2_REL(x) (((x) & 0x1) << 9) /* bit [9] */
+#define G_SQ_WORD1_LDS_IDX_OP_SRC2_REL(x) (((x) >> 9) & 0x1)
+#define C_SQ_WORD1_LDS_IDX_OP_SRC2_REL 0xFFFFFDFF
+#define S_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(x) (((x) & 0x03) << 10) /* bits [11:10] */
+#define G_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(x) (((x) >> 10) & 0x03)
+#define C_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN 0xFFFFF3FF
+#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x) (((x) & 0x1) << 12) /* bit [12] */
+#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(x) (((x) >> 12) & 0x1)
+#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1 0xFFFFEFFF
+#define S_SQ_WORD1_LDS_IDX_OP_ALU_INST(x) (((x) & 0x1F) << 13) /* bits [17:13] */
+#define G_SQ_WORD1_LDS_IDX_OP_ALU_INST(x) (((x) >> 13) & 0x1F)
+#define C_SQ_WORD1_LDS_IDX_OP_ALU_INST 0xFFFC1FFF
+#define S_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(x) (((x) & 0x07) << 18) /* bits [20:18] */
+#define G_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(x) (((x) >> 18) & 0x07)
+#define C_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE 0xFFE3FFFF
+#define S_SQ_WORD1_LDS_IDX_OP_LDS_OP(x) (((x) & 0x3F) << 21) /* bits [26:21] */
+#define G_SQ_WORD1_LDS_IDX_OP_LDS_OP(x) (((x) >> 21) & 0x3F)
+#define C_SQ_WORD1_LDS_IDX_OP_LDS_OP 0xF81FFFFF
+#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x) (((x) & 0x1) << 27) /* bit [27] */
+#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(x) (((x) >> 27) & 0x1)
+#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0 0xF7FFFFFF
+#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x) (((x) & 0x1) << 28) /* bit [28] */
+#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(x) (((x) >> 28) & 0x1)
+#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2 0xEFFFFFFF
+#define S_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x) (((unsigned)(x) & 0x1) << 31) /* bit [31]; cast avoids shifting a signed 1 into the sign bit */
+#define G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(x) (((x) >> 31) & 0x1)
+#define C_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3 0x7FFFFFFF
+
+#define R_008DFC_SQ_ALU_WORD0_LDS_IDX_OP 0x008DFC /* S_ packs a value into a field, G_ extracts it, C_ is the AND-mask clearing it */
+#define S_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(x) (((x) & 0x1FF) << 0) /* bits [8:0] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(x) (((x) >> 0) & 0x1FF)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC0_SEL 0xFFFFFE00
+#define S_SQ_WORD0_LDS_IDX_OP_SRC0_REL(x) (((x) & 0x1) << 9) /* bit [9] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC0_REL(x) (((x) >> 9) & 0x1)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC0_REL 0xFFFFFDFF
+#define S_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(x) (((x) & 0x03) << 10) /* bits [11:10] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(x) (((x) >> 10) & 0x03)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN 0xFFFFF3FF
+#define S_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x) (((x) & 0x1) << 12) /* bit [12] */
+#define G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(x) (((x) >> 12) & 0x1)
+#define C_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4 0xFFFFEFFF
+#define S_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(x) (((x) & 0x1FF) << 13) /* bits [21:13] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(x) (((x) >> 13) & 0x1FF)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC1_SEL 0xFFC01FFF
+#define S_SQ_WORD0_LDS_IDX_OP_SRC1_REL(x) (((x) & 0x1) << 22) /* bit [22] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC1_REL(x) (((x) >> 22) & 0x1)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC1_REL 0xFFBFFFFF
+#define S_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(x) (((x) & 0x03) << 23) /* bits [24:23] */
+#define G_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(x) (((x) >> 23) & 0x03)
+#define C_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN 0xFE7FFFFF
+#define S_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x) (((x) & 0x1) << 25) /* bit [25] */
+#define G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(x) (((x) >> 25) & 0x1)
+#define C_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5 0xFDFFFFFF
+#define S_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(x) (((x) & 0x07) << 26) /* bits [28:26] */
+#define G_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(x) (((x) >> 26) & 0x07)
+#define C_SQ_WORD0_LDS_IDX_OP_INDEX_MODE 0xE3FFFFFF
+#define S_SQ_WORD0_LDS_IDX_OP_PRED_SEL(x) (((x) & 0x03) << 29) /* bits [30:29] */
+#define G_SQ_WORD0_LDS_IDX_OP_PRED_SEL(x) (((x) >> 29) & 0x03)
+#define C_SQ_WORD0_LDS_IDX_OP_PRED_SEL 0x9FFFFFFF
+#define S_SQ_WORD0_LDS_IDX_OP_LAST(x) (((unsigned)(x) & 0x1) << 31) /* bit [31]; cast avoids shifting a signed 1 into the sign bit */
+#define G_SQ_WORD0_LDS_IDX_OP_LAST(x) (((x) >> 31) & 0x1)
+#define C_SQ_WORD0_LDS_IDX_OP_LAST 0x7FFFFFFF
+
+/* LDS_DIRECT_LITERAL_HI fields: OFFSET_B [12:0], STRIDE_B [19:13], THREAD_REL_B [22], DIRECT_READ_32 [31] */
+#define R_008DFC_SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI 0x008DFC
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B(x) (((x) & 0x1FFF) << 0)
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B(x) (((x) >> 0) & 0x1FFF)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_OFFSET_B 0xFFFFE000
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B(x) (((x) & 0x7F) << 13)
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B(x) (((x) >> 13) & 0x7F)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_STRIDE_B 0xFFF01FFF
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B(x) (((x) & 0x1) << 22)
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B(x) (((x) >> 22) & 0x1)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_THREAD_REL_B 0xFFBFFFFF
+#define S_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32(x) (((unsigned)(x) & 0x1) << 31) /* cast avoids shifting a signed 1 into the sign bit */
+#define G_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32(x) (((x) >> 31) & 0x1)
+#define C_SQ_WORD1_LDS_DIRECT_LITERAL_HI_DIRECT_READ_32 0x7FFFFFFF
+
+
+
#define V_SQ_CF_COND_ACTIVE 0x00
#define V_SQ_CF_COND_FALSE 0x01
#define V_SQ_CF_COND_BOOL 0x02
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 128464e101a..4c49e3fd620 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -257,7 +257,7 @@ static void evergreen_emit_direct_dispatch(
int group_size = 1;
int grid_size = 1;
/* XXX: Enable lds and get size from cs_shader_state */
- unsigned lds_size = 0;
+ unsigned lds_size = 64;
/* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 3632aa5763c..0c09c2a48cc 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2690,3 +2690,45 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+
+void r600_bytecode_lds_read(struct r600_bytecode *bc,
+ struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
+{
+ /* Decode the two dwords of an LDS_IDX_OP instruction into *alu. */
+ /* WORD0: src0/src1 operands, index mode, predicate select, last flag */
+ alu->src[0].sel = G_SQ_WORD0_LDS_IDX_OP_SRC0_SEL(word0);
+ alu->src[0].rel = G_SQ_WORD0_LDS_IDX_OP_SRC0_REL(word0);
+ alu->src[0].chan = G_SQ_WORD0_LDS_IDX_OP_SRC0_CHAN(word0);
+ alu->src[1].sel = G_SQ_WORD0_LDS_IDX_OP_SRC1_SEL(word0);
+ alu->src[1].rel = G_SQ_WORD0_LDS_IDX_OP_SRC1_REL(word0);
+ alu->src[1].chan = G_SQ_WORD0_LDS_IDX_OP_SRC1_CHAN(word0);
+ alu->index_mode = G_SQ_WORD0_LDS_IDX_OP_INDEX_MODE(word0);
+ alu->pred_sel = G_SQ_WORD0_LDS_IDX_OP_PRED_SEL(word0);
+ alu->last = G_SQ_WORD0_LDS_IDX_OP_LAST(word0);
+
+ /* WORD1: src2 operand, LDS opcode, bank swizzle */
+ alu->src[2].sel = G_SQ_WORD1_LDS_IDX_OP_SRC2_SEL(word1);
+ alu->src[2].rel = G_SQ_WORD1_LDS_IDX_OP_SRC2_REL(word1);
+ alu->src[2].chan = G_SQ_WORD1_LDS_IDX_OP_SRC2_CHAN(word1);
+ /* r600_isa_init fills lds_map at index (opc >> 8): key on LDS_OP, not ALU_INST */
+ alu->op = r600_isa_lds_by_opcode(bc->isa, G_SQ_WORD1_LDS_IDX_OP_LDS_OP(word1));
+ alu->bank_swizzle = G_SQ_WORD1_LDS_IDX_OP_BANK_SWIZZLE(word1);
+ if (alu->bank_swizzle) {
+ alu->bank_swizzle_force = alu->bank_swizzle;
+ }
+ /* The 6-bit index offset is scattered over both words; reassemble it bit by bit. */
+ alu->lds_offset =
+ (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_0(word1) << 0) |
+ (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_1(word1) << 1) |
+ (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_2(word1) << 2) |
+ (G_SQ_WORD1_LDS_IDX_OP_IDX_OFFSET_3(word1) << 3) |
+ (G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_4(word0) << 4) |
+ (G_SQ_WORD0_LDS_IDX_OP_IDX_OFFSET_5(word0) << 5);
+
+ if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
+ {
+ alu->is_op3 = 1;
+ alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
+ alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
+ }
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 03cd2382140..0c8caf820be 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -58,6 +58,7 @@ struct r600_bytecode_alu {
unsigned bank_swizzle_force;
unsigned omod;
unsigned index_mode;
+ unsigned lds_offset;
};
struct r600_bytecode_tex {
diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
index 20b17594f49..28cba24eb10 100644
--- a/src/gallium/drivers/r600/r600_isa.c
+++ b/src/gallium/drivers/r600/r600_isa.c
@@ -54,15 +54,20 @@ int r600_isa_init(struct r600_context *ctx, struct r600_isa *isa) {
isa->cf_map = calloc(256, sizeof(unsigned));
if (!isa->cf_map)
return -1;
+ isa->lds_map = calloc(256, sizeof(unsigned));
+ if (!isa->lds_map)
+ return -1;
for (i = 0; i < TABLE_SIZE(alu_op_table); ++i) {
const struct alu_op_info *op = &alu_op_table[i];
unsigned opc;
- if (op->flags & AF_LDS || op->slots[isa->hw_class] == 0)
+ if (op->slots[isa->hw_class] == 0)
continue;
opc = op->opcode[isa->hw_class >> 1];
assert(opc != -1);
- if (op->src_count == 3)
+ if (op->flags & AF_LDS)
+ isa->lds_map[(opc >> 8)] = i + 1;
+ else if (op->src_count == 3)
isa->alu_op3_map[opc] = i + 1;
else
isa->alu_op2_map[opc] = i + 1;
@@ -105,6 +110,8 @@ int r600_isa_destroy(struct r600_isa *isa) {
free(isa->fetch_map);
if (isa->cf_map)
free(isa->cf_map);
+ if (isa->lds_map)
+ free(isa->lds_map);
free(isa);
return 0;
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index c6f115fc070..e792e23870b 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -1132,6 +1132,7 @@ struct r600_isa {
unsigned *alu_op3_map;
unsigned *fetch_map;
unsigned *cf_map;
+ unsigned *lds_map;
};
struct r600_context;
@@ -1202,6 +1203,14 @@ r600_isa_alu_by_opcode(struct r600_isa* isa, unsigned opcode, unsigned is_op3) {
}
static inline unsigned
+r600_isa_lds_by_opcode(struct r600_isa* isa, unsigned opcode) { /* LDS opcode -> 0-based op table index */
+ unsigned op; /* 1-based entry from lds_map; 0 marks an opcode with no table entry */
+ assert(isa->lds_map && opcode < 256); /* r600_isa_init allocates 256 entries */
+ op = isa->lds_map[opcode];
+ assert(op);
+ return op - 1; /* r600_isa_init stores i + 1, so undo the bias */
+}
+
+static inline unsigned
r600_isa_fetch_by_opcode(struct r600_isa* isa, unsigned opcode) {
unsigned op;
assert(isa->fetch_map);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 86424634d0f..8683247d7d8 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -317,7 +317,8 @@ static unsigned r600_src_from_byte_stream(unsigned char * bytes,
}
static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
+ unsigned char * bytes, unsigned bytes_read,
+ unsigned is_lds)
{
unsigned src_idx, src_num;
struct r600_bytecode_alu alu;
@@ -587,7 +588,7 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
switch (inst_type) {
case 0:
bytes_read = r600_alu_from_byte_stream(ctx, bytes,
- bytes_read);
+ bytes_read, 0);
break;
case 1:
bytes_read = r600_tex_from_byte_stream(ctx, bytes,
@@ -614,6 +615,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
case 5:
bytes_read = r600_export_from_byte_stream(ctx, bytes,
bytes_read);
+ break; /* without this, case 5 falls through into the case 6 decode below */
+ case 6: /* decoded through the ALU reader with is_lds = 1 */
+ bytes_read = r600_alu_from_byte_stream(ctx, bytes,
+ bytes_read, 1);
break;
default:
/* XXX: Error here */