summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerome Glisse <jglisse@redhat.com>2010-02-13 21:58:58 +0100
committerJerome Glisse <jglisse@redhat.com>2010-02-13 21:58:58 +0100
commit41cbc9da9cdf4df09e92d216b8b2f4cd6b34b453 (patch)
treea3c5be1305cabbe767a2be9cea1e9e8896a97ca7
parentbbad7babb8640f4779d1ff355b098b36c790729f (diff)
fs shader first pass
-rw-r--r--r600_atom.c65
-rw-r--r--r600_atom_api.h55
-rw-r--r--r600_shader.c282
-rw-r--r--r600d.h154
-rw-r--r--radeon_atom.h25
-rw-r--r--test.c27
6 files changed, 282 insertions, 326 deletions
diff --git a/r600_atom.c b/r600_atom.c
index f55f7af..4d906ad 100644
--- a/r600_atom.c
+++ b/r600_atom.c
@@ -1434,18 +1434,10 @@ out_err:
* r600_vs_shader
*/
struct r600_vs_shader {
- struct radeon_atom atom;
- u32 pkts[16];
- u8 input_semantic[32];
- u8 input_resource_id[32];
- u8 input_gpr[32];
- u8 ninputs;
- u8 output_semantic[32];
- u8 output_gpr[32];
- u8 noutputs;
- u32 ndwords;
- u32 *opcodes;
- struct r600_atoms *atoms;
+ struct radeon_atom atom; /* base atom; the emit callback casts the atom pointer back to this struct, so it has to stay the first member */
+ u32 pkts[64]; /* packet stream laid out at create time; shader offsets and relocs are patched in at emit, then the stream is copied to the IB */
+ struct drm_r600_vs_shader vs; /* copy of the caller's shader description; vs.opcodes is replaced by a private kmalloc'ed copy that release frees */
+ struct r600_atoms *atoms; /* atom set this shader belongs to; its mutex is held while the atom is unlinked on release */
};
static void r600_vs_shader_release(struct kref *kref)
@@ -1456,7 +1448,7 @@ static void r600_vs_shader_release(struct kref *kref)
mutex_lock(&vs_shader->atoms->mutex);
list_del_init(&vs_shader->atom.list);
mutex_unlock(&vs_shader->atoms->mutex);
- kfree(vs_shader->opcodes);
+ kfree(vs_shader->vs.opcodes);
kfree(vs_shader);
}
@@ -1467,13 +1459,20 @@ static int r600_vs_shader_emit(struct radeon_device *rdev,
{
struct r600_vs_shader *vs_shader = (struct r600_vs_shader *)atom;
struct r600_batch *batch = (struct r600_batch *)data;
- u32 *opcodes = batch->shaders->ptr;
+ u32 *opcodes = batch->shaders->ptr, ndwords;
int r;
- memcpy(&opcodes[batch->shaders_idx], vs_shader->opcodes, vs_shader->ndwords * 4);
+ r = r600_shader_build_fs(rdev, &opcodes[batch->shaders_idx], &ndwords,
+ &batch->inputs, &vs_shader->vs);
+ if (r)
+ return r;
vs_shader->pkts[2] = batch->shaders_idx >> 6;
vs_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
- batch->shaders_idx += ((vs_shader->ndwords * 4) + 63) & 0xFFFFFFC0;
+ batch->shaders_idx += (ndwords + 63) & 0xFFFFFFC0;
+ memcpy(&opcodes[batch->shaders_idx], vs_shader->vs.opcodes, vs_shader->vs.ndwords * 4);
+ vs_shader->pkts[13] = batch->shaders_idx >> 6;
+ vs_shader->pkts[15] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
+ batch->shaders_idx += (vs_shader->vs.ndwords + 63) & 0xFFFFFFC0;
r = radeon_ib_copy(ib, vs_shader->pkts, atom->npkts);
return r;
}
@@ -1504,24 +1503,29 @@ static int r600_vs_shader_create(struct radeon_device *rdev,
r = -EINVAL;
goto out_err;
}
- vs_shader->opcodes = kmalloc(4 * pvs_shader.ndwords, GFP_KERNEL);
- if (vs_shader->opcodes == NULL) {
+ memcpy(&vs_shader->vs, &pvs_shader, sizeof(struct drm_r600_vs_shader));
+ vs_shader->vs.opcodes = kmalloc(4 * pvs_shader.ndwords, GFP_KERNEL);
+ if (vs_shader->vs.opcodes == NULL) {
dev_err(rdev->dev, "shader too big\n");
r = -ENOMEM;
goto out_err;
}
- memcpy(vs_shader->opcodes, pvs_shader.opcodes, 4 * pvs_shader.ndwords);
- vs_shader->ndwords = pvs_shader.ndwords;
- vs_shader->ninputs = pvs_shader.ninputs;
- vs_shader->noutputs = pvs_shader.noutputs;
- for (i = 0; i < 32; i++) {
- vs_shader->input_semantic[i] = pvs_shader.input_semantic[i];
- vs_shader->input_resource_id[i] = pvs_shader.input_resource_id[i];
- vs_shader->input_gpr[i] = pvs_shader.input_gpr[i];
- vs_shader->output_semantic[i] = pvs_shader.output_semantic[i];
- vs_shader->output_gpr[i] = pvs_shader.output_gpr[i];
- }
+ memcpy(vs_shader->vs.opcodes, pvs_shader.opcodes, 4 * pvs_shader.ndwords);
vs_shader->atom.npkts = 0;
+ /* SQ_PGM_START_FS */
+ vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000225;
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000;
+ vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_NOP, 0);
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000;
+ /* SQ_PGM_RESOURCES_FS */
+ vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000229;
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000;
+ /* SQ_PGM_CF_OFFSET_FS */
+ vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000237;
+ vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000;
/* SQ_PGM_START_VS */
vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000216;
@@ -1587,7 +1591,7 @@ static int r600_ps_shader_emit(struct radeon_device *rdev,
memcpy(&opcodes[batch->shaders_idx], ps_shader->opcodes, ps_shader->ndwords * 4);
ps_shader->pkts[2] = batch->shaders_idx >> 6;
ps_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
- batch->shaders_idx += ((ps_shader->ndwords * 4) + 63) & 0xFFFFFFC0;
+ batch->shaders_idx += (ps_shader->ndwords + 63) & 0xFFFFFFC0;
r = radeon_ib_copy(ib, ps_shader->pkts, atom->npkts);
return r;
}
@@ -1873,6 +1877,7 @@ int r600_batches_queue(struct radeon_device *rdev,
r = r600_batch_alloc(&rbatch);
if (r)
return r;
+ memcpy(&rbatch->inputs, &batch->inputs, sizeof(struct drm_r600_vs_input));
mutex_lock(&atoms->mutex);
i = 0;
if (batch->blend == NULL || batch->cb_cntl == NULL ||
diff --git a/r600_atom_api.h b/r600_atom_api.h
index 8b5b1ff..e910bc5 100644
--- a/r600_atom_api.h
+++ b/r600_atom_api.h
@@ -304,23 +304,46 @@ struct drm_r600_ps_shader {
u32 sq_pgm_exports_ps;
};
-struct drm_r600_batch {
- struct radeon_atom *vs_constants;
- struct radeon_atom *ps_constants;
- struct radeon_atom *blend;
- struct radeon_atom *cb;
- struct radeon_atom *cb_cntl;
- struct radeon_atom *pa;
- struct radeon_atom *tp;
- struct radeon_atom *vport;
- struct radeon_atom *db;
- struct radeon_atom *db_cntl;
- struct radeon_atom *vgt;
- struct radeon_atom *spi;
- struct radeon_atom *sx;
- struct radeon_atom *vs_shader;
- struct radeon_atom *ps_shader;
+struct drm_r600_vs_buffer { /* one vertex buffer referenced by the elements of a drm_r600_vs_input */
+ struct radeon_bo *bo; /* buffer object; NOTE(review): not consumed by any code visible in this patch */
+ u32 resource_id; /* written into the BUFFER_ID field of every fetch that reads this buffer (r600_shader_build_fs) */
+ u32 sq_vtx_constant_word0; /* presumably the raw SQ_VTX_CONSTANT word 0 for this resource -- TODO confirm, unused in visible code */
+ u32 sq_vtx_constant_word2; /* presumably the raw SQ_VTX_CONSTANT word 2 -- TODO confirm */
+ u32 sq_vtx_constant_word3; /* presumably the raw SQ_VTX_CONSTANT word 3 -- TODO confirm */
+};
+
+struct drm_r600_vs_element { /* one vertex attribute; r600_shader_build_fs turns it into one fetch instruction */
+ u32 buffer_id; /* index into drm_r600_vs_input.buffers[]; rejected when >= nbuffers */
+ u32 semantic; /* compared with the vertex shader's input_semantic[] to pick the destination GPR */
+ u32 sq_vtx_word0; /* fetch word 0 template; only the bits under mask 0xFC000000 are kept */
+ u32 sq_vtx_word1; /* fetch word 1 template; the low bits are replaced by the destination GPR */
+ u32 sq_vtx_word2; /* fetch word 2, copied into the fetch shader unmodified */
};
+struct drm_r600_vs_input { /* vertex input layout of a batch: attributes plus the buffers they read from */
+ u32 nelements; /* number of entries of elements[] in use; r600_shader_build_fs refuses 0 */
+ u32 nbuffers; /* number of entries of buffers[] in use; upper bound for elements[].buffer_id */
+ struct drm_r600_vs_element elements[32];
+ struct drm_r600_vs_buffer buffers[32];
+};
+
+struct drm_r600_batch { /* set of state atoms plus vertex inputs submitted together through r600_batches_queue */
+ struct radeon_atom *vs_constants;
+ struct radeon_atom *ps_constants;
+ struct radeon_atom *blend;
+ struct radeon_atom *cb;
+ struct radeon_atom *cb_cntl;
+ struct radeon_atom *pa;
+ struct radeon_atom *tp;
+ struct radeon_atom *vport;
+ struct radeon_atom *db;
+ struct radeon_atom *db_cntl;
+ struct radeon_atom *vgt;
+ struct radeon_atom *spi;
+ struct radeon_atom *sx;
+ struct radeon_atom *vs_shader;
+ struct radeon_atom *ps_shader;
+ struct drm_r600_vs_input inputs; /* embedded by value; r600_batches_queue memcpy's it into the internal batch */
+};
#endif
diff --git a/r600_shader.c b/r600_shader.c
index 7f8794d..fd482c8 100644
--- a/r600_shader.c
+++ b/r600_shader.c
@@ -20,72 +20,71 @@
struct r600_block {
struct list_head list;
u32 idx;
- u32 f_idx;
- u32 ninst;
u32 last;
};
struct r600_inst_name {
+ char safe; /* 0/1 flag set per instruction in the name tables; NOTE(review): no code visible in this patch reads it -- presumably marks instructions accepted in user shaders, confirm */
char name[64];
};
static struct r600_inst_name sq_cf_inst_name[] = {
- {"NOP"},
- {"TEX"},
- {"VTX"},
- {"VTX_TC"},
- {"LOOP_START"},
- {"LOOP_END"},
- {"LOOP_START_DX10"},
- {"LOOP_START_NO_AL"},
- {"LOOP_CONTINUE"},
- {"LOOP_BREAK"},
- {"JUMP"},
- {"PUSH"},
- {"PUSH_ELSE"},
- {"ELSE"},
- {"POP"},
- {"POP_JUMP"},
- {"POP_PUSH"},
- {"POP_PUSH_ELSE"},
- {"CALL"},
- {"CALL_FS"},
- {"RETURN"},
- {"EMIT_VERTEX"},
- {"EMIT_CUT_VERTEX"},
- {"CUT_VERTEX"},
- {"KILL"},
+ {1, "NOP"}, /* rows are {safe, name}; NOTE(review): presumably indexed by the CF_INST value -- confirm against the disassembler */
+ {0, "TEX"},
+ {0, "VTX"},
+ {0, "VTX_TC"},
+ {1, "LOOP_START"},
+ {1, "LOOP_END"},
+ {1, "LOOP_START_DX10"},
+ {1, "LOOP_START_NO_AL"},
+ {1, "LOOP_CONTINUE"},
+ {1, "LOOP_BREAK"},
+ {1, "JUMP"},
+ {1, "PUSH"},
+ {1, "PUSH_ELSE"},
+ {1, "ELSE"},
+ {1, "POP"},
+ {1, "POP_JUMP"},
+ {1, "POP_PUSH"},
+ {1, "POP_PUSH_ELSE"},
+ {1, "CALL"},
+ {1, "CALL_FS"},
+ {1, "RETURN"},
+ {0, "EMIT_VERTEX"},
+ {0, "EMIT_CUT_VERTEX"},
+ {0, "CUT_VERTEX"},
+ {1, "KILL"},
};
static struct r600_inst_name sq_cf_alu_inst_name[] = {
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"unknown"},
- {"ALU"},
- {"ALU_PUSH_BEFORE"},
- {"ALU_POP_AFTER"},
- {"ALU_POP2_AFTER"},
- {"unknown"},
- {"ALU_CONTINUE"},
- {"ALU_BREAK"},
- {"ALU_ELSE_AFTER"},
+ {0, "unknown"}, /* rows are {safe, name}; "unknown" rows fill slots without a named CF ALU instruction */
+ {0, "unknown"},
+ {0, "unknown"},
+ {0, "unknown"},
+ {0, "unknown"},
+ {0, "unknown"},
+ {0, "unknown"},
+ {0, "unknown"},
+ {1, "ALU"},
+ {1, "ALU_PUSH_BEFORE"},
+ {1, "ALU_POP_AFTER"},
+ {1, "ALU_POP2_AFTER"},
+ {0, "unknown"},
+ {1, "ALU_CONTINUE"},
+ {1, "ALU_BREAK"},
+ {1, "ALU_ELSE_AFTER"},
};
static struct r600_inst_name sq_cf_alloc_export_inst_name[] = {
- {"MEM_STREAM0"},
- {"MEM_STREAM1"},
- {"MEM_STREAM2"},
- {"MEM_STREAM3"},
- {"MEM_SCRATCH"},
- {"MEM_REDUCTION"},
- {"MEM_RING"},
- {"EXPORT"},
- {"EXPORT_DONE"},
+ {0, "MEM_STREAM0"}, /* rows are {safe, name}; only EXPORT and EXPORT_DONE carry safe = 1 */
+ {0, "MEM_STREAM1"},
+ {0, "MEM_STREAM2"},
+ {0, "MEM_STREAM3"},
+ {0, "MEM_SCRATCH"},
+ {0, "MEM_REDUCTION"},
+ {0, "MEM_RING"},
+ {1, "EXPORT"},
+ {1, "EXPORT_DONE"},
};
void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx)
@@ -157,37 +156,6 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx)
blk->last = 0;
inst = (bytecode[idx+1] >> 23) & 0x7F;
if ((inst & 0x78) >= 0x40) {
- blk->f_idx = G_008DFC_ALU_ADDR(bytecode[idx+0]) << 1;
- blk->ninst = G_008DFC_ALU_COUNT(bytecode[idx+1]);
-#if 0
- /* SQ_CF_ALU */
- switch (G_008DFC_CF_ALU_INST(bytecode[idx+1])) {
- case V_008DFC_SQ_CF_INST_ALU:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_POP_AFTER:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_POP_AFTER\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_POP2_AFTER:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_POP2_AFTER\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_CONTINUE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_CONTINUE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_BREAK:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_BREAK\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- default:
- printf("Block[%d %d %d] unknown cf alu instruction 0x%02X\n", blk->idx, blk->f_idx, blk->ninst, G_008DFC_CF_ALU_INST(bytecode[idx+1]));
- break;
- }
-#endif
r600_disassemble_sq_cf_alu_inst(bytecode, ndwords, idx);
} else {
if (G_008DFC_CF_INST(bytecode[idx+1]) < 0x20) {
@@ -196,115 +164,6 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx)
r600_disassemble_sq_cf_alloc_export_inst(bytecode, ndwords, idx);
blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]);
}
-#if 0
- switch (G_008DFC_CF_INST(bytecode[idx+1])) {
- case V_008DFC_SQ_CF_INST_NOP:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_NOP\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_TEX:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_TEX\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_VTX:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_VTX\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_VTX_TC:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_VTX_TC\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_START:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_END:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_END\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_START_DX10:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START_DX10\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_START_NO_AL:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START_NO_AL\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_CONTINUE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_CONTINUE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_LOOP_BREAK:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_BREAK\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_JUMP:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_JUMP\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_PUSH:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_PUSH\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_PUSH_ELSE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_PUSH_ELSE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_ELSE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ELSE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_POP:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_POP_JUMP:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_JUMP\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_POP_PUSH:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_PUSH\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_POP_PUSH_ELSE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_PUSH_ELSE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_CALL:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CALL\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_CALL_FS:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CALL_FS\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_RETURN:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_RETURN\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_EMIT_VERTEX:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EMIT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_CUT_VERTEX:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CUT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_KILL:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_KILL\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_STREAM0:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM0\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_STREAM1:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM1\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_STREAM2:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM2\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_STREAM3:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM3\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_SCRATCH:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_SCRATCH\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_REDUCTION:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_REDUCTION\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_MEM_RING:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_RING\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_EXPORT:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EXPORT\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- case V_008DFC_SQ_CF_INST_EXPORT_DONE:
- printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EXPORT_DONE\n", blk->idx, blk->f_idx, blk->ninst);
- break;
- default:
- printf("Block[%d %d %d] unknown cf instruction 0x%02X\n", blk->idx, blk->f_idx, blk->ninst, G_008DFC_CF_INST(bytecode[idx+1]));
- break;
- }
-#endif
}
return blk;
}
@@ -322,3 +181,42 @@ void r600_shader_disassemble(u32 *bytecode, u32 ndwords)
idx += 2;
} while (!blk->last);
}
+
+/*
+ * r600_shader_build_fs - generate the fetch shader that feeds a vertex shader
+ * @rdev:     device, only used for error reporting
+ * @bytecode: destination buffer; receives one 2-dword CF instruction followed
+ *            by 4 dwords per emitted fetch, i.e. at most 2 + 4 * 32 dwords.
+ *            No size is passed in, the caller must provide that much room.
+ * @ndwords:  set to the number of dwords written, 0 when an error is returned
+ * @inputs:   vertex elements and the buffers they read from
+ * @vs:       vertex shader description, supplies the semantic -> GPR mapping
+ *
+ * One fetch instruction is emitted for every element whose semantic matches
+ * one of the vertex shader inputs; elements the shader does not consume are
+ * skipped. The COUNT field of the CF instruction is derived from the number
+ * of fetches actually emitted, not from inputs->nelements.
+ *
+ * Returns 0 on success, -EINVAL when there is no element, when the element
+ * or buffer count exceeds the arrays of @inputs, when an element references
+ * a buffer outside inputs->nbuffers, or when no element matches any vertex
+ * shader input.
+ */
+int r600_shader_build_fs(struct radeon_device *rdev,
+			 u32 *bytecode, u32 *ndwords,
+			 struct drm_r600_vs_input *inputs,
+			 struct drm_r600_vs_shader *vs)
+{
+	const u32 max_elements = sizeof(inputs->elements) / sizeof(inputs->elements[0]);
+	const u32 max_buffers = sizeof(inputs->buffers) / sizeof(inputs->buffers[0]);
+	u32 idx = 0, nfetch = 0, i, j;
+
+	*ndwords = 0;
+	if (!inputs->nelements) {
+		dev_err(rdev->dev, "need at least one input for vertex shader\n");
+		return -EINVAL;
+	}
+	/* the counts come from the submitter, never index past the arrays */
+	if (inputs->nelements > max_elements || inputs->nbuffers > max_buffers) {
+		dev_err(rdev->dev, "too many elements (%u) or buffers (%u)\n",
+			inputs->nelements, inputs->nbuffers);
+		return -EINVAL;
+	}
+	/*
+	 * CF instruction: word 0 is the clause address (1), word 1 the opcode.
+	 * NOTE(review): 0x81200000 has CF_INST (bits 23-29) == 2 -- presumably
+	 * a VTX clause with barrier and end-of-program set, confirm.
+	 * COUNT is or'ed in after the loop, once the fetch count is known.
+	 */
+	bytecode[idx++] = 0x00000001;
+	bytecode[idx++] = 0x81200000;
+	for (i = 0; i < inputs->nelements; i++) {
+		const struct drm_r600_vs_element *elt = &inputs->elements[i];
+		u32 rid, gpr = 0;
+		int found = 0;
+
+		if (elt->buffer_id >= inputs->nbuffers) {
+			dev_err(rdev->dev, "elements %u referencing invalid buffer %u\n",
+				i, elt->buffer_id);
+			return -EINVAL;
+		}
+		rid = inputs->buffers[elt->buffer_id].resource_id;
+		for (j = 0; j < vs->ninputs; j++) {
+			if (vs->input_semantic[j] == elt->semantic) {
+				gpr = vs->input_gpr[j];
+				found = 1;
+				break;
+			}
+		}
+		/* if vs has no corresponding input skip the elements */
+		if (!found)
+			continue;
+		/* word 0: keep the caller's top 6 bits, fetch from resource rid */
+		bytecode[idx++] = (elt->sq_vtx_word0 & 0xFC000000) | S_008DFC_BUFFER_ID(rid);
+		/*
+		 * word 1: replace DST_GPR (bits 0-6) and clear DST_REL (bit 7)
+		 * and bit 8. DST_SEL_X starts at bit 9 and must be preserved,
+		 * hence 0xFFFFFE00 rather than 0xFFFFFC00.
+		 */
+		bytecode[idx++] = (elt->sq_vtx_word1 & 0xFFFFFE00) | S_008DFC_DST_GPR(gpr);
+		bytecode[idx++] = elt->sq_vtx_word2;
+		/* 4th dword of the fetch slot is filler */
+		bytecode[idx++] = 0xCAFEDEAD;
+		nfetch++;
+	}
+	if (!nfetch) {
+		dev_err(rdev->dev, "no input element matches a vertex shader input\n");
+		return -EINVAL;
+	}
+	/*
+	 * COUNT encodes the number of fetches minus one.
+	 * NOTE(review): the width of S_008DFC_COUNT is not visible here; if a
+	 * clause cannot hold 32 fetches the value is silently truncated --
+	 * confirm and split into several clauses if needed.
+	 */
+	bytecode[1] |= S_008DFC_COUNT(nfetch - 1);
+	*ndwords = idx;
+	return 0;
+}
diff --git a/r600d.h b/r600d.h
index ae51a2c..3d1ffee 100644
--- a/r600d.h
+++ b/r600d.h
@@ -498,83 +498,81 @@
#define S_008DFC_SEL_W(x) (((x) & 0x7) << 9)
#define G_008DFC_SEL_W(x) (((x) >> 9) & 0x7)
#define C_008DFC_SEL_W 0xFFFFF1FF
-#define R_008DFC_SQ_ALU_WORD0 0x008DFC
-#define S_008DFC_SRC0_SEL(x) (((x) & 0x1FF) << 0)
-#define G_008DFC_SRC0_SEL(x) (((x) >> 0) & 0x1FF)
-#define C_008DFC_SRC0_SEL 0xFFFFFE00
-#define S_008DFC_SRC0_REL(x) (((x) & 0x1) << 9)
-#define G_008DFC_SRC0_REL(x) (((x) >> 9) & 0x1)
-#define C_008DFC_SRC0_REL 0xFFFFFDFF
-#define S_008DFC_SRC0_CHAN(x) (((x) & 0x3) << 10)
-#define G_008DFC_SRC0_CHAN(x) (((x) >> 10) & 0x3)
-#define C_008DFC_SRC0_CHAN 0xFFFFF3FF
-#define S_008DFC_SRC0_NEG(x) (((x) & 0x1) << 12)
-#define G_008DFC_SRC0_NEG(x) (((x) >> 12) & 0x1)
-#define C_008DFC_SRC0_NEG 0xFFFFEFFF
-#define S_008DFC_SRC1_SEL(x) (((x) & 0x1FF) << 13)
-#define G_008DFC_SRC1_SEL(x) (((x) >> 13) & 0x1FF)
-#define C_008DFC_SRC1_SEL 0xFFC01FFF
-#define S_008DFC_SRC1_REL(x) (((x) & 0x1) << 22)
-#define G_008DFC_SRC1_REL(x) (((x) >> 22) & 0x1)
-#define C_008DFC_SRC1_REL 0xFFBFFFFF
-#define S_008DFC_SRC1_CHAN(x) (((x) & 0x3) << 23)
-#define G_008DFC_SRC1_CHAN(x) (((x) >> 23) & 0x3)
-#define C_008DFC_SRC1_CHAN 0xFE7FFFFF
-#define S_008DFC_SRC1_NEG(x) (((x) & 0x1) << 25)
-#define G_008DFC_SRC1_NEG(x) (((x) >> 25) & 0x1)
-#define C_008DFC_SRC1_NEG 0xFDFFFFFF
-#define S_008DFC_INDEX_MODE(x) (((x) & 0x7) << 26)
-#define G_008DFC_INDEX_MODE(x) (((x) >> 26) & 0x7)
-#define C_008DFC_INDEX_MODE 0xE3FFFFFF
-#define S_008DFC_PRED_SEL(x) (((x) & 0x3) << 29)
-#define G_008DFC_PRED_SEL(x) (((x) >> 29) & 0x3)
-#define C_008DFC_PRED_SEL 0x9FFFFFFF
-#define S_008DFC_LAST(x) (((x) & 0x1) << 31)
-#define G_008DFC_LAST(x) (((x) >> 31) & 0x1)
-#define C_008DFC_LAST 0x7FFFFFFF
-#define R_008DFC_SQ_ALU_WORD1 0x008DFC
-#define S_008DFC_ENCODING(x) (((x) & 0x7) << 15)
-#define G_008DFC_ENCODING(x) (((x) >> 15) & 0x7)
-#define C_008DFC_ENCODING 0xFFFC7FFF
-#define S_008DFC_BANK_SWIZZLE(x) (((x) & 0x7) << 18)
-#define G_008DFC_BANK_SWIZZLE(x) (((x) >> 18) & 0x7)
-#define C_008DFC_BANK_SWIZZLE 0xFFE3FFFF
-#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 21)
-#define G_008DFC_DST_GPR(x) (((x) >> 21) & 0x7F)
-#define C_008DFC_DST_GPR 0xF01FFFFF
-#define S_008DFC_DST_REL(x) (((x) & 0x1) << 28)
-#define G_008DFC_DST_REL(x) (((x) >> 28) & 0x1)
-#define C_008DFC_DST_REL 0xEFFFFFFF
-#define S_008DFC_DST_CHAN(x) (((x) & 0x3) << 29)
-#define G_008DFC_DST_CHAN(x) (((x) >> 29) & 0x3)
-#define C_008DFC_DST_CHAN 0x9FFFFFFF
-#define S_008DFC_CLAMP(x) (((x) & 0x1) << 31)
-#define G_008DFC_CLAMP(x) (((x) >> 31) & 0x1)
-#define C_008DFC_CLAMP 0x7FFFFFFF
-#define R_008DFC_SQ_ALU_WORD1_OP2 0x008DFC
-#define S_008DFC_SRC0_ABS(x) (((x) & 0x1) << 0)
-#define G_008DFC_SRC0_ABS(x) (((x) >> 0) & 0x1)
-#define C_008DFC_SRC0_ABS 0xFFFFFFFE
-#define S_008DFC_SRC1_ABS(x) (((x) & 0x1) << 1)
-#define G_008DFC_SRC1_ABS(x) (((x) >> 1) & 0x1)
-#define C_008DFC_SRC1_ABS 0xFFFFFFFD
-#define S_008DFC_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2)
-#define G_008DFC_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1)
-#define C_008DFC_UPDATE_EXECUTE_MASK 0xFFFFFFFB
-#define S_008DFC_UPDATE_PRED(x) (((x) & 0x1) << 3)
-#define G_008DFC_UPDATE_PRED(x) (((x) >> 3) & 0x1)
-#define C_008DFC_UPDATE_PRED 0xFFFFFFF7
-#define S_008DFC_WRITE_MASK(x) (((x) & 0x1) << 4)
-#define G_008DFC_WRITE_MASK(x) (((x) >> 4) & 0x1)
-#define C_008DFC_WRITE_MASK 0xFFFFFFEF
-#define S_008DFC_FOG_MERGE(x) (((x) & 0x1) << 5)
-#define G_008DFC_FOG_MERGE(x) (((x) >> 5) & 0x1)
-#define C_008DFC_FOG_MERGE 0xFFFFFFDF
-#define S_008DFC_OMOD(x) (((x) & 0x3) << 6)
-#define G_008DFC_OMOD(x) (((x) >> 6) & 0x3)
-#define C_008DFC_OMOD 0xFFFFFF3F
-#define S_008DFC_ALU_INST(x) (((x) & 0x3FF) << 8)
-#define G_008DFC_ALU_INST(x) (((x) >> 8) & 0x3FF)
-#define C_008DFC_ALU_INST 0xFFFC00FF
+#define R_008DFC_SQ_VTX_WORD0 0x008DFC /* field layout of the 1st dword of a vertex fetch instruction */
+#define S_008DFC_VTX_INST(x) (((x) & 0x1F) << 0)
+#define G_008DFC_VTX_INST(x) (((x) >> 0) & 0x1F)
+#define C_008DFC_VTX_INST 0xFFFFFFE0
+#define S_008DFC_FETCH_TYPE(x) (((x) & 0x3) << 5)
+#define G_008DFC_FETCH_TYPE(x) (((x) >> 5) & 0x3)
+#define C_008DFC_FETCH_TYPE 0xFFFFFF9F
+#define S_008DFC_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7)
+#define G_008DFC_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1)
+#define C_008DFC_FETCH_WHOLE_QUAD 0xFFFFFF7F
+#define S_008DFC_BUFFER_ID(x) (((x) & 0xFF) << 8) /* r600_shader_build_fs fills this from drm_r600_vs_buffer.resource_id */
+#define G_008DFC_BUFFER_ID(x) (((x) >> 8) & 0xFF)
+#define C_008DFC_BUFFER_ID 0xFFFF00FF
+#define S_008DFC_SRC_GPR(x) (((x) & 0x7F) << 16)
+#define G_008DFC_SRC_GPR(x) (((x) >> 16) & 0x7F)
+#define C_008DFC_SRC_GPR 0xFF80FFFF
+#define S_008DFC_SRC_REL(x) (((x) & 0x1) << 23)
+#define G_008DFC_SRC_REL(x) (((x) >> 23) & 0x1)
+#define C_008DFC_SRC_REL 0xFF7FFFFF
+#define S_008DFC_SRC_SEL_X(x) (((x) & 0x3) << 24)
+#define G_008DFC_SRC_SEL_X(x) (((x) >> 24) & 0x3)
+#define C_008DFC_SRC_SEL_X 0xFCFFFFFF
+#define S_008DFC_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) /* bits 26-31: the only part of the caller's sq_vtx_word0 that r600_shader_build_fs keeps */
+#define G_008DFC_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F)
+#define C_008DFC_MEGA_FETCH_COUNT 0x03FFFFFF
+#define R_008DFC_SQ_VTX_WORD1 0x008DFC /* field layout of the 2nd dword of a vertex fetch instruction, bits 9 and up */
+#define S_008DFC_DST_SEL_X(x) (((x) & 0x7) << 9) /* lowest field of this group: starts at bit 9 */
+#define G_008DFC_DST_SEL_X(x) (((x) >> 9) & 0x7)
+#define C_008DFC_DST_SEL_X 0xFFFFF1FF
+#define S_008DFC_DST_SEL_Y(x) (((x) & 0x7) << 12)
+#define G_008DFC_DST_SEL_Y(x) (((x) >> 12) & 0x7)
+#define C_008DFC_DST_SEL_Y 0xFFFF8FFF
+#define S_008DFC_DST_SEL_Z(x) (((x) & 0x7) << 15)
+#define G_008DFC_DST_SEL_Z(x) (((x) >> 15) & 0x7)
+#define C_008DFC_DST_SEL_Z 0xFFFC7FFF
+#define S_008DFC_DST_SEL_W(x) (((x) & 0x7) << 18)
+#define G_008DFC_DST_SEL_W(x) (((x) >> 18) & 0x7)
+#define C_008DFC_DST_SEL_W 0xFFE3FFFF
+#define S_008DFC_USE_CONST_FIELDS(x) (((x) & 0x1) << 21)
+#define G_008DFC_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1)
+#define C_008DFC_USE_CONST_FIELDS 0xFFDFFFFF
+#define S_008DFC_DATA_FORMAT(x) (((x) & 0x3F) << 22)
+#define G_008DFC_DATA_FORMAT(x) (((x) >> 22) & 0x3F)
+#define C_008DFC_DATA_FORMAT 0xF03FFFFF
+#define S_008DFC_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28)
+#define G_008DFC_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3)
+#define C_008DFC_NUM_FORMAT_ALL 0xCFFFFFFF
+#define S_008DFC_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30)
+#define G_008DFC_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1)
+#define C_008DFC_FORMAT_COMP_ALL 0xBFFFFFFF
+#define S_008DFC_SRF_MODE_ALL(x) (((x) & 0x1) << 31)
+#define G_008DFC_SRF_MODE_ALL(x) (((x) >> 31) & 0x1)
+#define C_008DFC_SRF_MODE_ALL 0x7FFFFFFF
+#define R_008DFC_SQ_VTX_WORD1_GPR 0x008DFC /* fields in the low byte of the 2nd dword of a vertex fetch instruction */
+#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 0) /* same macro name as the SQ_ALU_WORD1 field removed by this patch, but now at bit 0 instead of bit 21 */
+#define G_008DFC_DST_GPR(x) (((x) >> 0) & 0x7F)
+#define C_008DFC_DST_GPR 0xFFFFFF80
+#define S_008DFC_DST_REL(x) (((x) & 0x1) << 7)
+#define G_008DFC_DST_REL(x) (((x) >> 7) & 0x1)
+#define C_008DFC_DST_REL 0xFFFFFF7F
+#define R_008DFC_SQ_VTX_WORD2 0x008DFC /* field layout of the 3rd dword of a vertex fetch instruction; r600_shader_build_fs copies the caller's sq_vtx_word2 into it unchanged */
+#define S_008DFC_OFFSET(x) (((x) & 0xFFFF) << 0)
+#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFFF)
+#define C_008DFC_OFFSET 0xFFFF0000
+#define S_008DFC_ENDIAN_SWAP(x) (((x) & 0x3) << 16)
+#define G_008DFC_ENDIAN_SWAP(x) (((x) >> 16) & 0x3)
+#define C_008DFC_ENDIAN_SWAP 0xFFFCFFFF
+#define S_008DFC_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18)
+#define G_008DFC_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1)
+#define C_008DFC_CONST_BUF_NO_STRIDE 0xFFFBFFFF
+#define S_008DFC_MEGA_FETCH(x) (((x) & 0x1) << 19)
+#define G_008DFC_MEGA_FETCH(x) (((x) >> 19) & 0x1)
+#define C_008DFC_MEGA_FETCH 0xFFF7FFFF
+#define S_008DFC_ALT_CONST(x) (((x) & 0x1) << 20)
+#define G_008DFC_ALT_CONST(x) (((x) >> 20) & 0x1)
+#define C_008DFC_ALT_CONST 0xFFEFFFFF
#endif
diff --git a/radeon_atom.h b/radeon_atom.h
index 79829a2..10d9935 100644
--- a/radeon_atom.h
+++ b/radeon_atom.h
@@ -55,16 +55,17 @@ struct radeon_atom {
/* R600 */
#define R600_BATCH_NATOMS 15
struct r600_batch {
- struct list_head list;
- struct list_head pre_flushes;
- struct list_head post_flushes;
- struct radeon_atom *atoms[R600_BATCH_NATOMS];
- struct radeon_atom *emit_atoms[R600_BATCH_NATOMS];
- u32 nemit_atoms;
- u32 nflushes;
- u32 npkts;
- struct radeon_bo *shaders;
- u32 shaders_idx;
+ struct list_head list;
+ struct list_head pre_flushes;
+ struct list_head post_flushes;
+ struct radeon_atom *atoms[R600_BATCH_NATOMS];
+ struct radeon_atom *emit_atoms[R600_BATCH_NATOMS];
+ u32 nemit_atoms;
+ u32 nflushes;
+ u32 npkts;
+ struct radeon_bo *shaders; /* buffer object the shader emit callbacks write bytecode into (through shaders->ptr) */
+ u32 shaders_idx; /* dword index of the next free slot in shaders; the vs emit path advances it in multiples of 64 dwords */
+ struct drm_r600_vs_input inputs; /* vertex input layout; passed to r600_shader_build_fs when the vertex shader atom is emitted */
};
#define R600_SHADER_SIZE (32 * 1024)
@@ -120,6 +121,10 @@ static inline void radeon_atom_put(struct radeon_atom *atom)
}
/* R600 */
+extern int r600_shader_build_fs(struct radeon_device *rdev,
+ u32 *bytecode, u32 *ndwords,
+ struct drm_r600_vs_input *inputs,
+ struct drm_r600_vs_shader *vs); /* r600_shader.c: writes a fetch shader for inputs/vs into bytecode, stores its length in *ndwords; returns 0 or -EINVAL */
extern int r600_atoms_init(struct radeon_device *rdev, struct r600_atoms *atoms);
extern void r600_atoms_release(struct radeon_device *rdev, struct r600_atoms *atoms);
extern int r600_atom_create(struct radeon_device *rdev,
diff --git a/test.c b/test.c
index 1da3ffb..37dbe42 100644
--- a/test.c
+++ b/test.c
@@ -365,6 +365,11 @@ int r600_tri_flat(struct radeon *radeon)
if (r)
return r;
/* vs_shader */
+ vs_shader.ninputs = 2;
+ vs_shader.input_semantic[0] = 1;
+ vs_shader.input_gpr[0] = 1;
+ vs_shader.input_semantic[1] = 2;
+ vs_shader.input_gpr[1] = 2;
vs_shader.sq_pgm_resources_vs = 0x00000006;
vs_shader.ndwords = 64;
vs_shader.opcodes = vsshaders;
@@ -386,6 +391,28 @@ int r600_tri_flat(struct radeon *radeon)
if (r)
return r;
+ /* inputs */
+ batch.inputs.nelements = 2;
+ batch.inputs.nbuffers = 2;
+ batch.inputs.buffers[0].bo = NULL;
+ batch.inputs.buffers[0].sq_vtx_constant_word0 = 0x00000000;
+ batch.inputs.buffers[0].sq_vtx_constant_word2 = 0x03001C00;
+ batch.inputs.buffers[0].sq_vtx_constant_word3 = 0x00000001;
+ batch.inputs.buffers[1].bo = NULL;
+ batch.inputs.buffers[1].sq_vtx_constant_word0 = 0x0000000C;
+ batch.inputs.buffers[1].sq_vtx_constant_word2 = 0x02301C00;
+ batch.inputs.buffers[1].sq_vtx_constant_word3 = 0x00000001;
+ batch.inputs.elements[0].buffer_id = 0;
+ batch.inputs.elements[0].semantic = 1;
+ batch.inputs.elements[0].sq_vtx_word0 = 0x7C000000;
+ batch.inputs.elements[0].sq_vtx_word1 = 0x1C351000;
+ batch.inputs.elements[0].sq_vtx_word2 = 0x00080000;
+ batch.inputs.elements[1].buffer_id = 1;
+ batch.inputs.elements[1].semantic = 2;
+ batch.inputs.elements[1].sq_vtx_word0 = 0x7C000000;
+ batch.inputs.elements[1].sq_vtx_word1 = 0x18ED1000;
+ batch.inputs.elements[1].sq_vtx_word2 = 0x00080000;
+
/* batch */
r = radeon_batches_queue(rdev, &batch);
if (r)