diff options
author | Jerome Glisse <jglisse@redhat.com> | 2010-02-13 21:58:58 +0100 |
---|---|---|
committer | Jerome Glisse <jglisse@redhat.com> | 2010-02-13 21:58:58 +0100 |
commit | 41cbc9da9cdf4df09e92d216b8b2f4cd6b34b453 (patch) | |
tree | a3c5be1305cabbe767a2be9cea1e9e8896a97ca7 | |
parent | bbad7babb8640f4779d1ff355b098b36c790729f (diff) |
fs shader first pass
-rw-r--r-- | r600_atom.c | 65 | ||||
-rw-r--r-- | r600_atom_api.h | 55 | ||||
-rw-r--r-- | r600_shader.c | 282 | ||||
-rw-r--r-- | r600d.h | 154 | ||||
-rw-r--r-- | radeon_atom.h | 25 | ||||
-rw-r--r-- | test.c | 27 |
6 files changed, 282 insertions, 326 deletions
diff --git a/r600_atom.c b/r600_atom.c index f55f7af..4d906ad 100644 --- a/r600_atom.c +++ b/r600_atom.c @@ -1434,18 +1434,10 @@ out_err: * r600_vs_shader */ struct r600_vs_shader { - struct radeon_atom atom; - u32 pkts[16]; - u8 input_semantic[32]; - u8 input_resource_id[32]; - u8 input_gpr[32]; - u8 ninputs; - u8 output_semantic[32]; - u8 output_gpr[32]; - u8 noutputs; - u32 ndwords; - u32 *opcodes; - struct r600_atoms *atoms; + struct radeon_atom atom; + u32 pkts[64]; + struct drm_r600_vs_shader vs; + struct r600_atoms *atoms; }; static void r600_vs_shader_release(struct kref *kref) @@ -1456,7 +1448,7 @@ static void r600_vs_shader_release(struct kref *kref) mutex_lock(&vs_shader->atoms->mutex); list_del_init(&vs_shader->atom.list); mutex_unlock(&vs_shader->atoms->mutex); - kfree(vs_shader->opcodes); + kfree(vs_shader->vs.opcodes); kfree(vs_shader); } @@ -1467,13 +1459,20 @@ static int r600_vs_shader_emit(struct radeon_device *rdev, { struct r600_vs_shader *vs_shader = (struct r600_vs_shader *)atom; struct r600_batch *batch = (struct r600_batch *)data; - u32 *opcodes = batch->shaders->ptr; + u32 *opcodes = batch->shaders->ptr, ndwords; int r; - memcpy(&opcodes[batch->shaders_idx], vs_shader->opcodes, vs_shader->ndwords * 4); + r = r600_shader_build_fs(rdev, &opcodes[batch->shaders_idx], &ndwords, + &batch->inputs, &vs_shader->vs); + if (r) + return r; vs_shader->pkts[2] = batch->shaders_idx >> 6; vs_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT); - batch->shaders_idx += ((vs_shader->ndwords * 4) + 63) & 0xFFFFFFC0; + batch->shaders_idx += (ndwords + 63) & 0xFFFFFFC0; + memcpy(&opcodes[batch->shaders_idx], vs_shader->vs.opcodes, vs_shader->vs.ndwords * 4); + vs_shader->pkts[13] = batch->shaders_idx >> 6; + vs_shader->pkts[15] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT); + batch->shaders_idx += (vs_shader->vs.ndwords + 63) & 0xFFFFFFC0; r = radeon_ib_copy(ib, vs_shader->pkts, atom->npkts); return r; } @@ -1504,24 +1503,29 @@ static int r600_vs_shader_create(struct radeon_device *rdev, r = -EINVAL; goto out_err; } - vs_shader->opcodes = kmalloc(4 * pvs_shader.ndwords, GFP_KERNEL); - if (vs_shader->opcodes == NULL) { + memcpy(&vs_shader->vs, &pvs_shader, sizeof(struct drm_r600_vs_shader)); + vs_shader->vs.opcodes = kmalloc(4 * pvs_shader.ndwords, GFP_KERNEL); + if (vs_shader->vs.opcodes == NULL) { dev_err(rdev->dev, "shader too big\n"); r = -ENOMEM; goto out_err; } - memcpy(vs_shader->opcodes, pvs_shader.opcodes, 4 * pvs_shader.ndwords); - vs_shader->ndwords = pvs_shader.ndwords; - vs_shader->ninputs = pvs_shader.ninputs; - vs_shader->noutputs = pvs_shader.noutputs; - for (i = 0; i < 32; i++) { - vs_shader->input_semantic[i] = pvs_shader.input_semantic[i]; - vs_shader->input_resource_id[i] = pvs_shader.input_resource_id[i]; - vs_shader->input_gpr[i] = pvs_shader.input_gpr[i]; - vs_shader->output_semantic[i] = pvs_shader.output_semantic[i]; - vs_shader->output_gpr[i] = pvs_shader.output_gpr[i]; - } + memcpy(vs_shader->vs.opcodes, pvs_shader.opcodes, 4 * pvs_shader.ndwords); vs_shader->atom.npkts = 0; + /* SQ_PGM_START_FS */ + vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1); + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000225; + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000; + vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_NOP, 0); + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000; + /* SQ_PGM_RESOURCES_FS */ + vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1); + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000229; + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000; + /* SQ_PGM_CF_OFFSET_FS */ + vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1); + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000237; + vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000000; /* SQ_PGM_START_VS */ vs_shader->pkts[vs_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1); vs_shader->pkts[vs_shader->atom.npkts++] = 0x00000216; @@ -1587,7 +1591,7 @@ static int r600_ps_shader_emit(struct radeon_device *rdev, memcpy(&opcodes[batch->shaders_idx], ps_shader->opcodes, ps_shader->ndwords * 4); ps_shader->pkts[2] = batch->shaders_idx >> 6; ps_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT); - batch->shaders_idx += ((ps_shader->ndwords * 4) + 63) & 0xFFFFFFC0; + batch->shaders_idx += (ps_shader->ndwords + 63) & 0xFFFFFFC0; r = radeon_ib_copy(ib, ps_shader->pkts, atom->npkts); return r; } @@ -1873,6 +1877,7 @@ int r600_batches_queue(struct radeon_device *rdev, r = r600_batch_alloc(&rbatch); if (r) return r; + memcpy(&rbatch->inputs, &batch->inputs, sizeof(struct drm_r600_vs_input)); mutex_lock(&atoms->mutex); i = 0; if (batch->blend == NULL || batch->cb_cntl == NULL || diff --git a/r600_atom_api.h b/r600_atom_api.h index 8b5b1ff..e910bc5 100644 --- a/r600_atom_api.h +++ b/r600_atom_api.h @@ -304,23 +304,46 @@ struct drm_r600_ps_shader { u32 sq_pgm_exports_ps; }; -struct drm_r600_batch { - struct radeon_atom *vs_constants; - struct radeon_atom *ps_constants; - struct radeon_atom *blend; - struct radeon_atom *cb; - struct radeon_atom *cb_cntl; - struct radeon_atom *pa; - struct radeon_atom *tp; - struct radeon_atom *vport; - struct radeon_atom *db; - struct radeon_atom *db_cntl; - struct radeon_atom *vgt; - struct radeon_atom *spi; - struct radeon_atom *sx; - struct radeon_atom *vs_shader; - struct radeon_atom *ps_shader; +struct drm_r600_vs_buffer { + struct radeon_bo *bo; + u32 resource_id; + u32 sq_vtx_constant_word0; + u32 sq_vtx_constant_word2; + u32 sq_vtx_constant_word3; +}; + +struct drm_r600_vs_element { + u32 buffer_id; + u32 semantic; + u32 sq_vtx_word0; + u32 sq_vtx_word1; + u32 sq_vtx_word2; }; +struct drm_r600_vs_input { + u32 nelements; + u32 nbuffers; + struct drm_r600_vs_element elements[32]; + struct drm_r600_vs_buffer buffers[32]; +}; + +struct drm_r600_batch { + struct radeon_atom *vs_constants; + struct radeon_atom *ps_constants; + struct radeon_atom *blend; + struct radeon_atom *cb; + struct radeon_atom *cb_cntl; + struct radeon_atom *pa; + struct radeon_atom *tp; + struct radeon_atom *vport; + struct radeon_atom *db; + struct radeon_atom *db_cntl; + struct radeon_atom *vgt; + struct radeon_atom *spi; + struct radeon_atom *sx; + struct radeon_atom *vs_shader; + struct radeon_atom *ps_shader; + struct drm_r600_vs_input inputs; +}; #endif diff --git a/r600_shader.c b/r600_shader.c index 7f8794d..fd482c8 100644 --- a/r600_shader.c +++ b/r600_shader.c @@ -20,72 +20,71 @@ struct r600_block { struct list_head list; u32 idx; - u32 f_idx; - u32 ninst; u32 last; }; struct r600_inst_name { + char safe; char name[64]; }; static struct r600_inst_name sq_cf_inst_name[] = { - {"NOP"}, - {"TEX"}, - {"VTX"}, - {"VTX_TC"}, - {"LOOP_START"}, - {"LOOP_END"}, - {"LOOP_START_DX10"}, - {"LOOP_START_NO_AL"}, - {"LOOP_CONTINUE"}, - {"LOOP_BREAK"}, - {"JUMP"}, - {"PUSH"}, - {"PUSH_ELSE"}, - {"ELSE"}, - {"POP"}, - {"POP_JUMP"}, - {"POP_PUSH"}, - {"POP_PUSH_ELSE"}, - {"CALL"}, - {"CALL_FS"}, - {"RETURN"}, - {"EMIT_VERTEX"}, - {"EMIT_CUT_VERTEX"}, - {"CUT_VERTEX"}, - {"KILL"}, + {1, "NOP"}, + {0, "TEX"}, + {0, "VTX"}, + {0, "VTX_TC"}, + {1, "LOOP_START"}, + {1, "LOOP_END"}, + {1, "LOOP_START_DX10"}, + {1, "LOOP_START_NO_AL"}, + {1, "LOOP_CONTINUE"}, + {1, "LOOP_BREAK"}, + {1, "JUMP"}, + {1, "PUSH"}, + {1, "PUSH_ELSE"}, + {1, "ELSE"}, + {1, "POP"}, + {1, "POP_JUMP"}, + {1, "POP_PUSH"}, + {1, "POP_PUSH_ELSE"}, + {1, "CALL"}, + {1, "CALL_FS"}, + {1, "RETURN"}, + {0, "EMIT_VERTEX"}, + {0, "EMIT_CUT_VERTEX"}, + {0, "CUT_VERTEX"}, + {1, "KILL"}, }; static struct r600_inst_name sq_cf_alu_inst_name[] = { - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"unknown"}, - {"ALU"}, - {"ALU_PUSH_BEFORE"}, - {"ALU_POP_AFTER"}, - {"ALU_POP2_AFTER"}, - {"unknown"}, - {"ALU_CONTINUE"}, - {"ALU_BREAK"}, - {"ALU_ELSE_AFTER"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {0, "unknown"}, + {1, "ALU"}, + {1, "ALU_PUSH_BEFORE"}, + {1, "ALU_POP_AFTER"}, + {1, "ALU_POP2_AFTER"}, + {0, "unknown"}, + {1, "ALU_CONTINUE"}, + {1, "ALU_BREAK"}, + {1, "ALU_ELSE_AFTER"}, }; static struct r600_inst_name sq_cf_alloc_export_inst_name[] = { - {"MEM_STREAM0"}, - {"MEM_STREAM1"}, - {"MEM_STREAM2"}, - {"MEM_STREAM3"}, - {"MEM_SCRATCH"}, - {"MEM_REDUCTION"}, - {"MEM_RING"}, - {"EXPORT"}, - {"EXPORT_DONE"}, + {0, "MEM_STREAM0"}, + {0, "MEM_STREAM1"}, + {0, "MEM_STREAM2"}, + {0, "MEM_STREAM3"}, + {0, "MEM_SCRATCH"}, + {0, "MEM_REDUCTION"}, + {0, "MEM_RING"}, + {1, "EXPORT"}, + {1, "EXPORT_DONE"}, }; void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx) @@ -157,37 +156,6 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx) blk->last = 0; inst = (bytecode[idx+1] >> 23) & 0x7F; if ((inst & 0x78) >= 0x40) { - blk->f_idx = G_008DFC_ALU_ADDR(bytecode[idx+0]) << 1; - blk->ninst = G_008DFC_ALU_COUNT(bytecode[idx+1]); -#if 0 - /* SQ_CF_ALU */ - switch (G_008DFC_CF_ALU_INST(bytecode[idx+1])) { - case V_008DFC_SQ_CF_INST_ALU: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_POP_AFTER: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_POP_AFTER\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_POP2_AFTER: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_POP2_AFTER\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_CONTINUE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_CONTINUE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_BREAK: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_BREAK\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER\n", blk->idx, blk->f_idx, blk->ninst); - break; - default: - printf("Block[%d %d %d] unknown cf alu instruction 0x%02X\n", blk->idx, blk->f_idx, blk->ninst, G_008DFC_CF_ALU_INST(bytecode[idx+1])); - break; - } -#endif r600_disassemble_sq_cf_alu_inst(bytecode, ndwords, idx); } else { if (G_008DFC_CF_INST(bytecode[idx+1]) < 0x20) { @@ -196,115 +164,6 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx) r600_disassemble_sq_cf_alloc_export_inst(bytecode, ndwords, idx); blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]); } -#if 0 - switch (G_008DFC_CF_INST(bytecode[idx+1])) { - case V_008DFC_SQ_CF_INST_NOP: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_NOP\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_TEX: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_TEX\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_VTX: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_VTX\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_VTX_TC: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_VTX_TC\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_START: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_END: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_END\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_START_DX10: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START_DX10\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_START_NO_AL: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_START_NO_AL\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_CONTINUE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_CONTINUE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_LOOP_BREAK: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_LOOP_BREAK\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_JUMP: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_JUMP\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_PUSH: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_PUSH\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_PUSH_ELSE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_PUSH_ELSE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_ELSE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_ELSE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_POP: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_POP_JUMP: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_JUMP\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_POP_PUSH: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_PUSH\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_POP_PUSH_ELSE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_POP_PUSH_ELSE\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_CALL: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CALL\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_CALL_FS: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CALL_FS\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_RETURN: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_RETURN\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_EMIT_VERTEX: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EMIT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_CUT_VERTEX: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_CUT_VERTEX\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_KILL: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_KILL\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_STREAM0: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM0\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_STREAM1: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM1\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_STREAM2: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM2\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_STREAM3: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_STREAM3\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_SCRATCH: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_SCRATCH\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_REDUCTION: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_REDUCTION\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_MEM_RING: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_MEM_RING\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_EXPORT: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EXPORT\n", blk->idx, blk->f_idx, blk->ninst); - break; - case V_008DFC_SQ_CF_INST_EXPORT_DONE: - printf("Block[%d %d %d] V_008DFC_SQ_CF_INST_EXPORT_DONE\n", blk->idx, blk->f_idx, blk->ninst); - break; - default: - printf("Block[%d %d %d] unknown cf instruction 0x%02X\n", blk->idx, blk->f_idx, blk->ninst, G_008DFC_CF_INST(bytecode[idx+1])); - break; - } -#endif } return blk; } @@ -322,3 +181,42 @@ void r600_shader_disassemble(u32 *bytecode, u32 ndwords) idx += 2; } while (!blk->last); } + +int r600_shader_build_fs(struct radeon_device *rdev, + u32 *bytecode, u32 *ndwords, + struct drm_r600_vs_input *inputs, + struct drm_r600_vs_shader *vs) +{ + u32 idx = 0, i, rid, gpr, j; + + *ndwords = 0; + if (!inputs->nelements) { + dev_err(rdev->dev, "need at least one input for vertex shader\n"); + return -EINVAL; + } + bytecode[idx++] = 0x00000001; + bytecode[idx++] = 0x81200000 | S_008DFC_COUNT(inputs->nelements - 1); + for (i = 0; i < inputs->nelements; i++) { + if (inputs->elements[i].buffer_id >= inputs->nbuffers) { + dev_err(rdev->dev, "elements %d referencing invalid buffer %d\n", + i, inputs->elements[i].buffer_id); + return -EINVAL; + } + rid = inputs->buffers[inputs->elements[i].buffer_id].resource_id; + for (j = 0, gpr = -1; j < vs->ninputs; j++) { + if (vs->input_semantic[j] == inputs->elements[i].semantic) { + gpr = vs->input_gpr[j]; + break; + } + } + /* if vs has no corresponding input skip the elements */ + if (gpr == -1) + continue; + bytecode[idx++] = (inputs->elements[i].sq_vtx_word0 & 0xFC000000) | S_008DFC_BUFFER_ID(rid); + bytecode[idx++] = (inputs->elements[i].sq_vtx_word1 & 0xFFFFFC00) | S_008DFC_DST_GPR(gpr); + bytecode[idx++] = inputs->elements[i].sq_vtx_word2; + bytecode[idx++] = 0xCAFEDEAD; + } + *ndwords = idx; + return 0; +} @@ -498,83 +498,81 @@ #define S_008DFC_SEL_W(x) (((x) & 0x7) << 9) #define G_008DFC_SEL_W(x) (((x) >> 9) & 0x7) #define C_008DFC_SEL_W 0xFFFFF1FF -#define R_008DFC_SQ_ALU_WORD0 0x008DFC -#define S_008DFC_SRC0_SEL(x) (((x) & 0x1FF) << 0) -#define G_008DFC_SRC0_SEL(x) (((x) >> 0) & 0x1FF) -#define C_008DFC_SRC0_SEL 0xFFFFFE00 -#define S_008DFC_SRC0_REL(x) (((x) & 0x1) << 9) -#define G_008DFC_SRC0_REL(x) (((x) >> 9) & 0x1) -#define C_008DFC_SRC0_REL 0xFFFFFDFF -#define S_008DFC_SRC0_CHAN(x) (((x) & 0x3) << 10) -#define G_008DFC_SRC0_CHAN(x) (((x) >> 10) & 0x3) -#define C_008DFC_SRC0_CHAN 0xFFFFF3FF -#define S_008DFC_SRC0_NEG(x) (((x) & 0x1) << 12) -#define G_008DFC_SRC0_NEG(x) (((x) >> 12) & 0x1) -#define C_008DFC_SRC0_NEG 0xFFFFEFFF -#define S_008DFC_SRC1_SEL(x) (((x) & 0x1FF) << 13) -#define G_008DFC_SRC1_SEL(x) (((x) >> 13) & 0x1FF) -#define C_008DFC_SRC1_SEL 0xFFC01FFF -#define S_008DFC_SRC1_REL(x) (((x) & 0x1) << 22) -#define G_008DFC_SRC1_REL(x) (((x) >> 22) & 0x1) -#define C_008DFC_SRC1_REL 0xFFBFFFFF -#define S_008DFC_SRC1_CHAN(x) (((x) & 0x3) << 23) -#define G_008DFC_SRC1_CHAN(x) (((x) >> 23) & 0x3) -#define C_008DFC_SRC1_CHAN 0xFE7FFFFF -#define S_008DFC_SRC1_NEG(x) (((x) & 0x1) << 25) -#define G_008DFC_SRC1_NEG(x) (((x) >> 25) & 0x1) -#define C_008DFC_SRC1_NEG 0xFDFFFFFF -#define S_008DFC_INDEX_MODE(x) (((x) & 0x7) << 26) -#define G_008DFC_INDEX_MODE(x) (((x) >> 26) & 0x7) -#define C_008DFC_INDEX_MODE 0xE3FFFFFF -#define S_008DFC_PRED_SEL(x) (((x) & 0x3) << 29) -#define G_008DFC_PRED_SEL(x) (((x) >> 29) & 0x3) -#define C_008DFC_PRED_SEL 0x9FFFFFFF -#define S_008DFC_LAST(x) (((x) & 0x1) << 31) -#define G_008DFC_LAST(x) (((x) >> 31) & 0x1) -#define C_008DFC_LAST 0x7FFFFFFF -#define R_008DFC_SQ_ALU_WORD1 0x008DFC -#define S_008DFC_ENCODING(x) (((x) & 0x7) << 15) -#define G_008DFC_ENCODING(x) (((x) >> 15) & 0x7) -#define C_008DFC_ENCODING 0xFFFC7FFF -#define S_008DFC_BANK_SWIZZLE(x) (((x) & 0x7) << 18) -#define G_008DFC_BANK_SWIZZLE(x) (((x) >> 18) & 0x7) -#define C_008DFC_BANK_SWIZZLE 0xFFE3FFFF -#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 21) -#define G_008DFC_DST_GPR(x) (((x) >> 21) & 0x7F) -#define C_008DFC_DST_GPR 0xF01FFFFF -#define S_008DFC_DST_REL(x) (((x) & 0x1) << 28) -#define G_008DFC_DST_REL(x) (((x) >> 28) & 0x1) -#define C_008DFC_DST_REL 0xEFFFFFFF -#define S_008DFC_DST_CHAN(x) (((x) & 0x3) << 29) -#define G_008DFC_DST_CHAN(x) (((x) >> 29) & 0x3) -#define C_008DFC_DST_CHAN 0x9FFFFFFF -#define S_008DFC_CLAMP(x) (((x) & 0x1) << 31) -#define G_008DFC_CLAMP(x) (((x) >> 31) & 0x1) -#define C_008DFC_CLAMP 0x7FFFFFFF -#define R_008DFC_SQ_ALU_WORD1_OP2 0x008DFC -#define S_008DFC_SRC0_ABS(x) (((x) & 0x1) << 0) -#define G_008DFC_SRC0_ABS(x) (((x) >> 0) & 0x1) -#define C_008DFC_SRC0_ABS 0xFFFFFFFE -#define S_008DFC_SRC1_ABS(x) (((x) & 0x1) << 1) -#define G_008DFC_SRC1_ABS(x) (((x) >> 1) & 0x1) -#define C_008DFC_SRC1_ABS 0xFFFFFFFD -#define S_008DFC_UPDATE_EXECUTE_MASK(x) (((x) & 0x1) << 2) -#define G_008DFC_UPDATE_EXECUTE_MASK(x) (((x) >> 2) & 0x1) -#define C_008DFC_UPDATE_EXECUTE_MASK 0xFFFFFFFB -#define S_008DFC_UPDATE_PRED(x) (((x) & 0x1) << 3) -#define G_008DFC_UPDATE_PRED(x) (((x) >> 3) & 0x1) -#define C_008DFC_UPDATE_PRED 0xFFFFFFF7 -#define S_008DFC_WRITE_MASK(x) (((x) & 0x1) << 4) -#define G_008DFC_WRITE_MASK(x) (((x) >> 4) & 0x1) -#define C_008DFC_WRITE_MASK 0xFFFFFFEF -#define S_008DFC_FOG_MERGE(x) (((x) & 0x1) << 5) -#define G_008DFC_FOG_MERGE(x) (((x) >> 5) & 0x1) -#define C_008DFC_FOG_MERGE 0xFFFFFFDF -#define S_008DFC_OMOD(x) (((x) & 0x3) << 6) -#define G_008DFC_OMOD(x) (((x) >> 6) & 0x3) -#define C_008DFC_OMOD 0xFFFFFF3F -#define S_008DFC_ALU_INST(x) (((x) & 0x3FF) << 8) -#define G_008DFC_ALU_INST(x) (((x) >> 8) & 0x3FF) -#define C_008DFC_ALU_INST 0xFFFC00FF +#define R_008DFC_SQ_VTX_WORD0 0x008DFC +#define S_008DFC_VTX_INST(x) (((x) & 0x1F) << 0) +#define G_008DFC_VTX_INST(x) (((x) >> 0) & 0x1F) +#define C_008DFC_VTX_INST 0xFFFFFFE0 +#define S_008DFC_FETCH_TYPE(x) (((x) & 0x3) << 5) +#define G_008DFC_FETCH_TYPE(x) (((x) >> 5) & 0x3) +#define C_008DFC_FETCH_TYPE 0xFFFFFF9F +#define S_008DFC_FETCH_WHOLE_QUAD(x) (((x) & 0x1) << 7) +#define G_008DFC_FETCH_WHOLE_QUAD(x) (((x) >> 7) & 0x1) +#define C_008DFC_FETCH_WHOLE_QUAD 0xFFFFFF7F +#define S_008DFC_BUFFER_ID(x) (((x) & 0xFF) << 8) +#define G_008DFC_BUFFER_ID(x) (((x) >> 8) & 0xFF) +#define C_008DFC_BUFFER_ID 0xFFFF00FF +#define S_008DFC_SRC_GPR(x) (((x) & 0x7F) << 16) +#define G_008DFC_SRC_GPR(x) (((x) >> 16) & 0x7F) +#define C_008DFC_SRC_GPR 0xFF80FFFF +#define S_008DFC_SRC_REL(x) (((x) & 0x1) << 23) +#define G_008DFC_SRC_REL(x) (((x) >> 23) & 0x1) +#define C_008DFC_SRC_REL 0xFF7FFFFF +#define S_008DFC_SRC_SEL_X(x) (((x) & 0x3) << 24) +#define G_008DFC_SRC_SEL_X(x) (((x) >> 24) & 0x3) +#define C_008DFC_SRC_SEL_X 0xFCFFFFFF +#define S_008DFC_MEGA_FETCH_COUNT(x) (((x) & 0x3F) << 26) +#define G_008DFC_MEGA_FETCH_COUNT(x) (((x) >> 26) & 0x3F) +#define C_008DFC_MEGA_FETCH_COUNT 0x03FFFFFF +#define R_008DFC_SQ_VTX_WORD1 0x008DFC +#define S_008DFC_DST_SEL_X(x) (((x) & 0x7) << 9) +#define G_008DFC_DST_SEL_X(x) (((x) >> 9) & 0x7) +#define C_008DFC_DST_SEL_X 0xFFFFF1FF +#define S_008DFC_DST_SEL_Y(x) (((x) & 0x7) << 12) +#define G_008DFC_DST_SEL_Y(x) (((x) >> 12) & 0x7) +#define C_008DFC_DST_SEL_Y 0xFFFF8FFF +#define S_008DFC_DST_SEL_Z(x) (((x) & 0x7) << 15) +#define G_008DFC_DST_SEL_Z(x) (((x) >> 15) & 0x7) +#define C_008DFC_DST_SEL_Z 0xFFFC7FFF +#define S_008DFC_DST_SEL_W(x) (((x) & 0x7) << 18) +#define G_008DFC_DST_SEL_W(x) (((x) >> 18) & 0x7) +#define C_008DFC_DST_SEL_W 0xFFE3FFFF +#define S_008DFC_USE_CONST_FIELDS(x) (((x) & 0x1) << 21) +#define G_008DFC_USE_CONST_FIELDS(x) (((x) >> 21) & 0x1) +#define C_008DFC_USE_CONST_FIELDS 0xFFDFFFFF +#define S_008DFC_DATA_FORMAT(x) (((x) & 0x3F) << 22) +#define G_008DFC_DATA_FORMAT(x) (((x) >> 22) & 0x3F) +#define C_008DFC_DATA_FORMAT 0xF03FFFFF +#define S_008DFC_NUM_FORMAT_ALL(x) (((x) & 0x3) << 28) +#define G_008DFC_NUM_FORMAT_ALL(x) (((x) >> 28) & 0x3) +#define C_008DFC_NUM_FORMAT_ALL 0xCFFFFFFF +#define S_008DFC_FORMAT_COMP_ALL(x) (((x) & 0x1) << 30) +#define G_008DFC_FORMAT_COMP_ALL(x) (((x) >> 30) & 0x1) +#define C_008DFC_FORMAT_COMP_ALL 0xBFFFFFFF +#define S_008DFC_SRF_MODE_ALL(x) (((x) & 0x1) << 31) +#define G_008DFC_SRF_MODE_ALL(x) (((x) >> 31) & 0x1) +#define C_008DFC_SRF_MODE_ALL 0x7FFFFFFF +#define R_008DFC_SQ_VTX_WORD1_GPR 0x008DFC +#define S_008DFC_DST_GPR(x) (((x) & 0x7F) << 0) +#define G_008DFC_DST_GPR(x) (((x) >> 0) & 0x7F) +#define C_008DFC_DST_GPR 0xFFFFFF80 +#define S_008DFC_DST_REL(x) (((x) & 0x1) << 7) +#define G_008DFC_DST_REL(x) (((x) >> 7) & 0x1) +#define C_008DFC_DST_REL 0xFFFFFF7F +#define R_008DFC_SQ_VTX_WORD2 0x008DFC +#define S_008DFC_OFFSET(x) (((x) & 0xFFFF) << 0) +#define G_008DFC_OFFSET(x) (((x) >> 0) & 0xFFFF) +#define C_008DFC_OFFSET 0xFFFF0000 +#define S_008DFC_ENDIAN_SWAP(x) (((x) & 0x3) << 16) +#define G_008DFC_ENDIAN_SWAP(x) (((x) >> 16) & 0x3) +#define C_008DFC_ENDIAN_SWAP 0xFFFCFFFF +#define S_008DFC_CONST_BUF_NO_STRIDE(x) (((x) & 0x1) << 18) +#define G_008DFC_CONST_BUF_NO_STRIDE(x) (((x) >> 18) & 0x1) +#define C_008DFC_CONST_BUF_NO_STRIDE 0xFFFBFFFF +#define S_008DFC_MEGA_FETCH(x) (((x) & 0x1) << 19) +#define G_008DFC_MEGA_FETCH(x) (((x) >> 19) & 0x1) +#define C_008DFC_MEGA_FETCH 0xFFF7FFFF +#define S_008DFC_ALT_CONST(x) (((x) & 0x1) << 20) +#define G_008DFC_ALT_CONST(x) (((x) >> 20) & 0x1) +#define C_008DFC_ALT_CONST 0xFFEFFFFF #endif diff --git a/radeon_atom.h b/radeon_atom.h index 79829a2..10d9935 100644 --- a/radeon_atom.h +++ b/radeon_atom.h @@ -55,16 +55,17 @@ struct radeon_atom { /* R600 */ #define R600_BATCH_NATOMS 15 struct r600_batch { - struct list_head list; - struct list_head pre_flushes; - struct list_head post_flushes; - struct radeon_atom *atoms[R600_BATCH_NATOMS]; - struct radeon_atom *emit_atoms[R600_BATCH_NATOMS]; - u32 nemit_atoms; - u32 nflushes; - u32 npkts; - struct radeon_bo *shaders; - u32 shaders_idx; + struct list_head list; + struct list_head pre_flushes; + struct list_head post_flushes; + struct radeon_atom *atoms[R600_BATCH_NATOMS]; + struct radeon_atom *emit_atoms[R600_BATCH_NATOMS]; + u32 nemit_atoms; + u32 nflushes; + u32 npkts; + struct radeon_bo *shaders; + u32 shaders_idx; + struct drm_r600_vs_input inputs; }; #define R600_SHADER_SIZE (32 * 1024) @@ -120,6 +121,10 @@ static inline void radeon_atom_put(struct radeon_atom *atom) } /* R600 */ +extern int r600_shader_build_fs(struct radeon_device *rdev, + u32 *bytecode, u32 *ndwords, + struct drm_r600_vs_input *inputs, + struct drm_r600_vs_shader *vs); extern int r600_atoms_init(struct radeon_device *rdev, struct r600_atoms *atoms); extern void r600_atoms_release(struct radeon_device *rdev, struct r600_atoms *atoms); extern int r600_atom_create(struct radeon_device *rdev, @@ -365,6 +365,11 @@ int r600_tri_flat(struct radeon *radeon) if (r) return r; /* vs_shader */ + vs_shader.ninputs = 2; + vs_shader.input_semantic[0] = 1; + vs_shader.input_gpr[0] = 1; + vs_shader.input_semantic[1] = 2; + vs_shader.input_gpr[1] = 2; vs_shader.sq_pgm_resources_vs = 0x00000006; vs_shader.ndwords = 64; vs_shader.opcodes = vsshaders; @@ -386,6 +391,28 @@ int r600_tri_flat(struct radeon *radeon) if (r) return r; + /* inputs */ + batch.inputs.nelements = 2; + batch.inputs.nbuffers = 2; + batch.inputs.buffers[0].bo = NULL; + batch.inputs.buffers[0].sq_vtx_constant_word0 = 0x00000000; + batch.inputs.buffers[0].sq_vtx_constant_word2 = 0x03001C00; + batch.inputs.buffers[0].sq_vtx_constant_word3 = 0x00000001; + batch.inputs.buffers[1].bo = NULL; + batch.inputs.buffers[1].sq_vtx_constant_word0 = 0x0000000C; + batch.inputs.buffers[1].sq_vtx_constant_word2 = 0x02301C00; + batch.inputs.buffers[1].sq_vtx_constant_word3 = 0x00000001; + batch.inputs.elements[0].buffer_id = 0; + batch.inputs.elements[0].semantic = 1; + batch.inputs.elements[0].sq_vtx_word0 = 0x7C000000; + batch.inputs.elements[0].sq_vtx_word1 = 0x1C351000; + batch.inputs.elements[0].sq_vtx_word2 = 0x00080000; + batch.inputs.elements[1].buffer_id = 1; + batch.inputs.elements[1].semantic = 2; + batch.inputs.elements[1].sq_vtx_word0 = 0x7C000000; + batch.inputs.elements[1].sq_vtx_word1 = 0x18ED1000; + batch.inputs.elements[1].sq_vtx_word2 = 0x00080000; + /* batch */ r = radeon_batches_queue(rdev, &batch); if (r) |