diff options
author | Jerome Glisse <jglisse@redhat.com> | 2010-02-13 23:31:37 +0100 |
---|---|---|
committer | Jerome Glisse <jglisse@redhat.com> | 2010-02-13 23:31:37 +0100 |
commit | 2c54f82547c5c7f07f21582a5d9abe429d38e60e (patch) | |
tree | 9e573706bc22548473d5e73bde9f60b23528b013 | |
parent | 41cbc9da9cdf4df09e92d216b8b2f4cd6b34b453 (diff) |
fs shader at work
-rw-r--r-- | r600_atom.c | 91 | ||||
-rw-r--r-- | r600_shader.c | 17 | ||||
-rw-r--r-- | radeon_atom.h | 11 | ||||
-rw-r--r-- | test.c | 48 |
4 files changed, 158 insertions, 9 deletions
diff --git a/r600_atom.c b/r600_atom.c index 4d906ad..5a20e60 100644 --- a/r600_atom.c +++ b/r600_atom.c @@ -49,6 +49,34 @@ static void r600_emit_flush(struct radeon_device *rdev, RADEON_GEM_DOMAIN_GTT); } +static void r600_emit_resources(struct radeon_device *rdev, + struct radeon_ib *ib, + struct radeon_bo *bo, + u32 dw0, u32 dw1, u32 dw2, u32 dw3, + u32 dw4, u32 dw5, u32 dw6, u32 dw7) +{ + ib->ptr[ib->cpkts++] = PKT3(PKT3_SURFACE_SYNC, 3); + ib->ptr[ib->cpkts++] = 0x01000000; + ib->ptr[ib->cpkts++] = radeon_bo_size(bo) >> 8; + ib->ptr[ib->cpkts++] = 0x00000000; + ib->ptr[ib->cpkts++] = 0x0000000A; + ib->ptr[ib->cpkts++] = PKT3(PKT3_NOP, 0); + ib->ptr[ib->cpkts++] = radeon_ib_reloc(ib, bo, RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT); + ib->ptr[ib->cpkts++] = PKT3(PKT3_SET_RESOURCE, 7); + ib->ptr[ib->cpkts++] = dw0; + ib->ptr[ib->cpkts++] = dw1; + ib->ptr[ib->cpkts++] = dw2; + ib->ptr[ib->cpkts++] = dw3; + ib->ptr[ib->cpkts++] = dw4; + ib->ptr[ib->cpkts++] = dw5; + ib->ptr[ib->cpkts++] = dw6; + ib->ptr[ib->cpkts++] = dw7; + ib->ptr[ib->cpkts++] = PKT3(PKT3_NOP, 0); + ib->ptr[ib->cpkts++] = radeon_ib_reloc(ib, bo, RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT); +} + /* * r600_cb */ @@ -1466,12 +1494,14 @@ static int r600_vs_shader_emit(struct radeon_device *rdev, &batch->inputs, &vs_shader->vs); if (r) return r; +printf("FS at %d\n", batch->shaders_idx); vs_shader->pkts[2] = batch->shaders_idx >> 6; vs_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT); batch->shaders_idx += (ndwords + 63) & 0xFFFFFFC0; memcpy(&opcodes[batch->shaders_idx], vs_shader->vs.opcodes, vs_shader->vs.ndwords * 4); vs_shader->pkts[13] = batch->shaders_idx >> 6; vs_shader->pkts[15] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT); +printf("VS at %d\n", batch->shaders_idx); batch->shaders_idx += (vs_shader->vs.ndwords + 63) & 0xFFFFFFC0; r = radeon_ib_copy(ib, vs_shader->pkts, atom->npkts); return r; @@ -1800,6 +1830,8 @@ static int r600_batch_alloc(struct r600_batch **batch) INIT_LIST_HEAD(&rbatch->pre_flushes); INIT_LIST_HEAD(&rbatch->post_flushes); rbatch->nemit_atoms = 0; + rbatch->shaders_idx = 0; + rbatch->nfs_resources = 0; *batch = rbatch; return 0; } @@ -1807,6 +1839,32 @@ static int r600_batch_alloc(struct r600_batch **batch) /* * r600_batches */ +static int r600_batches_fs_resource_is_present(struct radeon_device *rdev, + struct r600_batches *batches, + struct drm_r600_vs_buffer *buffer) +{ + int i; + + for (i = 0; i < batches->nfs_resources; i++) { + if (!memcmp(&batches->fs_resource[i], buffer, sizeof(struct drm_r600_vs_buffer))) { + buffer->resource_id = i; + return 1; + } + } + return 0; +} + +static void r600_batches_fs_resource_add(struct radeon_device *rdev, + struct r600_batches *batches, + struct drm_r600_vs_buffer *buffer) +{ + int i = batches->nfs_resources++; + + buffer->resource_id = i; + memcpy(&batches->fs_resource[i], buffer, sizeof(struct drm_r600_vs_buffer)); + radeon_bo_ref(buffer->bo); +} + static void r600_batches_clear_locked(struct radeon_device *rdev, struct r600_batches *batches) { struct r600_batch *batch, *n; @@ -1826,6 +1884,10 @@ static void r600_batches_clear_locked(struct radeon_device *rdev, struct r600_ba } INIT_LIST_HEAD(&batches->batches); batches->npkts = 0; + for (i = 0; i < batches->nfs_resources; i++) { + radeon_bo_unref(batches->fs_resource[i].bo); + } + batches->nfs_resources = 0; } static int r600_batches_flush_locked(struct radeon_device *rdev, struct r600_batches *batches) @@ -1834,6 +1896,16 @@ static int r600_batches_flush_locked(struct radeon_device *rdev, struct r600_bat struct radeon_atom_flush *flush; int r, i; + for (i = 0; i < batches->nfs_resources; i++) { + r600_emit_resources(rdev, batches->ib, batches->fs_resource[i].bo, + (batches->fs_resource[i].resource_id + 320) * 7, + batches->fs_resource[i].sq_vtx_constant_word0, + radeon_bo_size(batches->fs_resource[i].bo) - + batches->fs_resource[i].sq_vtx_constant_word0, + batches->fs_resource[i].sq_vtx_constant_word2, + batches->fs_resource[i].sq_vtx_constant_word3, + 0, 0, 0xC0000000); + } list_for_each_entry(batch, &batches->batches, list) { list_for_each_entry(flush, &batch->pre_flushes, list) { r600_emit_flush(rdev, batches->ib, flush->bo, flush->flags); @@ -1874,6 +1946,13 @@ int r600_batches_queue(struct radeon_device *rdev, struct r600_vgt *vgt; int r, i; + for (i = 0; i < batch->inputs.nbuffers; i++) { + if (batch->inputs.buffers[i].sq_vtx_constant_word0 >= + radeon_bo_size(batch->inputs.buffers[i].bo)) { + dev_err(rdev->dev, "offset for vertex buffer %d bigger than buffer\n", i); + return -EINVAL; + } + } r = r600_batch_alloc(&rbatch); if (r) return r; @@ -1959,6 +2038,13 @@ reprocess: rbatch->npkts = 0; /* flush + wait until = 5dw */ rbatch->npkts += 5; + for (i = 0; i < rbatch->inputs.nbuffers; i++) { + if (!r600_batches_fs_resource_is_present(rdev, batches, &rbatch->inputs.buffers[i])) { + rbatch->nfs_resources += 1; + rbatch->inputs.buffers[i].resource_id = -1; + } + } + rbatch->npkts += rbatch->nfs_resources * 18; for (i = 0; i < R600_BATCH_NATOMS; i++) { if (rbatch->atoms[i]) { r = rbatch->atoms[i]->process(rdev, rbatch->atoms[i], batches->last_id[i], rbatch); @@ -1994,6 +2080,11 @@ reprocess: batches->last_id[i] = rbatch->atoms[i]->id; } } + for (i = 0; i < rbatch->inputs.nbuffers; i++) { + if (rbatch->inputs.buffers[i].resource_id == -1) { + r600_batches_fs_resource_add(rdev, batches, &rbatch->inputs.buffers[i]); + } + } batches->npkts += rbatch->npkts; list_add_tail(&rbatch->list, &batches->batches); mutex_unlock(&atoms->mutex); diff --git a/r600_shader.c b/r600_shader.c index fd482c8..de22654 100644 --- a/r600_shader.c +++ b/r600_shader.c @@ -89,6 +89,10 @@ static struct r600_inst_name sq_cf_alloc_export_inst_name[] = { void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx) { + u32 addr = G_008DFC_ADDR(bytecode[idx+0]) << 1; + u32 count = G_008DFC_COUNT(bytecode[idx+1]); + u32 i; + printf("0x%08X 0x%08X CF_INST: %s\n", bytecode[idx+0], bytecode[idx+1], sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name); printf(" word0: addr (in dw) %d\n", G_008DFC_ADDR(bytecode[idx+0]) << 1); @@ -103,6 +107,10 @@ void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx) sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name); printf(" word1: whole quad mode %d\n", G_008DFC_WHOLE_QUAD_MODE(bytecode[idx+1])); printf(" word1: barrier %d\n", G_008DFC_BARRIER(bytecode[idx+1])); + for (i = 0; i <= count; i++) + printf(" 0x%08X 0x%08X 0x%08X 0x%08X\n", + bytecode[idx+addr+0+(i*4)], bytecode[idx+addr+1+(i*4)], + bytecode[idx+addr+2+(i*4)], bytecode[idx+addr+3+(i*4)]); } void r600_disassemble_sq_cf_alu_inst(u32 *bytecode, u32 ndwords, u32 idx) @@ -160,6 +168,7 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx) } else { if (G_008DFC_CF_INST(bytecode[idx+1]) < 0x20) { r600_disassemble_sq_cf_inst(bytecode, ndwords, idx); + blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]); } else { r600_disassemble_sq_cf_alloc_export_inst(bytecode, ndwords, idx); blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]); @@ -194,8 +203,10 @@ int r600_shader_build_fs(struct radeon_device *rdev, dev_err(rdev->dev, "need at least one input for vertex shader\n"); return -EINVAL; } - bytecode[idx++] = 0x00000001; - bytecode[idx++] = 0x81200000 | S_008DFC_COUNT(inputs->nelements - 1); + bytecode[idx++] = 0x00000002; + bytecode[idx++] = 0x81000000 | S_008DFC_COUNT(inputs->nelements - 1); + bytecode[idx++] = 0x00000000; + bytecode[idx++] = 0x8A000000; for (i = 0; i < inputs->nelements; i++) { if (inputs->elements[i].buffer_id >= inputs->nbuffers) { dev_err(rdev->dev, "elements %d referencing invalid buffer %d\n", @@ -209,6 +220,7 @@ int r600_shader_build_fs(struct radeon_device *rdev, break; } } +printf("elements %d: gpr %d resource id %d\n", i, gpr, rid); /* if vs has no corresponding input skip the elements */ if (gpr == -1) continue; @@ -218,5 +230,6 @@ int r600_shader_build_fs(struct radeon_device *rdev, bytecode[idx++] = 0xCAFEDEAD; } *ndwords = idx; + r600_shader_disassemble(bytecode, idx); return 0; } diff --git a/radeon_atom.h b/radeon_atom.h index 10d9935..cdeb2bd 100644 --- a/radeon_atom.h +++ b/radeon_atom.h @@ -66,15 +66,18 @@ struct r600_batch { struct radeon_bo *shaders; u32 shaders_idx; struct drm_r600_vs_input inputs; + u32 nfs_resources; }; #define R600_SHADER_SIZE (32 * 1024) struct r600_batches { - struct radeon_ib *ib; - u32 npkts; - struct list_head batches; - u32 last_id[R600_BATCH_NATOMS]; + struct radeon_ib *ib; + u32 npkts; + struct list_head batches; + u32 nfs_resources; + struct drm_r600_vs_buffer fs_resource[160]; + u32 last_id[R600_BATCH_NATOMS]; }; struct r600_atoms { @@ -51,6 +51,7 @@ static u32 vsconstants[16] = { 0x00000000, 0x00000000, 0x00000000, 0x3F800000, }; +#if 0 static u32 vsshaders[64] = { 0x0000001C, 0x81000400, 0x00000005, 0x80000000, 0x00000007, 0xA04C0000, 0xC001A03C, 0x94000688, @@ -69,6 +70,26 @@ static u32 vsshaders[64] = { 0x7C000000, 0x1C351001, 0x00080000, 0x0BEADEAF, 0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF, }; +#else +static u32 vsshaders[64] = { + 0x00000000, 0x89800000, 0x00000005, 0x80000000, + 0x00000007, 0xA04C0000, 0xC001A03C, 0x94000688, + 0xC0024000, 0x94200688, 0x900000F8, 0x00A80C90, + 0x00000000, 0x00000000, 0x00200001, 0x006C2810, + 0x00A00401, 0x206C2800, 0x01200801, 0x406C2800, + 0x81A00C01, 0x606C2800, 0x00202001, 0x006C2800, + 0x00A02401, 0x206C2810, 0x01202801, 0x406C2800, + 0x81A02C01, 0x606C2800, 0x00204001, 0x006C2800, + 0x00A04401, 0x206C2800, 0x01204801, 0x406C2810, + 0x81A04C01, 0x606C2800, 0x00206001, 0x006C2800, + 0x00A06401, 0x206C2800, 0x01206801, 0x406C2800, + 0x81A06C01, 0x606C2810, 0x00000002, 0x00940C90, + 0x00000402, 0x20940C90, 0x00000802, 0x40940C90, + 0x80000C02, 0x60940C90, 0x00000000, 0x00000000, + 0x7C000000, 0x1C351001, 0x00080000, 0x0BEADEAF, + 0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF, +}; +#endif static u32 psshaders[20] = { 0x00000003, 0x80000000, 0x00000005, 0xA00C0000, @@ -78,6 +99,17 @@ static u32 psshaders[20] = { 0x80000C00, 0x60340C90, 0x00000000, 0x00000000, }; +static float rvbo1[32] = { + 0.000000, 0.000000, -1.000000, 0.500000, + 0.500000, 0.500000, 0.000000, 250.000000, + 0.000000, -1.000000, 0.500000, 0.500000, + 0.500000, 0.000000, 250.000000, 250.000000, + -1.000000, 0.500000, 0.500000, 0.500000, + 0.000000, 0.000000, 250.000000, -1.000000, + 0.500000, 0.500000, 0.500000, 0.000000, + 0.000000, 0.000000, 0.000000, 0.000000, +}; + void r600_shader_disassemble(u32 *bytecode, u32 ndwords); int r600_tri_flat(struct radeon *radeon) { @@ -97,9 +129,18 @@ int r600_tri_flat(struct radeon *radeon) struct drm_r600_ps_shader ps_shader; struct drm_r600_batch batch; struct drm_radeon_atom atom; + struct radeon_bo *vbo1; int r; - r600_shader_disassemble(vsshaders, 64); + vbo1 = radeon_bo_open(radeon->bom, 0, 4096, 0, RADEON_GEM_DOMAIN_GTT, 0); + if (vbo1 == NULL) { + fprintf(stderr, "Failed to create vbo1 bo\n"); + return -ENOMEM; + } + memset_bo(vbo1, 0); + memcpy_bo(vbo1, (u32*)rvbo1, 32); + +// r600_shader_disassemble(vsshaders, 64); r = radeon_device_init(&rdev, radeon); if (r) return r; @@ -394,11 +435,11 @@ int r600_tri_flat(struct radeon *radeon) /* inputs */ batch.inputs.nelements = 2; batch.inputs.nbuffers = 2; - batch.inputs.buffers[0].bo = NULL; + batch.inputs.buffers[0].bo = vbo1; batch.inputs.buffers[0].sq_vtx_constant_word0 = 0x00000000; batch.inputs.buffers[0].sq_vtx_constant_word2 = 0x03001C00; batch.inputs.buffers[0].sq_vtx_constant_word3 = 0x00000001; - batch.inputs.buffers[1].bo = NULL; + batch.inputs.buffers[1].bo = vbo1; batch.inputs.buffers[1].sq_vtx_constant_word0 = 0x0000000C; batch.inputs.buffers[1].sq_vtx_constant_word2 = 0x02301C00; batch.inputs.buffers[1].sq_vtx_constant_word3 = 0x00000001; @@ -419,5 +460,6 @@ int r600_tri_flat(struct radeon *radeon) return r; r = radeon_batches_flush(rdev); radeon_device_release(rdev); + radeon_bo_unref(vbo1); return r; } |