summary refs log tree commit diff
diff options
context:
space:
mode:
author Jerome Glisse <jglisse@redhat.com> 2010-02-13 23:31:37 +0100
committer Jerome Glisse <jglisse@redhat.com> 2010-02-13 23:31:37 +0100
commit 2c54f82547c5c7f07f21582a5d9abe429d38e60e (patch)
tree 9e573706bc22548473d5e73bde9f60b23528b013
parent 41cbc9da9cdf4df09e92d216b8b2f4cd6b34b453 (diff)
fs shader at work
-rw-r--r-- r600_atom.c 91
-rw-r--r-- r600_shader.c 17
-rw-r--r-- radeon_atom.h 11
-rw-r--r-- test.c 48
4 files changed, 158 insertions, 9 deletions
diff --git a/r600_atom.c b/r600_atom.c
index 4d906ad..5a20e60 100644
--- a/r600_atom.c
+++ b/r600_atom.c
@@ -49,6 +49,34 @@ static void r600_emit_flush(struct radeon_device *rdev,
RADEON_GEM_DOMAIN_GTT);
}
+static void r600_emit_resources(struct radeon_device *rdev,
+ struct radeon_ib *ib,
+ struct radeon_bo *bo,
+ u32 dw0, u32 dw1, u32 dw2, u32 dw3,
+ u32 dw4, u32 dw5, u32 dw6, u32 dw7)
+{
+ ib->ptr[ib->cpkts++] = PKT3(PKT3_SURFACE_SYNC, 3);
+ ib->ptr[ib->cpkts++] = 0x01000000;
+ ib->ptr[ib->cpkts++] = radeon_bo_size(bo) >> 8;
+ ib->ptr[ib->cpkts++] = 0x00000000;
+ ib->ptr[ib->cpkts++] = 0x0000000A;
+ ib->ptr[ib->cpkts++] = PKT3(PKT3_NOP, 0);
+ ib->ptr[ib->cpkts++] = radeon_ib_reloc(ib, bo, RADEON_GEM_DOMAIN_VRAM |
+ RADEON_GEM_DOMAIN_GTT);
+ ib->ptr[ib->cpkts++] = PKT3(PKT3_SET_RESOURCE, 7);
+ ib->ptr[ib->cpkts++] = dw0;
+ ib->ptr[ib->cpkts++] = dw1;
+ ib->ptr[ib->cpkts++] = dw2;
+ ib->ptr[ib->cpkts++] = dw3;
+ ib->ptr[ib->cpkts++] = dw4;
+ ib->ptr[ib->cpkts++] = dw5;
+ ib->ptr[ib->cpkts++] = dw6;
+ ib->ptr[ib->cpkts++] = dw7;
+ ib->ptr[ib->cpkts++] = PKT3(PKT3_NOP, 0);
+ ib->ptr[ib->cpkts++] = radeon_ib_reloc(ib, bo, RADEON_GEM_DOMAIN_VRAM |
+ RADEON_GEM_DOMAIN_GTT);
+}
+
/*
* r600_cb
*/
@@ -1466,12 +1494,14 @@ static int r600_vs_shader_emit(struct radeon_device *rdev,
&batch->inputs, &vs_shader->vs);
if (r)
return r;
+printf("FS at %d\n", batch->shaders_idx);
vs_shader->pkts[2] = batch->shaders_idx >> 6;
vs_shader->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
batch->shaders_idx += (ndwords + 63) & 0xFFFFFFC0;
memcpy(&opcodes[batch->shaders_idx], vs_shader->vs.opcodes, vs_shader->vs.ndwords * 4);
vs_shader->pkts[13] = batch->shaders_idx >> 6;
vs_shader->pkts[15] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
+printf("VS at %d\n", batch->shaders_idx);
batch->shaders_idx += (vs_shader->vs.ndwords + 63) & 0xFFFFFFC0;
r = radeon_ib_copy(ib, vs_shader->pkts, atom->npkts);
return r;
@@ -1800,6 +1830,8 @@ static int r600_batch_alloc(struct r600_batch **batch)
INIT_LIST_HEAD(&rbatch->pre_flushes);
INIT_LIST_HEAD(&rbatch->post_flushes);
rbatch->nemit_atoms = 0;
+ rbatch->shaders_idx = 0;
+ rbatch->nfs_resources = 0;
*batch = rbatch;
return 0;
}
@@ -1807,6 +1839,32 @@ static int r600_batch_alloc(struct r600_batch **batch)
/*
* r600_batches
*/
+static int r600_batches_fs_resource_is_present(struct radeon_device *rdev,
+ struct r600_batches *batches,
+ struct drm_r600_vs_buffer *buffer)
+{
+ int i;
+
+ for (i = 0; i < batches->nfs_resources; i++) {
+ if (!memcmp(&batches->fs_resource[i], buffer, sizeof(struct drm_r600_vs_buffer))) {
+ buffer->resource_id = i;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static void r600_batches_fs_resource_add(struct radeon_device *rdev,
+ struct r600_batches *batches,
+ struct drm_r600_vs_buffer *buffer)
+{
+ int i = batches->nfs_resources++;
+
+ buffer->resource_id = i;
+ memcpy(&batches->fs_resource[i], buffer, sizeof(struct drm_r600_vs_buffer));
+ radeon_bo_ref(buffer->bo);
+}
+
static void r600_batches_clear_locked(struct radeon_device *rdev, struct r600_batches *batches)
{
struct r600_batch *batch, *n;
@@ -1826,6 +1884,10 @@ static void r600_batches_clear_locked(struct radeon_device *rdev, struct r600_ba
}
INIT_LIST_HEAD(&batches->batches);
batches->npkts = 0;
+ for (i = 0; i < batches->nfs_resources; i++) {
+ radeon_bo_unref(batches->fs_resource[i].bo);
+ }
+ batches->nfs_resources = 0;
}
static int r600_batches_flush_locked(struct radeon_device *rdev, struct r600_batches *batches)
@@ -1834,6 +1896,16 @@ static int r600_batches_flush_locked(struct radeon_device *rdev, struct r600_bat
struct radeon_atom_flush *flush;
int r, i;
+ for (i = 0; i < batches->nfs_resources; i++) {
+ r600_emit_resources(rdev, batches->ib, batches->fs_resource[i].bo,
+ (batches->fs_resource[i].resource_id + 320) * 7,
+ batches->fs_resource[i].sq_vtx_constant_word0,
+ radeon_bo_size(batches->fs_resource[i].bo) -
+ batches->fs_resource[i].sq_vtx_constant_word0,
+ batches->fs_resource[i].sq_vtx_constant_word2,
+ batches->fs_resource[i].sq_vtx_constant_word3,
+ 0, 0, 0xC0000000);
+ }
list_for_each_entry(batch, &batches->batches, list) {
list_for_each_entry(flush, &batch->pre_flushes, list) {
r600_emit_flush(rdev, batches->ib, flush->bo, flush->flags);
@@ -1874,6 +1946,13 @@ int r600_batches_queue(struct radeon_device *rdev,
struct r600_vgt *vgt;
int r, i;
+ for (i = 0; i < batch->inputs.nbuffers; i++) {
+ if (batch->inputs.buffers[i].sq_vtx_constant_word0 >=
+ radeon_bo_size(batch->inputs.buffers[i].bo)) {
+ dev_err(rdev->dev, "offset for vertex buffer %d bigger than buffer\n", i);
+ return -EINVAL;
+ }
+ }
r = r600_batch_alloc(&rbatch);
if (r)
return r;
@@ -1959,6 +2038,13 @@ reprocess:
rbatch->npkts = 0;
/* flush + wait until = 5dw */
rbatch->npkts += 5;
+ for (i = 0; i < rbatch->inputs.nbuffers; i++) {
+ if (!r600_batches_fs_resource_is_present(rdev, batches, &rbatch->inputs.buffers[i])) {
+ rbatch->nfs_resources += 1;
+ rbatch->inputs.buffers[i].resource_id = -1;
+ }
+ }
+ rbatch->npkts += rbatch->nfs_resources * 18;
for (i = 0; i < R600_BATCH_NATOMS; i++) {
if (rbatch->atoms[i]) {
r = rbatch->atoms[i]->process(rdev, rbatch->atoms[i], batches->last_id[i], rbatch);
@@ -1994,6 +2080,11 @@ reprocess:
batches->last_id[i] = rbatch->atoms[i]->id;
}
}
+ for (i = 0; i < rbatch->inputs.nbuffers; i++) {
+ if (rbatch->inputs.buffers[i].resource_id == -1) {
+ r600_batches_fs_resource_add(rdev, batches, &rbatch->inputs.buffers[i]);
+ }
+ }
batches->npkts += rbatch->npkts;
list_add_tail(&rbatch->list, &batches->batches);
mutex_unlock(&atoms->mutex);
diff --git a/r600_shader.c b/r600_shader.c
index fd482c8..de22654 100644
--- a/r600_shader.c
+++ b/r600_shader.c
@@ -89,6 +89,10 @@ static struct r600_inst_name sq_cf_alloc_export_inst_name[] = {
void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx)
{
+ u32 addr = G_008DFC_ADDR(bytecode[idx+0]) << 1;
+ u32 count = G_008DFC_COUNT(bytecode[idx+1]);
+ u32 i;
+
printf("0x%08X 0x%08X CF_INST: %s\n", bytecode[idx+0], bytecode[idx+1],
sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name);
printf(" word0: addr (in dw) %d\n", G_008DFC_ADDR(bytecode[idx+0]) << 1);
@@ -103,6 +107,10 @@ void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx)
sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name);
printf(" word1: whole quad mode %d\n", G_008DFC_WHOLE_QUAD_MODE(bytecode[idx+1]));
printf(" word1: barrier %d\n", G_008DFC_BARRIER(bytecode[idx+1]));
+ for (i = 0; i <= count; i++)
+ printf(" 0x%08X 0x%08X 0x%08X 0x%08X\n",
+ bytecode[idx+addr+0+(i*4)], bytecode[idx+addr+1+(i*4)],
+ bytecode[idx+addr+2+(i*4)], bytecode[idx+addr+3+(i*4)]);
}
void r600_disassemble_sq_cf_alu_inst(u32 *bytecode, u32 ndwords, u32 idx)
@@ -160,6 +168,7 @@ struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx)
} else {
if (G_008DFC_CF_INST(bytecode[idx+1]) < 0x20) {
r600_disassemble_sq_cf_inst(bytecode, ndwords, idx);
+ blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]);
} else {
r600_disassemble_sq_cf_alloc_export_inst(bytecode, ndwords, idx);
blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]);
@@ -194,8 +203,10 @@ int r600_shader_build_fs(struct radeon_device *rdev,
dev_err(rdev->dev, "need at least one input for vertex shader\n");
return -EINVAL;
}
- bytecode[idx++] = 0x00000001;
- bytecode[idx++] = 0x81200000 | S_008DFC_COUNT(inputs->nelements - 1);
+ bytecode[idx++] = 0x00000002;
+ bytecode[idx++] = 0x81000000 | S_008DFC_COUNT(inputs->nelements - 1);
+ bytecode[idx++] = 0x00000000;
+ bytecode[idx++] = 0x8A000000;
for (i = 0; i < inputs->nelements; i++) {
if (inputs->elements[i].buffer_id >= inputs->nbuffers) {
dev_err(rdev->dev, "elements %d referencing invalid buffer %d\n",
@@ -209,6 +220,7 @@ int r600_shader_build_fs(struct radeon_device *rdev,
break;
}
}
+printf("elements %d: gpr %d resource id %d\n", i, gpr, rid);
/* if vs has no corresponding input skip the elements */
if (gpr == -1)
continue;
@@ -218,5 +230,6 @@ int r600_shader_build_fs(struct radeon_device *rdev,
bytecode[idx++] = 0xCAFEDEAD;
}
*ndwords = idx;
+ r600_shader_disassemble(bytecode, idx);
return 0;
}
diff --git a/radeon_atom.h b/radeon_atom.h
index 10d9935..cdeb2bd 100644
--- a/radeon_atom.h
+++ b/radeon_atom.h
@@ -66,15 +66,18 @@ struct r600_batch {
struct radeon_bo *shaders;
u32 shaders_idx;
struct drm_r600_vs_input inputs;
+ u32 nfs_resources;
};
#define R600_SHADER_SIZE (32 * 1024)
struct r600_batches {
- struct radeon_ib *ib;
- u32 npkts;
- struct list_head batches;
- u32 last_id[R600_BATCH_NATOMS];
+ struct radeon_ib *ib;
+ u32 npkts;
+ struct list_head batches;
+ u32 nfs_resources;
+ struct drm_r600_vs_buffer fs_resource[160];
+ u32 last_id[R600_BATCH_NATOMS];
};
struct r600_atoms {
diff --git a/test.c b/test.c
index 37dbe42..a20dadc 100644
--- a/test.c
+++ b/test.c
@@ -51,6 +51,7 @@ static u32 vsconstants[16] = {
0x00000000, 0x00000000, 0x00000000, 0x3F800000,
};
+#if 0
static u32 vsshaders[64] = {
0x0000001C, 0x81000400, 0x00000005, 0x80000000,
0x00000007, 0xA04C0000, 0xC001A03C, 0x94000688,
@@ -69,6 +70,26 @@ static u32 vsshaders[64] = {
0x7C000000, 0x1C351001, 0x00080000, 0x0BEADEAF,
0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF,
};
+#else
+static u32 vsshaders[64] = {
+ 0x00000000, 0x89800000, 0x00000005, 0x80000000,
+ 0x00000007, 0xA04C0000, 0xC001A03C, 0x94000688,
+ 0xC0024000, 0x94200688, 0x900000F8, 0x00A80C90,
+ 0x00000000, 0x00000000, 0x00200001, 0x006C2810,
+ 0x00A00401, 0x206C2800, 0x01200801, 0x406C2800,
+ 0x81A00C01, 0x606C2800, 0x00202001, 0x006C2800,
+ 0x00A02401, 0x206C2810, 0x01202801, 0x406C2800,
+ 0x81A02C01, 0x606C2800, 0x00204001, 0x006C2800,
+ 0x00A04401, 0x206C2800, 0x01204801, 0x406C2810,
+ 0x81A04C01, 0x606C2800, 0x00206001, 0x006C2800,
+ 0x00A06401, 0x206C2800, 0x01206801, 0x406C2800,
+ 0x81A06C01, 0x606C2810, 0x00000002, 0x00940C90,
+ 0x00000402, 0x20940C90, 0x00000802, 0x40940C90,
+ 0x80000C02, 0x60940C90, 0x00000000, 0x00000000,
+ 0x7C000000, 0x1C351001, 0x00080000, 0x0BEADEAF,
+ 0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF,
+};
+#endif
static u32 psshaders[20] = {
0x00000003, 0x80000000, 0x00000005, 0xA00C0000,
@@ -78,6 +99,17 @@ static u32 psshaders[20] = {
0x80000C00, 0x60340C90, 0x00000000, 0x00000000,
};
+static float rvbo1[32] = {
+ 0.000000, 0.000000, -1.000000, 0.500000,
+ 0.500000, 0.500000, 0.000000, 250.000000,
+ 0.000000, -1.000000, 0.500000, 0.500000,
+ 0.500000, 0.000000, 250.000000, 250.000000,
+ -1.000000, 0.500000, 0.500000, 0.500000,
+ 0.000000, 0.000000, 250.000000, -1.000000,
+ 0.500000, 0.500000, 0.500000, 0.000000,
+ 0.000000, 0.000000, 0.000000, 0.000000,
+};
+
void r600_shader_disassemble(u32 *bytecode, u32 ndwords);
int r600_tri_flat(struct radeon *radeon)
{
@@ -97,9 +129,18 @@ int r600_tri_flat(struct radeon *radeon)
struct drm_r600_ps_shader ps_shader;
struct drm_r600_batch batch;
struct drm_radeon_atom atom;
+ struct radeon_bo *vbo1;
int r;
- r600_shader_disassemble(vsshaders, 64);
+ vbo1 = radeon_bo_open(radeon->bom, 0, 4096, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ if (vbo1 == NULL) {
+ fprintf(stderr, "Failed to create vbo1 bo\n");
+ return -ENOMEM;
+ }
+ memset_bo(vbo1, 0);
+ memcpy_bo(vbo1, (u32*)rvbo1, 32);
+
+// r600_shader_disassemble(vsshaders, 64);
r = radeon_device_init(&rdev, radeon);
if (r)
return r;
@@ -394,11 +435,11 @@ int r600_tri_flat(struct radeon *radeon)
/* inputs */
batch.inputs.nelements = 2;
batch.inputs.nbuffers = 2;
- batch.inputs.buffers[0].bo = NULL;
+ batch.inputs.buffers[0].bo = vbo1;
batch.inputs.buffers[0].sq_vtx_constant_word0 = 0x00000000;
batch.inputs.buffers[0].sq_vtx_constant_word2 = 0x03001C00;
batch.inputs.buffers[0].sq_vtx_constant_word3 = 0x00000001;
- batch.inputs.buffers[1].bo = NULL;
+ batch.inputs.buffers[1].bo = vbo1;
batch.inputs.buffers[1].sq_vtx_constant_word0 = 0x0000000C;
batch.inputs.buffers[1].sq_vtx_constant_word2 = 0x02301C00;
batch.inputs.buffers[1].sq_vtx_constant_word3 = 0x00000001;
@@ -419,5 +460,6 @@ int r600_tri_flat(struct radeon *radeon)
return r;
r = radeon_batches_flush(rdev);
radeon_device_release(rdev);
+ radeon_bo_unref(vbo1);
return r;
}