pixel shader

author: Jerome Glisse <jglisse@redhat.com> 2010-02-13 14:38:38 +0100
committer: Jerome Glisse <jglisse@redhat.com> 2010-02-13 14:38:38 +0100
commit: 6c2097cd142555bcfd77837d9d5865cc7b2bdd72 (patch)
tree: 6f123e9b84c8db59e942b746815702aeab4522e4
parent: b883154cb3cf5b5274ad37b0ba5b325a6f7cf146 (diff)
4 files changed, 187 insertions, 107 deletions
diff --git a/r600_atom.c b/r600_atom.c
index 0349d40..f55f7af 100644
--- a/r600_atom.c
+++ b/r600_atom.c
@@ -57,9 +57,7 @@ struct r600_cb {
 	u32			pkts[512];
 	u32			placements[2];
 	struct radeon_bo	*bo;
-	struct radeon_bo	*psshader1;
 	struct radeon_bo	*vbo1;
-	struct radeon_bo	*vbo2;
 	struct r600_atoms	*atoms;
 };
 
@@ -73,12 +71,8 @@ static void r600_cb_release(struct kref *kref)
 	mutex_unlock(&cb->atoms->mutex);
 	if (cb->bo)
 		radeon_bo_unref(cb->bo);
-	if (cb->psshader1)
-		radeon_bo_unref(cb->psshader1);
 	if (cb->vbo1)
 		radeon_bo_unref(cb->vbo1);
-	if (cb->vbo2)
-		radeon_bo_unref(cb->vbo2);
 	kfree(cb);
 }
 
@@ -93,12 +87,10 @@ static int r600_cb_emit(struct radeon_device *rdev,
 	cb->pkts[16] = radeon_ib_reloc(ib, cb->bo, cb->placements[0] | cb->placements[1]);
 	cb->pkts[21] = radeon_ib_reloc(ib, cb->bo, cb->placements[0] | cb->placements[1]);
 	cb->pkts[26] = radeon_ib_reloc(ib, cb->bo, cb->placements[0] | cb->placements[1]);
-	cb->pkts[87] = radeon_ib_reloc(ib, cb->psshader1, RADEON_GEM_DOMAIN_GTT);
-	cb->pkts[92] = radeon_ib_reloc(ib, cb->psshader1, RADEON_GEM_DOMAIN_GTT);
-	cb->pkts[117] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
-	cb->pkts[128] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
-	cb->pkts[135] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
-	cb->pkts[146] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
+	cb->pkts[96] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
+	cb->pkts[107] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
+	cb->pkts[114] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
+	cb->pkts[125] = radeon_ib_reloc(ib, cb->vbo1, RADEON_GEM_DOMAIN_GTT);
 	r = radeon_ib_copy(ib, cb->pkts, atom->npkts);
 	return r;
 }
@@ -1443,7 +1435,7 @@ out_err:
  */
 struct r600_vs_shader {
 	struct radeon_atom	atom;
-	u32			pkts[32];
+	u32			pkts[16];
 	u8			input_semantic[32];
 	u8			input_resource_id[32];
 	u8			input_gpr[32];
@@ -1554,6 +1546,120 @@ out_err:
 }
 
 /*
+ * r600_ps_shader
+ */
+struct r600_ps_shader {
+	struct radeon_atom	atom;	/* must stay first: r600_ps_shader_emit casts radeon_atom * to this struct */
+	u32			pkts[16];	/* packets built at create time, copied to the IB on emit */
+	u8			input_semantic[32];	/* copied from drm_r600_ps_shader */
+	u8			input_gpr[32];	/* copied from drm_r600_ps_shader */
+	u8			ninputs;
+	u8			output_semantic[32];	/* copied from drm_r600_ps_shader */
+	u8			output_gpr[32];	/* copied from drm_r600_ps_shader */
+	u8			noutputs;
+	u32			ndwords;	/* length of opcodes in 32-bit words */
+	u32			*opcodes;	/* kmalloc'ed copy of the shader code, kfree'd on release */
+	struct r600_atoms	*atoms;	/* its mutex is held by release around list_del_init */
+};
+
+/* kref release callback: unlink the atom under atoms->mutex, then free the opcodes and the shader. */
+static void r600_ps_shader_release(struct kref *kref)
+{
+	struct r600_ps_shader *shader = container_of(kref, struct r600_ps_shader, atom.kref);
+
+	mutex_lock(&shader->atoms->mutex);
+	list_del_init(&shader->atom.list);
+	mutex_unlock(&shader->atoms->mutex);
+	kfree(shader->opcodes);
+	kfree(shader);
+}
+
+/* Copy the opcodes into batch->shaders, patch pkts[2] (SQ_PGM_START_PS value) and pkts[4] (reloc), emit pkts. */
+static int r600_ps_shader_emit(struct radeon_device *rdev,
+			struct radeon_atom *atom,
+			void *data,
+			struct radeon_ib *ib)
+{
+	struct r600_ps_shader *shader = (struct r600_ps_shader *)atom;
+	struct r600_batch *b = data;
+	u32 *dst = (u32 *)b->shaders->ptr + b->shaders_idx;
+	u32 nbytes = shader->ndwords * 4;
+
+	memcpy(dst, shader->opcodes, nbytes);
+	shader->pkts[2] = b->shaders_idx >> 6;
+	shader->pkts[4] = radeon_ib_reloc(ib, b->shaders, RADEON_GEM_DOMAIN_GTT);
+	b->shaders_idx += (nbytes + 63) & 0xFFFFFFC0;	/* advance by the byte size rounded up to 64 */
+	return radeon_ib_copy(ib, shader->pkts, atom->npkts);
+}
+
+/* Build a ps_shader atom from the drm_r600_ps_shader at patom->data; returns 0 and sets *atom, or a negative errno. */
+static int r600_ps_shader_create(struct radeon_device *rdev,
+				struct r600_atoms *atoms,
+				struct drm_radeon_atom *patom,
+				struct radeon_atom **atom)
+{
+	struct drm_r600_ps_shader pps_shader;
+	struct r600_ps_shader *ps_shader;
+	int r;
+
+	ps_shader = kmalloc(sizeof(*ps_shader), GFP_KERNEL);
+	if (ps_shader == NULL)
+		return -ENOMEM;
+	/* make sure structure properly initialized */
+	memset(ps_shader, 0, sizeof(*ps_shader));
+	ps_shader->atoms = atoms;	/* r600_ps_shader_release locks atoms->mutex, so set it before any put */
+	r = radeon_atom_init(&ps_shader->atom, &atoms->idr, &r600_ps_shader_release,
+			&r600_ps_shader_emit, &r600_atom_process_default,
+			ps_shader->pkts);
+	if (r)
+		goto out_err;
+	/* KERNEL use get user data (here patom->data is copied with a plain memcpy) */
+	memcpy(&pps_shader, (void*)(unsigned long)patom->data, sizeof(struct drm_r600_ps_shader));
+	if (pps_shader.ndwords > ((R600_SHADER_SIZE / 2) - 1024)) {
+		dev_err(rdev->dev, "shader too big\n");
+		r = -EINVAL;
+		goto out_err;
+	}
+	ps_shader->opcodes = kmalloc(4 * pps_shader.ndwords, GFP_KERNEL);
+	if (ps_shader->opcodes == NULL) {
+		dev_err(rdev->dev, "failed to allocate shader opcodes\n");
+		r = -ENOMEM;
+		goto out_err;
+	}
+	memcpy(ps_shader->opcodes, pps_shader.opcodes, 4 * pps_shader.ndwords);
+	ps_shader->ndwords = pps_shader.ndwords;
+	ps_shader->ninputs = pps_shader.ninputs;
+	ps_shader->noutputs = pps_shader.noutputs;
+	memcpy(ps_shader->input_semantic, pps_shader.input_semantic, sizeof(ps_shader->input_semantic));
+	memcpy(ps_shader->input_gpr, pps_shader.input_gpr, sizeof(ps_shader->input_gpr));
+	memcpy(ps_shader->output_semantic, pps_shader.output_semantic, sizeof(ps_shader->output_semantic));
+	memcpy(ps_shader->output_gpr, pps_shader.output_gpr, sizeof(ps_shader->output_gpr));
+	ps_shader->atom.npkts = 0;
+	/* SQ_PGM_START_PS */
+	ps_shader->pkts[ps_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0x00000210;
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0x00000000;	/* pkts[2]: overwritten by r600_ps_shader_emit */
+	ps_shader->pkts[ps_shader->atom.npkts++] = PKT3(PKT3_NOP, 0);
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0x00000000;	/* pkts[4]: reloc written by r600_ps_shader_emit */
+	/* SQ_PGM_RESOURCES_PS */
+	ps_shader->pkts[ps_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 2);
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0x00000214;
+	ps_shader->pkts[ps_shader->atom.npkts++] = pps_shader.sq_pgm_resources_ps;
+	ps_shader->pkts[ps_shader->atom.npkts++] = pps_shader.sq_pgm_exports_ps;
+	/* SQ_PGM_CF_OFFSET_PS */
+	ps_shader->pkts[ps_shader->atom.npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0x00000233;
+	ps_shader->pkts[ps_shader->atom.npkts++] = 0;
+	*atom = &ps_shader->atom;
+	fprintf(stderr, "%s %d pkts\n", __func__, (*atom)->npkts);
+	return 0;
+out_err:
+	radeon_atom_put(&ps_shader->atom);
+	*atom = NULL;
+	return r;
+}
+
+/*
  * r600 atom core functions
  */
 int r600_atom_create(struct radeon_device *rdev,
@@ -1658,6 +1764,13 @@ int r600_atom_create(struct radeon_device *rdev,
 		atom->type = patom->type;
 		list_add_tail(&atom->list, &atoms->vs_shader_atoms);
 		break;
+	case R600_ATOM_PS_SHADER:
+		r = r600_ps_shader_create(rdev, atoms, patom, &atom);
+		if (r)
+			return r;
+		atom->type = patom->type;
+		list_add_tail(&atom->list, &atoms->ps_shader_atoms);
+		break;
 	default:
 		dev_err(rdev->dev, "unknown R600 atom type 0x%08X\n", patom->type);
 		return -EINVAL;
@@ -1767,7 +1880,7 @@ int r600_batches_queue(struct radeon_device *rdev,
 		batch->tp == NULL || batch->cb == NULL ||
 		batch->db_cntl == NULL || batch->vgt == NULL ||
 		batch->spi == NULL || batch->sx == NULL ||
-		batch->vs_shader == NULL) {
+		batch->vs_shader == NULL || batch->ps_shader == NULL) {
 		mutex_unlock(&atoms->mutex);
 		kfree(rbatch);
 		dev_err(rdev->dev, "invalid batch\n");
@@ -1787,37 +1900,27 @@ int r600_batches_queue(struct radeon_device *rdev,
 		dev_err(rdev->dev, "can't map buffer for shaders\n");
 		return r;
 	}
-	rbatch->atoms[i++] = batch->blend;
-	kref_get(&batch->blend->kref);
-	rbatch->atoms[i++] = batch->cb_cntl;
-	kref_get(&batch->cb_cntl->kref);
-	rbatch->atoms[i++] = batch->pa;
-	kref_get(&batch->pa->kref);
-	rbatch->atoms[i++] = batch->vport;
-	kref_get(&batch->vport->kref);
-	rbatch->atoms[i++] = batch->tp;
-	kref_get(&batch->tp->kref);
-	rbatch->atoms[i++] = batch->vs_shader;
-	kref_get(&batch->vs_shader->kref);
+	rbatch->atoms[i++] = batch->blend; kref_get(&batch->blend->kref);
+	rbatch->atoms[i++] = batch->cb_cntl; kref_get(&batch->cb_cntl->kref);
+	rbatch->atoms[i++] = batch->pa; kref_get(&batch->pa->kref);
+	rbatch->atoms[i++] = batch->vport; kref_get(&batch->vport->kref);
+	rbatch->atoms[i++] = batch->tp; kref_get(&batch->tp->kref);
+	rbatch->atoms[i++] = batch->vs_shader; kref_get(&batch->vs_shader->kref);
+	rbatch->atoms[i++] = batch->ps_shader; kref_get(&batch->ps_shader->kref);
+	rbatch->atoms[i++] = batch->db_cntl; kref_get(&batch->db_cntl->kref);
 	rbatch->atoms[i++] = batch->vs_constants;
 	if (batch->vs_constants)
 		kref_get(&batch->vs_constants->kref);
 	rbatch->atoms[i++] = batch->ps_constants;
 	if (batch->ps_constants)
 		kref_get(&batch->ps_constants->kref);
-	rbatch->atoms[i++] = batch->db_cntl;
-	kref_get(&batch->db_cntl->kref);
 	rbatch->atoms[i++] = batch->db;
 	if (batch->db)
 		kref_get(&batch->db->kref);
-	rbatch->atoms[i++] = batch->vgt;
-	kref_get(&batch->vgt->kref);
-	rbatch->atoms[i++] = batch->spi;
-	kref_get(&batch->spi->kref);
-	rbatch->atoms[i++] = batch->sx;
-	kref_get(&batch->sx->kref);
-	rbatch->atoms[i++] = batch->cb;
-	kref_get(&batch->cb->kref);
+	rbatch->atoms[i++] = batch->vgt; kref_get(&batch->vgt->kref);
+	rbatch->atoms[i++] = batch->spi; kref_get(&batch->spi->kref);
+	rbatch->atoms[i++] = batch->sx; kref_get(&batch->sx->kref);
+	rbatch->atoms[i++] = batch->cb; kref_get(&batch->cb->kref);
 	db_cntl = container_of(batch->db_cntl, struct r600_db_cntl, atom);
 	if (db_cntl->need_z || db_cntl->need_h || db_cntl->need_s) {
 		struct r600_db *db;
@@ -1947,6 +2050,7 @@ int r600_atoms_init(struct radeon_device *rdev, struct r600_atoms *atoms)
 	INIT_LIST_HEAD(&atoms->vgt_atoms);
 	INIT_LIST_HEAD(&atoms->vport_atoms);
 	INIT_LIST_HEAD(&atoms->vs_shader_atoms);
+	INIT_LIST_HEAD(&atoms->ps_shader_atoms);
 	idr_init(&atoms->idr);
 	atoms->npipes = 2;
 	atoms->nbanks = 4;
@@ -2033,25 +2137,6 @@ void r600_tflat(struct radeon_atom *atom)
     WPKT(PKT3(PKT3_SET_LOOP_CONST, 1));
     WPKT(0x00000020);
     WPKT(0x0100000F);
-    WPKT(PKT3(PKT3_SURFACE_SYNC, 3));
-    WPKT(0x08000000);
-    WPKT(0x00000001);
-    WPKT(0x00000000);
-    WPKT(0x0000000A);
-    WPKT_RELOC("psshader1");
-    WPKT(PKT3(PKT3_SET_CONTEXT_REG, 1));
-    WPKT(0x00000210);
-    WPKT(0x00000000);
-    WPKT_RELOC("psshader1");
-    WPKT(PKT3(PKT3_SET_CONTEXT_REG, 1));
-    WPKT(0x00000214);
-    WPKT(0x00000003);
-    WPKT(PKT3(PKT3_SET_CONTEXT_REG, 1));
-    WPKT(0x00000215);
-    WPKT(0x00000002);
-    WPKT(PKT3(PKT3_SET_CONTEXT_REG, 1));
-    WPKT(0x00000233);
-    WPKT(0x00000000);
     WPKT(PKT3(PKT3_SET_LOOP_CONST, 1));
     WPKT(0x00000000);
     WPKT(0x01000FFF);
@@ -2109,14 +2194,6 @@ void r600_tflat(struct radeon_atom *atom)
     WPKT(0x00000002);
 }
 
-static u32 rpsshader1[20] = {
-    0x00000003, 0x80000000, 0x00000005, 0xA00C0000,
-    0xC0008000, 0x94200688, 0x900000F8, 0x00480C90,
-    0x00000000, 0x00000000, 0x00000000, 0x00340C90,
-    0x00000400, 0x20340C90, 0x00000800, 0x40340C90,
-    0x80000C00, 0x60340C90, 0x00000000, 0x00000000,
-};
-
 static float rvbo1[32] = {
     0.000000, 0.000000, -1.000000, 0.500000,
     0.500000, 0.500000, 0.000000, 250.000000,
@@ -2128,28 +2205,10 @@ static float rvbo1[32] = {
     0.000000, 0.000000, 0.000000, 0.000000,
 };
 
-static float rvbo2[25] = {
-     0.900000, -0.900000, -30.000000,
-     0.900000,  0.900000, -30.000000,
-    -0.900000,  0.000000, -30.000000,
-     0.000000,  0.000000,   0.000000,
-     0.000000,  0.000000,   0.000000, 0.000000,
-     0.000000,  0.000000,   1.000000,
-     1.000000,  0.000000,   0.000000,
-     0.000000,  1.000000,   0.000000,
-};
-
 int r600_tflat_init(struct radeon *radeon, struct r600_cb *cb)
 {
 	int r = 0;
 
-	cb->psshader1 = radeon_bo_open(radeon->bom, 0, 4096, 0,
-					RADEON_GEM_DOMAIN_GTT, 0);
-	if (cb->psshader1 == NULL) {
-		fprintf(stderr, "Failed to create psshader1 bo\n");
-		r = -ENOMEM;
-		goto out_err;
-	}
 	cb->vbo1 = radeon_bo_open(radeon->bom, 0, 4096, 0,
 					RADEON_GEM_DOMAIN_GTT, 0);
 	if (cb->vbo1 == NULL) {
@@ -2157,26 +2216,11 @@ int r600_tflat_init(struct radeon *radeon, struct r600_cb *cb)
 		r = -ENOMEM;
 		goto out_err;
 	}
-	cb->vbo2 = radeon_bo_open(radeon->bom, 0, 4096, 0,
-					RADEON_GEM_DOMAIN_GTT, 0);
-	if (cb->vbo2 == NULL) {
-		fprintf(stderr, "Failed to create vbo2 bo\n");
-		r = -ENOMEM;
-		goto out_err;
-	}
-	memset_bo(cb->psshader1, 0);
 	memset_bo(cb->vbo1, 0);
-	memset_bo(cb->vbo2, 0);
-	memcpy_bo(cb->psshader1, rpsshader1, 20);
 	memcpy_bo(cb->vbo1, (u32*)rvbo1, 32);
-	memcpy_bo(cb->vbo2, (u32*)rvbo2, 25);
 	return 0;
 out_err:
-	if (cb->psshader1)
-		radeon_bo_unref(cb->psshader1);
 	if (cb->vbo1)
 		radeon_bo_unref(cb->vbo1);
-	if (cb->vbo2)
-		radeon_bo_unref(cb->vbo2);
 	return r;
 }
diff --git a/r600_atom_api.h b/r600_atom_api.h
index 762b710..8b5b1ff 100644
--- a/r600_atom_api.h
+++ b/r600_atom_api.h
@@ -40,6 +40,7 @@ struct drm_radeon_atom {
 #define R600_ATOM_SPI		11
 #define R600_ATOM_SX		12
 #define R600_ATOM_VS_SHADER	13
+#define R600_ATOM_PS_SHADER	14
 
 struct drm_r600_cb {
 	u32			pitch;
@@ -264,6 +265,17 @@ struct drm_r600_spi {
 	u32			spi_vs_out_id_9;
 };
 
+/* sx - shader export */
+struct drm_r600_sx {
+	u32			sx_alpha_ref;	/* NOTE(review): sx_* names suggest raw register values - confirm */
+	u32			sx_alpha_test_control;
+	u32			sx_export_buffer_sizes;
+	u32			sx_misc;
+	u32			offset;	/* presumably an offset into buffer - TODO confirm */
+	struct radeon_bo	*buffer;
+	u32			placements[2];	/* presumably allowed placements for buffer - TODO confirm */
+};
+
 /* vs_shader - vertex shader */
 struct drm_r600_vs_shader {
 	u8			input_semantic[32];
@@ -278,18 +290,20 @@ struct drm_r600_vs_shader {
 	u32			sq_pgm_resources_vs;
 };
 
-/* sx - shader export */
-struct drm_r600_sx {
-	u32			sx_alpha_ref;
-	u32			sx_alpha_test_control;
-	u32			sx_export_buffer_sizes;
-	u32			sx_misc;
-	u32			offset;
-	struct radeon_bo	*buffer;
-	u32			placements[2];
+/* ps_shader - pixel shader; r600_ps_shader_create reads this layout from drm_radeon_atom.data */
+struct drm_r600_ps_shader {
+	u8			input_semantic[32];
+	u8			input_gpr[32];
+	u8			ninputs;
+	u8			output_semantic[32];
+	u8			output_gpr[32];
+	u8			noutputs;
+	u32			*opcodes;	/* shader code, ndwords 32-bit words; copied by r600_ps_shader_create */
+	u32			ndwords;	/* rejected with -EINVAL when > (R600_SHADER_SIZE / 2) - 1024 */
+	u32			sq_pgm_resources_ps;	/* first value of the packet at register offset 0x214 */
+	u32			sq_pgm_exports_ps;	/* second value of that same packet */
 };
 
-
 struct drm_r600_batch {
 	struct radeon_atom	*vs_constants;
 	struct radeon_atom	*ps_constants;
@@ -305,6 +319,7 @@ struct drm_r600_batch {
 	struct radeon_atom	*spi;
 	struct radeon_atom	*sx;
 	struct radeon_atom	*vs_shader;
+	struct radeon_atom	*ps_shader;
 };
 
 
diff --git a/radeon_atom.h b/radeon_atom.h
index 7d5eb6f..79829a2 100644
--- a/radeon_atom.h
+++ b/radeon_atom.h
@@ -53,7 +53,7 @@ struct radeon_atom {
 };
 
 /* R600 */
-#define R600_BATCH_NATOMS	14
+#define R600_BATCH_NATOMS	15
 struct r600_batch {
 	struct list_head	list;
 	struct list_head	pre_flushes;
@@ -91,6 +91,7 @@ struct r600_atoms {
 	struct list_head	vgt_atoms;
 	struct list_head	vport_atoms;
 	struct list_head	vs_shader_atoms;
+	struct list_head	ps_shader_atoms;
 	struct idr		idr;
 	struct mutex		mutex;
 	struct r600_batches	batches;
diff --git a/test.c b/test.c
index 1868c67..3b12ee5 100644
--- a/test.c
+++ b/test.c
@@ -67,6 +67,14 @@ static u32 vsshaders[64] = {
     0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF,
 };
 
+static u32 psshaders[20] = {	/* pixel shader opcodes handed to the ps_shader atom (ndwords = 20) */
+    0x00000003, 0x80000000, 0x00000005, 0xA00C0000,
+    0xC0008000, 0x94200688, 0x900000F8, 0x00480C90,
+    0x00000000, 0x00000000, 0x00000000, 0x00340C90,
+    0x00000400, 0x20340C90, 0x00000800, 0x40340C90,
+    0x80000C00, 0x60340C90, 0x00000000, 0x00000000,
+};
+
 int r600_tri_flat(struct radeon *radeon)
 {
 	struct radeon_device *rdev;
@@ -82,6 +90,7 @@ int r600_tri_flat(struct radeon *radeon)
 	struct drm_r600_spi spi;
 	struct drm_r600_sx sx;
 	struct drm_r600_vs_shader vs_shader;
+	struct drm_r600_ps_shader ps_shader;
 	struct drm_r600_batch batch;
 	struct drm_radeon_atom atom;
 	int r;
@@ -360,6 +369,17 @@ int r600_tri_flat(struct radeon *radeon)
 	r = radeon_atom_create(rdev, &atom, &batch.vs_shader);
 	if (r)
 		return r;
+	/* ps_shader */
+	ps_shader.sq_pgm_resources_ps = 0x00000003;
+	ps_shader.sq_pgm_exports_ps = 0x00000002;
+	ps_shader.ndwords = 20;
+	ps_shader.opcodes = psshaders;
+	atom.type = R600_ATOM_PS_SHADER;
+	atom.id = 0;
+	atom.data = (uint64_t)(uintptr_t)&ps_shader;
+	r = radeon_atom_create(rdev, &atom, &batch.ps_shader);
+	if (r)
+		return r;
 
 	/* batch */
 	r =  radeon_batches_queue(rdev, &batch);
author	Jerome Glisse <jglisse@redhat.com>	2010-02-13 14:38:38 +0100
committer	Jerome Glisse <jglisse@redhat.com>	2010-02-13 14:38:38 +0100
commit	6c2097cd142555bcfd77837d9d5865cc7b2bdd72 (patch)
tree	6f123e9b84c8db59e942b746815702aeab4522e4
parent	b883154cb3cf5b5274ad37b0ba5b325a6f7cf146 (diff)