12 files changed, 150 insertions, 535 deletions
diff --git a/Makefile b/Makefile
index cb3d879..7faeb69 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 OBJECTS = radeon.o mode.o test.o r700_atom.o radeon_device.o radeon_atom.o\
-	r600_atom.o r600_shader.o r600_batch.o
+	r600_atom.o r600_batch.o
 CFLAGS = -g3 -O0 -std=gnu99 -I/usr/include/drm
 LDFLAGS = -ldrm -ldrm_radeon
 DEPS = radeon.h
diff --git a/r600_atom.c b/r600_atom.c
index 1254935..4e90568 100644
--- a/r600_atom.c
+++ b/r600_atom.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 #include "radeon_device.h"
-#include "r600_atom.h"
+#include "r600_winsys.h"
 #include "r600d.h"
 
 /*
@@ -654,16 +654,9 @@ int r600_vs_shader_emit(struct radeon_device *rdev,
 	u32 *opcodes = batch->shaders->ptr, ndwords;
 	int r;
 
-	r = r600_shader_build_fs(rdev, &opcodes[batch->shaders_idx], &ndwords,
-				&batch->inputs.drm, vs_shader);
-	if (r)
-		return r;
+	memcpy(&opcodes[batch->shaders_idx], vs_shader->opcodes, vs_shader->ndwords * 4);
 	atom->pkts[2] = batch->shaders_idx >> 6;
 	atom->pkts[4] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
-	batch->shaders_idx += (ndwords + 63) & 0xFFFFFFC0;
-	memcpy(&opcodes[batch->shaders_idx], vs_shader->opcodes, vs_shader->ndwords * 4);
-	atom->pkts[13] = batch->shaders_idx >> 6;
-	atom->pkts[15] = radeon_ib_reloc(ib, batch->shaders, RADEON_GEM_DOMAIN_GTT);
 	batch->shaders_idx += (vs_shader->ndwords + 63) & 0xFFFFFFC0;
 	r = radeon_ib_copy(ib, atom->pkts, atom->npkts);
 	return r;
@@ -680,20 +673,6 @@ int r600_vs_shader_create(struct radeon_device *rdev, struct radeon_atom *atom,
 		return -ENOMEM;
 	memcpy(vs_shader, data, sizeof(struct drm_r600_vs_shader));
 	atom->state = vs_shader;
-	/* SQ_PGM_START_FS */
-	atom->pkts[atom->npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
-	atom->pkts[atom->npkts++] = 0x00000225;
-	atom->pkts[atom->npkts++] = 0x00000000;
-	atom->pkts[atom->npkts++] = PKT3(PKT3_NOP, 0);
-	atom->pkts[atom->npkts++] = 0x00000000;
-	/* SQ_PGM_RESOURCES_FS */
-	atom->pkts[atom->npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
-	atom->pkts[atom->npkts++] = 0x00000229;
-	atom->pkts[atom->npkts++] = 0x00000000;
-	/* SQ_PGM_CF_OFFSET_FS */
-	atom->pkts[atom->npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
-	atom->pkts[atom->npkts++] = 0x00000237;
-	atom->pkts[atom->npkts++] = 0x00000000;
 	/* SQ_PGM_START_VS */
 	atom->pkts[atom->npkts++] = PKT3(PKT3_SET_CONTEXT_REG, 1);
 	atom->pkts[atom->npkts++] = 0x00000216;
diff --git a/r600_atom.h b/r600_atom.h
deleted file mode 100644
index fdf7942..0000000
--- a/r600_atom.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright © 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#ifndef R600_ATOM_H
-#define R600_ATOM_H
-
-#include "r600_atom_kernel.h"
-#include "list.h"
-
-struct radeon_device;
-
-/* public API */
-struct drm_radeon_atom {
-	u32			type;
-	u32			id;
-	void			*data;
-};
-
-struct r600_state_container {
-	void			*data;
-	u32			nbo;
-	struct radeon_bo	*bo[32];
-};
-
-
-/* R600 */
-extern void r600_shader_disassemble(u32 *bytecode, u32 ndwords);
-extern int r600_atoms_init(struct radeon_device *rdev);
-extern void r600_atoms_release(struct radeon_device *rdev);
-extern struct radeon_atom *r600_atom_create(struct radeon_device *rdev, struct drm_radeon_atom *patom);
-extern int r600_batches_flush(struct radeon_device *rdev);
-
-#endif
diff --git a/r600_atom_kernel.h b/r600_atom_kernel.h
index ba646a1..dc3dcf4 100644
--- a/r600_atom_kernel.h
+++ b/r600_atom_kernel.h
@@ -27,6 +27,7 @@
 #define R600_ATOM_DB_CNTL	8
 #define R600_ATOM_VS_SHADER	9
 #define R600_ATOM_PS_SHADER	10
+#define R600_BATCH_NATOMS	11
 
 struct drm_r600_cb {
 	u32			pitch;
@@ -163,6 +164,19 @@ struct drm_r600_db_cntl {
 	u32			db_alpha_to_mask;
 };
 
+/* vs_shader - vertex shader */
+struct drm_r600_vs_shader {
+	u32			sq_pgm_resources_vs;
+	u8			input_semantic[32];
+	u8			input_gpr[32];
+	u8			ninputs;
+	u8			output_semantic[32];
+	u8			fog_output_id;
+	u8			noutputs;
+	u32			ndwords;
+	u32			opcodes[512];
+};
+
 /* ps_shader - pixel shader */
 struct drm_r600_ps_shader {
 	u32			spi_ps_in_control_0;
@@ -174,4 +188,42 @@ struct drm_r600_ps_shader {
 	u32			opcodes[512];
 };
 
+struct drm_r600_vs_buffer {
+	u32			handle;
+	u32			resource_id;
+	u32			sq_vtx_constant_word0;
+	u32			sq_vtx_constant_word2;
+	u32			sq_vtx_constant_word3;
+};
+
+struct drm_r600_vs_element {
+	u32			buffer_id;
+	u32			semantic;
+	u32			sq_vtx_word0;
+	u32			sq_vtx_word1;
+	u32			sq_vtx_word2;
+};
+
+struct drm_r600_vs_input {
+	u32				nelements;
+	u32				nbuffers;
+	struct drm_r600_vs_element	elements[32];
+	struct drm_r600_vs_buffer	buffers[32];
+};
+
+struct drm_r600_batch {
+	struct radeon_atom		*vs_constants;
+	struct radeon_atom		*ps_constants;
+	struct radeon_atom		*blend;
+	struct radeon_atom		*cb;
+	struct radeon_atom		*cb_cntl;
+	struct radeon_atom		*pa;
+	struct radeon_atom		*vport;
+	struct radeon_atom		*db;
+	struct radeon_atom		*db_cntl;
+	struct radeon_atom		*vs_shader;
+	struct radeon_atom		*ps_shader;
+	struct drm_r600_vs_input	inputs;
+};
+
 #endif
diff --git a/r600_batch.c b/r600_batch.c
index b6432d5..a322430 100644
--- a/r600_batch.c
+++ b/r600_batch.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 #include "radeon_device.h"
-#include "r600_atom.h"
+#include "r600_winsys.h"
 #include "r600d.h"
 
 /* state creation functions prototype */
@@ -205,8 +205,9 @@ out_err:
 	return r;
 }
 
-int r600_batches_queue(struct radeon_device *rdev, struct drm_r600_batch *batch)
+int r600_batches_queue(struct radeon_device *rdev, struct r600_request *rq)
 {
+	struct drm_r600_batch *batch = rq->data;
 	struct r600_batch *rbatch;
 	struct r600_batches *batches =  &rdev->batches;
 	int r, i, j;
@@ -303,7 +304,7 @@ reprocess:
 			batches->last_id[i] = rbatch->atoms[i]->id;
 		}
 	}
-	radeon_device_set_bo_list(rdev, rbatch->inputs.nbo, rbatch->inputs.bo);
+	radeon_device_set_bo_list(rdev, rq->nbo, rq->bo);
 	for (i = 0; i < rbatch->inputs.drm.nbuffers; i++) {
 		if (rbatch->inputs.drm.buffers[i].resource_id == -1) {
 			r = r600_batches_fs_resource_add(rdev, batches, &rbatch->inputs.drm.buffers[i]);
@@ -394,36 +395,32 @@ static struct r600_atom_funcs _r600_atom_funcs[] = {
 		&r600_ps_shader_create, &r600_ps_shader_emit},
 };
 
-
-struct radeon_atom *r600_atom_create(struct radeon_device *rdev, struct drm_radeon_atom *patom)
+struct radeon_atom *r600_atom_create(struct radeon_device *rdev, struct r600_request *rq)
 {
 	struct radeon_atom *atom;
-	struct r600_state_container *cont;
 	int r;
 
-	if (!patom->type || patom->type >= R600_BATCH_NATOMS) {
+	if (!rq->type || rq->type >= R600_BATCH_NATOMS) {
 		fprintf(stderr, "%s %d invalid atom type %d\n", __func__, __LINE__);
 		return NULL;
 	}
-	if (_r600_atom_funcs[patom->type].type != patom->type) {
+	if (_r600_atom_funcs[rq->type].type != rq->type) {
 		fprintf(stderr, "%s %d mismatch for type %d have %d\n",
-			__func__, __LINE__, _r600_atom_funcs[patom->type].type, patom->type);
+			__func__, __LINE__, _r600_atom_funcs[rq->type].type, rq->type);
 		return NULL;
 	}
-	cont = patom->data;
-	radeon_device_set_bo_list(rdev, cont->nbo, cont->bo);
+	radeon_device_set_bo_list(rdev, rq->nbo, rq->bo);
 	/* create the atom */
 	atom = malloc(sizeof(struct radeon_atom));
 	if (atom == NULL)
 		return NULL;
 	memset(atom, 0, sizeof(struct radeon_atom));
-	INIT_LIST_HEAD(&atom->list);
 	kref_init(&atom->kref);
-	atom->emit = _r600_atom_funcs[patom->type].emit;
+	atom->emit = _r600_atom_funcs[rq->type].emit;
 	atom->nflushes = 0;
-	atom->id = crc_64(cont->data, _r600_atom_funcs[patom->type].size);
-	atom->type = patom->type;
-	r = _r600_atom_funcs[patom->type].create(rdev, atom, cont->data);
+	atom->id = crc_64(rq->data, _r600_atom_funcs[rq->type].size);
+	atom->type = rq->type;
+	r = _r600_atom_funcs[rq->type].create(rdev, atom, rq->data);
 	if (r) {
 		radeon_atom_put(atom);
 		return NULL;
diff --git a/r600_shader.c b/r600_shader.c
deleted file mode 100644
index addd2af..0000000
--- a/r600_shader.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright © 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-#include "radeon_device.h"
-#include "r600d.h"
-
-struct r600_block {
-	struct list_head	list;
-	u32			idx;
-	u32			last;
-};
-
-struct r600_inst_name {
-	char		safe;
-	char		name[64];
-};
-
-static struct r600_inst_name sq_cf_inst_name[] = {
-	{1, "NOP"},
-	{0, "TEX"},
-	{0, "VTX"},
-	{0, "VTX_TC"},
-	{1, "LOOP_START"},
-	{1, "LOOP_END"},
-	{1, "LOOP_START_DX10"},
-	{1, "LOOP_START_NO_AL"},
-	{1, "LOOP_CONTINUE"},
-	{1, "LOOP_BREAK"},
-	{1, "JUMP"},
-	{1, "PUSH"},
-	{1, "PUSH_ELSE"},
-	{1, "ELSE"},
-	{1, "POP"},
-	{1, "POP_JUMP"},
-	{1, "POP_PUSH"},
-	{1, "POP_PUSH_ELSE"},
-	{1, "CALL"},
-	{1, "CALL_FS"},
-	{1, "RETURN"},
-	{0, "EMIT_VERTEX"},
-	{0, "EMIT_CUT_VERTEX"},
-	{0, "CUT_VERTEX"},
-	{1, "KILL"},
-};
-
-static struct r600_inst_name sq_cf_alu_inst_name[] = {
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{0, "unknown"},
-	{1, "ALU"},
-	{1, "ALU_PUSH_BEFORE"},
-	{1, "ALU_POP_AFTER"},
-	{1, "ALU_POP2_AFTER"},
-	{0, "unknown"},
-	{1, "ALU_CONTINUE"},
-	{1, "ALU_BREAK"},
-	{1, "ALU_ELSE_AFTER"},
-};
-
-static struct r600_inst_name sq_cf_alloc_export_inst_name[] = {
-	{0, "MEM_STREAM0"},
-	{0, "MEM_STREAM1"},
-	{0, "MEM_STREAM2"},
-	{0, "MEM_STREAM3"},
-	{0, "MEM_SCRATCH"},
-	{0, "MEM_REDUCTION"},
-	{0, "MEM_RING"},
-	{1, "EXPORT"},
-	{1, "EXPORT_DONE"},
-};
-
-void r600_disassemble_sq_cf_inst(u32 *bytecode, u32 ndwords, u32 idx)
-{
-	u32 addr = G_008DFC_ADDR(bytecode[idx+0]) << 1;
-	u32 count = G_008DFC_COUNT(bytecode[idx+1]);
-	u32 i;
-
-	printf("0x%08X 0x%08X CF_INST: %s\n", bytecode[idx+0], bytecode[idx+1],
-		sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name);
-	printf("  word0: addr (in dw) %d\n", G_008DFC_ADDR(bytecode[idx+0]) << 1);
-	printf("  word1: pop count %d\n", G_008DFC_POP_COUNT(bytecode[idx+1]));
-	printf("  word1: cf const %d\n", G_008DFC_CF_CONST(bytecode[idx+1]));
-	printf("  word1: cond %d\n", G_008DFC_COND(bytecode[idx+1]));
-	printf("  word1: count %d\n", G_008DFC_COUNT(bytecode[idx+1]));
-	printf("  word1: call count %d\n", G_008DFC_CALL_COUNT(bytecode[idx+1]));
-	printf("  word1: end of program %d\n", G_008DFC_END_OF_PROGRAM(bytecode[idx+1]));
-	printf("  word1: valid pixel mode %d\n", G_008DFC_VALID_PIXEL_MODE(bytecode[idx+1]));
-	printf("  word1: inst 0x%02X (%s)\n", G_008DFC_CF_INST(bytecode[idx+1]),
-		sq_cf_inst_name[G_008DFC_CF_INST(bytecode[idx+1])].name);
-	printf("  word1: whole quad mode %d\n", G_008DFC_WHOLE_QUAD_MODE(bytecode[idx+1]));
-	printf("  word1: barrier %d\n", G_008DFC_BARRIER(bytecode[idx+1]));
-	for (i = 0; i <= count; i++)
-		printf("    0x%08X 0x%08X 0x%08X 0x%08X\n",
-				bytecode[idx+addr+0+(i*4)], bytecode[idx+addr+1+(i*4)],
-				bytecode[idx+addr+2+(i*4)], bytecode[idx+addr+3+(i*4)]);
-}
-
-void r600_disassemble_sq_cf_alu_inst(u32 *bytecode, u32 ndwords, u32 idx)
-{
-	printf("0x%08X 0x%08X CF_ALU_INST: %s\n", bytecode[idx+0], bytecode[idx+1],
-		sq_cf_alu_inst_name[G_008DFC_CF_ALU_INST(bytecode[idx+1])].name);
-	printf("  word0: addr (in dw) %d\n", G_008DFC_ALU_ADDR(bytecode[idx+0]) << 1);
-	printf("  word0: kcache bank0 %d\n", G_008DFC_KCACHE_BANK0(bytecode[idx+0]));
-	printf("  word0: kcache bank1 %d\n", G_008DFC_KCACHE_BANK1(bytecode[idx+0]));
-	printf("  word0: kcache mode0 %d\n", G_008DFC_KCACHE_MODE0(bytecode[idx+0]));
-	printf("  word1: kcache mode1 %d\n", G_008DFC_KCACHE_MODE1(bytecode[idx+1]));
-	printf("  word1: kcache addr0 %d\n", G_008DFC_KCACHE_ADDR0(bytecode[idx+1]));
-	printf("  word1: kcache addr1 %d\n", G_008DFC_KCACHE_ADDR1(bytecode[idx+1]));
-	printf("  word1: count %d\n", G_008DFC_ALU_COUNT(bytecode[idx+1]));
-	printf("  word1: use waterfall %d\n", G_008DFC_USES_WATERFALL(bytecode[idx+1]));
-	printf("  word1: inst 0x%02X (%s)\n", G_008DFC_CF_ALU_INST(bytecode[idx+1]),
-		sq_cf_alu_inst_name[G_008DFC_CF_ALU_INST(bytecode[idx+1])].name);
-	printf("  word1: whole quad mode %d\n", G_008DFC_WHOLE_QUAD_MODE(bytecode[idx+1]));
-	printf("  word1: barrier %d\n", G_008DFC_BARRIER(bytecode[idx+1]));
-}
-
-void r600_disassemble_sq_cf_alloc_export_inst(u32 *bytecode, u32 ndwords, u32 idx)
-{
-	printf("0x%08X 0x%08X CF_ALLOC_EXPORT: %s\n", bytecode[idx+0], bytecode[idx+1],
-		sq_cf_alloc_export_inst_name[G_008DFC_CF_INST(bytecode[idx+1])-0x20].name);
-	printf("  word0: array base %d\n", G_008DFC_ARRAY_BASE(bytecode[idx+0]));
-	printf("  word0: type %d\n", G_008DFC_TYPE(bytecode[idx+0]));
-	printf("  word0: rw gpr %d\n", G_008DFC_RW_GPR(bytecode[idx+0]));
-	printf("  word0: rw rel %d\n", G_008DFC_RW_REL(bytecode[idx+0]));
-	printf("  word0: index gpr %d\n", G_008DFC_INDEX_GPR(bytecode[idx+0]));
-	printf("  word0: elem size %d\n", G_008DFC_ELEM_SIZE(bytecode[idx+0]));
-	printf("  word1: burst count %d\n", G_008DFC_BURST_COUNT(bytecode[idx+1]));
-	printf("  word1: end of program %d\n", G_008DFC_END_OF_PROGRAM(bytecode[idx+1]));
-	printf("  word1: valid pixel mode %d\n", G_008DFC_VALID_PIXEL_MODE(bytecode[idx+1]));
-	printf("  word1: inst 0x%02X (%s)\n", G_008DFC_CF_INST(bytecode[idx+1]),
-		sq_cf_alloc_export_inst_name[G_008DFC_CF_INST(bytecode[idx+1])-0x20].name);
-	printf("  word1: whole quad mode %d\n", G_008DFC_WHOLE_QUAD_MODE(bytecode[idx+1]));
-	printf("  word1: barrier %d\n", G_008DFC_BARRIER(bytecode[idx+1]));
-}
-
-struct r600_block *r600_block_new(u32 *bytecode, u32 ndwords, u32 idx)
-{
-	struct r600_block *blk;
-	u32 inst;
-
-	blk = malloc(sizeof(struct r600_block));
-	if (blk == NULL)
-		return NULL;
-	INIT_LIST_HEAD(&blk->list);
-	blk->idx = idx;
-	blk->last = 0;
-	inst = (bytecode[idx+1] >> 23) & 0x7F;
-	if ((inst & 0x78) >= 0x40) {
-		r600_disassemble_sq_cf_alu_inst(bytecode, ndwords, idx);
-	} else {
-		if (G_008DFC_CF_INST(bytecode[idx+1]) < 0x20) {
-			r600_disassemble_sq_cf_inst(bytecode, ndwords, idx);
-			blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]);
-		} else {
-			r600_disassemble_sq_cf_alloc_export_inst(bytecode, ndwords, idx);
-			blk->last = G_008DFC_END_OF_PROGRAM(bytecode[idx+1]);
-		}
-	}
-	return blk;
-}
-
-void r600_shader_disassemble(u32 *bytecode, u32 ndwords)
-{
-	struct r600_block *blk = NULL;
-	u32 idx = 0;
-
-	do {
-		free(blk);
-		blk = r600_block_new(bytecode, ndwords, idx);
-		if (blk == NULL)
-			return;
-		idx += 2;
-	} while (!blk->last);
-}
-
-int r600_shader_build_fs(struct radeon_device *rdev,
-				u32 *bytecode, u32 *ndwords,
-				struct drm_r600_vs_input *inputs,
-				struct drm_r600_vs_shader *vs)
-{
-	u32 idx = 0, i, rid, gpr, j;
-
-	*ndwords = 0;
-	if (!inputs->nelements) {
-		dev_err(rdev->dev, "need at least one input for vertex shader\n");
-		return -EINVAL;
-	}
-	bytecode[idx++] = 0x00000002;
-	bytecode[idx++] = 0x81000000 | S_008DFC_COUNT(inputs->nelements - 1);
-	bytecode[idx++] = 0x00000000;
-	bytecode[idx++] = 0x8A000000;
-	for (i = 0; i < inputs->nelements; i++) {
-		if (inputs->elements[i].buffer_id >= inputs->nbuffers) {
-			dev_err(rdev->dev, "elements %d referencing invalid buffer %d\n",
-				i, inputs->elements[i].buffer_id);
-			return -EINVAL;
-		}
-		rid = inputs->buffers[inputs->elements[i].buffer_id].resource_id;
-		for (j = 0, gpr = -1; j < vs->ninputs; j++) {
-			if (vs->input_semantic[j] == inputs->elements[i].semantic) {
-				gpr = vs->input_gpr[j];
-				break;
-			}
-		}
-		/* if vs has no corresponding input skip the elements */
-		if (gpr == -1)
-			continue;
-		bytecode[idx++] = (inputs->elements[i].sq_vtx_word0 & 0xFC000000) | S_008DFC_BUFFER_ID(rid);
-		bytecode[idx++] = (inputs->elements[i].sq_vtx_word1 & 0xFFFFFC00) | S_008DFC_DST_GPR(gpr);
-		bytecode[idx++] = inputs->elements[i].sq_vtx_word2;
-		bytecode[idx++] = 0xCAFEDEAD;
-	}
-	*ndwords = idx;
-//	r600_shader_disassemble(bytecode, idx);
-	return 0;
-}
diff --git a/r600_winsys.h b/r600_winsys.h
index 11242c3..9897b51 100644
--- a/r600_winsys.h
+++ b/r600_winsys.h
@@ -17,76 +17,24 @@
 #ifndef R600_WINSYS_H
 #define R600_WINSYS_H
 
-#include "r600_atom.h"
+#include "r600_atom_kernel.h"
 
 /* opaque structure */
 struct radeon_device *rdev;
 struct radeon_atom *atom;
 
-
-
-struct drm_r600_vs_buffer {
-	u32			handle;
-	u32			resource_id;
-	u32			sq_vtx_constant_word0;
-	u32			sq_vtx_constant_word2;
-	u32			sq_vtx_constant_word3;
-};
-
-struct drm_r600_vs_element {
-	u32			buffer_id;
-	u32			semantic;
-	u32			sq_vtx_word0;
-	u32			sq_vtx_word1;
-	u32			sq_vtx_word2;
-};
-struct drm_r600_vs_input {
-	u32				nelements;
-	u32				nbuffers;
-	struct drm_r600_vs_element	elements[32];
-	struct drm_r600_vs_buffer	buffers[32];
-};
-struct r600_vs_input {
-	struct drm_r600_vs_input	drm;
-	struct radeon_bo		*bo[32];
-	u32				nbo;
-};
-
-struct r600_vs_buffer {
-	struct drm_r600_vs_buffer	drm;
-	struct radeon_bo		*bo;
-};
-
-struct drm_r600_batch {
-	struct radeon_atom		*vs_constants;
-	struct radeon_atom		*ps_constants;
-	struct radeon_atom		*blend;
-	struct radeon_atom		*cb;
-	struct radeon_atom		*cb_cntl;
-	struct radeon_atom		*pa;
-	struct radeon_atom		*vport;
-	struct radeon_atom		*db;
-	struct radeon_atom		*db_cntl;
-	struct radeon_atom		*vs_shader;
-	struct radeon_atom		*ps_shader;
-	struct r600_vs_input		inputs;
-};
-/* vs_shader - vertex shader */
-struct drm_r600_vs_shader {
-	u32			sq_pgm_resources_vs;
-	u8			input_semantic[32];
-	u8			input_gpr[32];
-	u8			ninputs;
-	u8			output_semantic[32];
-	u8			fog_output_id;
-	u8			noutputs;
-	u32			ndwords;
-	u32			opcodes[512];
+struct r600_request {
+	u32			type;	/* R600_ATOM_* id; r600_atom_create() rejects 0 and values >= R600_BATCH_NATOMS */
+	void			*data;	/* state struct handed to the atom create callback, or a struct drm_r600_batch for r600_batches_queue() */
+	u32			nbo;	/* count passed with bo[] to radeon_device_set_bo_list() */
+	struct radeon_bo	*bo[32];	/* buffer objects accompanying the request */
 };
 
-/* atom */
-extern int radeon_batches_queue(struct radeon_device *rdev, void *batch);
-extern int radeon_batches_flush(struct radeon_device *rdev);
+extern int r600_batches_queue(struct radeon_device*, struct r600_request*);
+extern int r600_batches_flush(struct radeon_device*);
+extern struct radeon_atom *r600_atom_create(struct radeon_device*, struct r600_request*);
+extern int r600_atoms_init(struct radeon_device *rdev);
+extern void r600_atoms_release(struct radeon_device *rdev);
 
 /* core functions */
 extern int radeon_device_init(struct radeon_device **rdev, struct radeon_bo_manager *bom, int fd);
diff --git a/r700_atom.c b/r700_atom.c
index 0359bcd..2f55be3 100644
--- a/r700_atom.c
+++ b/r700_atom.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 #include "radeon_device.h"
-#include "r600_atom.h"
+#include "r600_winsys.h"
 #include "r600d.h"
 
 void r700_batches_states_default(struct radeon_device *rdev, struct r600_batches *batches)
diff --git a/radeon_atom.c b/radeon_atom.c
index e862757..fab82ed 100644
--- a/radeon_atom.c
+++ b/radeon_atom.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 #include "radeon_device.h"
-#include "r600_atom.h"
+#include "r600_winsys.h"
 
 void radeon_atom_flush_cleanup(struct list_head *flushes)
 {
@@ -52,16 +52,6 @@ int radeon_atom_emit_default(struct radeon_device *rdev, struct radeon_atom *ato
 	return radeon_ib_copy(ib, atom->pkts, atom->npkts);
 }
 
-int radeon_batches_queue(struct radeon_device *rdev, void *batch)
-{
-	return r600_batches_queue(rdev, batch);
-}
-
-int radeon_batches_flush(struct radeon_device *rdev)
-{
-	return r600_batches_flush(rdev);
-}
-
 struct radeon_bo *radeon_bo_lookup(struct radeon_device *rdev, u32 handle)
 {
 	int i;
diff --git a/radeon_device.c b/radeon_device.c
index 7027e7a..d677c62 100644
--- a/radeon_device.c
+++ b/radeon_device.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 #include "radeon_device.h"
-#include "r600_atom.h"
+#include "r600_winsys.h"
 
 #pragma pack(1)
 struct ib_reloc_gem {
diff --git a/radeon_device.h b/radeon_device.h
index 505576a..b943e0e 100644
--- a/radeon_device.h
+++ b/radeon_device.h
@@ -57,8 +57,12 @@ int kref_put(struct kref *kref, void (*release) (struct kref *kref));
 
 /* UTILITIES END ************************************************************/
 
+#include "r600_atom_kernel.h"
+
 struct radeon_device;
 struct radeon_atom;
+struct drm_r600_vs_input;
+struct drm_r600_vs_shader;
 
 struct radeon_ib {
 	u32	*ptr;
@@ -93,7 +97,6 @@ struct radeon_atom {
 	radeon_atom_emit_t		emit;
 };
 
-
 struct r600_atom_funcs {
 	u32				type;
 	u32				size;
@@ -101,54 +104,17 @@ struct r600_atom_funcs {
 	radeon_atom_emit_t		emit;
 };
 
-/* R600 */
-
-struct drm_r600_vs_buffer {
-	u32			handle;
-	u32			resource_id;
-	u32			sq_vtx_constant_word0;
-	u32			sq_vtx_constant_word2;
-	u32			sq_vtx_constant_word3;
+struct r600_vs_buffer {
+	struct drm_r600_vs_buffer	drm;
+	struct radeon_bo		*bo;
 };
 
-struct drm_r600_vs_element {
-	u32			buffer_id;
-	u32			semantic;
-	u32			sq_vtx_word0;
-	u32			sq_vtx_word1;
-	u32			sq_vtx_word2;
-};
-struct drm_r600_vs_input {
-	u32				nelements;
-	u32				nbuffers;
-	struct drm_r600_vs_element	elements[32];
-	struct drm_r600_vs_buffer	buffers[32];
-};
 struct r600_vs_input {
 	struct drm_r600_vs_input	drm;
 	struct radeon_bo		*bo[32];
 	u32				nbo;
 };
 
-struct r600_vs_buffer {
-	struct drm_r600_vs_buffer	drm;
-	struct radeon_bo		*bo;
-};
-struct drm_r600_batch {
-	struct radeon_atom		*vs_constants;
-	struct radeon_atom		*ps_constants;
-	struct radeon_atom		*blend;
-	struct radeon_atom		*cb;
-	struct radeon_atom		*cb_cntl;
-	struct radeon_atom		*pa;
-	struct radeon_atom		*vport;
-	struct radeon_atom		*db;
-	struct radeon_atom		*db_cntl;
-	struct radeon_atom		*vs_shader;
-	struct radeon_atom		*ps_shader;
-	struct r600_vs_input		inputs;
-};
-#define R600_BATCH_NATOMS	11
 struct r600_batch {
 	struct list_head		list;
 	struct list_head		pre_flushes;
@@ -160,8 +126,8 @@ struct r600_batch {
 	u32				npkts;
 	struct radeon_bo		*shaders;
 	u32				shaders_idx;
-	struct r600_vs_input		inputs;
 	u32				nfs_resources;
+	struct r600_vs_input		inputs;
 };
 
 struct r600_batches {
@@ -172,19 +138,6 @@ struct r600_batches {
 	struct r600_vs_buffer		fs_resource[160];
 	u32				last_id[R600_BATCH_NATOMS];
 };
-/* vs_shader - vertex shader */
-struct drm_r600_vs_shader {
-	u32			sq_pgm_resources_vs;
-	u8			input_semantic[32];
-	u8			input_gpr[32];
-	u8			ninputs;
-	u8			output_semantic[32];
-	u8			fog_output_id;
-	u8			noutputs;
-	u32			ndwords;
-	u32			opcodes[512];
-};
-
 
 struct radeon_device {
 	int				fd;
@@ -196,12 +149,6 @@ struct radeon_device {
 	unsigned			group_bytes;
 	struct radeon_bo_manager	*bom;
 };
-extern int r600_shader_build_fs(struct radeon_device *rdev,
-				u32 *bytecode, u32 *ndwords,
-				struct drm_r600_vs_input *inputs,
-				struct drm_r600_vs_shader *vs);
-extern int r600_batches_queue(struct radeon_device *rdev,
-				struct drm_r600_batch *batch);
 
 extern u32 radeon_ib_reloc(struct radeon_ib *ib, struct radeon_bo *bo, u32 d);
 extern int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
diff --git a/test.c b/test.c
index 27d3a8d..e76efbc 100644
--- a/test.c
+++ b/test.c
@@ -52,7 +52,7 @@ static u32 vsconstants[16] = {
 };
 
 static u32 vsshaders[64] = {
-    0x00000000, 0x89800000, 0x00000005, 0x80000000,
+    0x0000001C, 0x81000400, 0x00000005, 0x80000000,
     0x00000007, 0xA04C0000, 0xC001A03C, 0x94000688,
     0xC0024000, 0x94200688, 0x900000F8, 0x00A80C90,
     0x00000000, 0x00000000, 0x00200001, 0x006C2810,
@@ -67,7 +67,7 @@ static u32 vsshaders[64] = {
     0x00000402, 0x20940C90, 0x00000802, 0x40940C90,
     0x80000C02, 0x60940C90, 0x00000000, 0x00000000,
     0x7C000000, 0x1C351001, 0x00080000, 0x0BEADEAF,
-    0x7C000300, 0x18ED1002, 0x00080000, 0x0BEADEAF,
+    0x7C000100, 0x18ED1002, 0x00080000, 0x0BEADEAF,
 };
 
 static u32 psshaders[20] = {
@@ -92,7 +92,7 @@ static float rvbo1[32] = {
 int r600_tri_flat(struct radeon *radeon)
 {
 	struct radeon_device *rdev;
-	struct r600_state_container cont;
+	struct r600_request rq;
 	struct drm_r600_blend blend;
 	struct drm_r600_cb cb;
 	struct drm_r600_cb_cntl cb_cntl;
@@ -103,11 +103,9 @@ int r600_tri_flat(struct radeon *radeon)
 	struct drm_r600_vs_shader vs_shader;
 	struct drm_r600_ps_shader ps_shader;
 	struct drm_r600_batch batch;
-	struct drm_radeon_atom atom;
 	struct radeon_bo *vbo1;
 	int r;
 
-//	r600_shader_disassemble(vsshaders, 64);
 	vbo1 = radeon_bo_open(radeon->bom, 0, 4096, 0, RADEON_GEM_DOMAIN_GTT, 0);
 	if (vbo1 == NULL) {
 		fprintf(stderr, "Failed to create vbo1 bo\n");
@@ -127,13 +125,11 @@ int r600_tri_flat(struct radeon *radeon)
 	cb.placements[0] = RADEON_GEM_DOMAIN_VRAM;
 	cb.placements[1] = 0;
 	cb.handle = radeon->mode.bo->handle;
-	atom.type = R600_ATOM_CB;
-	atom.id = 0;
-	cont.bo[0] = radeon->mode.bo;
-	cont.nbo = 1;
-	cont.data = &cb;
-	atom.data = &cont;
-	batch.cb = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_CB;
+	rq.bo[0] = radeon->mode.bo;
+	rq.nbo = 1;
+	rq.data = &cb;
+	batch.cb = r600_atom_create(rdev, &rq);
 	/* build pa */
 	pa.pa_sc_mpass_ps_cntl = 0x00000000;
 	pa.pa_sc_mode_cntl = 0x00514000;
@@ -160,12 +156,10 @@ int r600_tri_flat(struct radeon *radeon)
 	pa.pa_su_poly_offset_front_offset = 0x00000000;
 	pa.pa_su_poly_offset_back_scale = 0x00000000;
 	pa.pa_su_poly_offset_back_offset = 0x00000000;
-	atom.type = R600_ATOM_PA;
-	atom.id = 0;
-	cont.data = &pa;
-	atom.data = &cont;
-	batch.pa = r600_atom_create(rdev, &atom);
-	/* cb control */
+	rq.type = R600_ATOM_PA;
+	rq.data = &pa;
+	batch.pa = r600_atom_create(rdev, &rq);
+	/* cb control */
 	cb_cntl.cb_target_mask = 0x0000000f;
 	cb_cntl.cb_shader_mask = 0x0000000f;
 	cb_cntl.cb_clrcmp_control = 0x01000000;
@@ -184,11 +178,9 @@ int r600_tri_flat(struct radeon *radeon)
 	cb_cntl.cb_fog_blue = 0x00000000;
 	cb_cntl.cb_fog_green = 0x00000000;
 	cb_cntl.cb_fog_red = 0x00000000;
-	atom.type = R600_ATOM_CB_CNTL;
-	atom.id = 0;
-	cont.data = &cb_cntl;
-	atom.data = &cont;
-	batch.cb_cntl = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_CB_CNTL;
+	rq.data = &cb_cntl;
+	batch.cb_cntl = r600_atom_create(rdev, &rq);
 	/* viewport */
 	vport.pa_sc_vport_zmin_0 = 0x00000000;
 	vport.pa_sc_vport_zmax_0 = 0x3f800000;
@@ -214,11 +206,9 @@ int r600_tri_flat(struct radeon *radeon)
 	vport.pa_sc_cliprect_3_br = 0x00fa00fa;
 	vport.pa_sc_generic_scissor_tl = 0x80000000;
 	vport.pa_sc_generic_scissor_br = 0x00fa00fa;
-	atom.type = R600_ATOM_VPORT;
-	atom.id = 0;
-	cont.data = &vport;
-	atom.data = &cont;
-	batch.vport = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_VPORT;
+	rq.data = &vport;
+	batch.vport = r600_atom_create(rdev, &rq);
 	/* blend */
 	blend.cb_blend0_control = 0x00010001;
 	blend.cb_blend1_control = 0x00000000;
@@ -229,11 +219,9 @@ int r600_tri_flat(struct radeon *radeon)
 	blend.cb_blend6_control = 0x00000000;
 	blend.cb_blend7_control = 0x00000000;
 	blend.cb_blend_control = 0x00010001;
-	atom.type = R600_ATOM_BLEND;
-	atom.id = 0;
-	cont.data = &blend;
-	atom.data = &cont;
-	batch.blend = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_BLEND;
+	rq.data = &blend;
+	batch.blend = r600_atom_create(rdev, &rq);
 	/* ps constant */
 	batch.ps_constants = NULL;
 	/* vs constant */
@@ -241,14 +229,12 @@ int r600_tri_flat(struct radeon *radeon)
 	vs_constants.offset = 0x400;
 	memcpy(vs_constants.constants, vsconstants, vs_constants.nconstants * 4 * 4);
 	batch.vs_constants = NULL;
-	atom.type = R600_ATOM_CONSTANTS;
-	atom.id = 0;
-	cont.data = &vs_constants;
-	atom.data = &cont;
-	batch.vs_constants = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_CONSTANTS;
+	rq.data = &vs_constants;
+	batch.vs_constants = r600_atom_create(rdev, &rq);
 	/* db */
 	batch.db = NULL;
-	/* db control */
+	/* db control */
 	db_cntl.db_stencil_clear = 0x00000000;
 	db_cntl.db_depth_clear = 0x3F800000;
 	db_cntl.db_stencilrefmask = 0xFFFFFF00;
@@ -260,11 +246,9 @@ int r600_tri_flat(struct radeon *radeon)
 	db_cntl.db_alpha_to_mask = 0x0000AA00;
 	db_cntl.db_sresults_compare_state1 = 0x00000000;
 	db_cntl.db_preload_control = 0x00000000;
-	atom.type = R600_ATOM_DB_CNTL;
-	atom.id = 0;
-	cont.data = &db_cntl;
-	atom.data = &cont;
-	batch.db_cntl = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_DB_CNTL;
+	rq.data = &db_cntl;
+	batch.db_cntl = r600_atom_create(rdev, &rq);
 	/* vs_shader */
 	vs_shader.ninputs = 2;
 	vs_shader.input_semantic[0] = 1;
@@ -276,11 +260,9 @@ int r600_tri_flat(struct radeon *radeon)
 	vs_shader.sq_pgm_resources_vs = 0x00000106;
 	vs_shader.ndwords = 64;
 	memcpy(vs_shader.opcodes, vsshaders, vs_shader.ndwords * 4);
-	atom.type = R600_ATOM_VS_SHADER;
-	atom.id = 0;
-	cont.data = &vs_shader;
-	atom.data = &cont;
-	batch.vs_shader = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_VS_SHADER;
+	rq.data = &vs_shader;
+	batch.vs_shader = r600_atom_create(rdev, &rq);
 	/* ps_shader */
 	ps_shader.spi_ps_input_cntl[0] = 0x00000804;
 	ps_shader.spi_ps_input_cntl[1] = 0x00000000;
@@ -290,41 +272,41 @@ int r600_tri_flat(struct radeon *radeon)
 	ps_shader.sq_pgm_exports_ps = 0x00000002;
 	ps_shader.ndwords = 20;
 	memcpy(ps_shader.opcodes, psshaders, ps_shader.ndwords * 4);
-	atom.type = R600_ATOM_PS_SHADER;
-	atom.id = 0;
-	cont.data = &ps_shader;
-	atom.data = &cont;
-	batch.ps_shader = r600_atom_create(rdev, &atom);
+	rq.type = R600_ATOM_PS_SHADER;
+	rq.data = &ps_shader;
+	batch.ps_shader = r600_atom_create(rdev, &rq);
 
 	/* inputs */
-	batch.inputs.bo[0] = vbo1;
-	batch.inputs.nbo = 1;
-	batch.inputs.drm.nelements = 2;
-	batch.inputs.drm.nbuffers = 2;
-	batch.inputs.drm.buffers[0].handle = vbo1->handle;
-	batch.inputs.drm.buffers[0].sq_vtx_constant_word0 = 0x00000000;
-	batch.inputs.drm.buffers[0].sq_vtx_constant_word2 = 0x03001C00;
-	batch.inputs.drm.buffers[0].sq_vtx_constant_word3 = 0x00000001;
-	batch.inputs.drm.buffers[1].handle = vbo1->handle;
-	batch.inputs.drm.buffers[1].sq_vtx_constant_word0 = 0x0000000C;
-	batch.inputs.drm.buffers[1].sq_vtx_constant_word2 = 0x02301C00;
-	batch.inputs.drm.buffers[1].sq_vtx_constant_word3 = 0x00000001;
-	batch.inputs.drm.elements[0].buffer_id = 0;
-	batch.inputs.drm.elements[0].semantic = 1;
-	batch.inputs.drm.elements[0].sq_vtx_word0 = 0x7C000000;
-	batch.inputs.drm.elements[0].sq_vtx_word1 = 0x1C351000;
-	batch.inputs.drm.elements[0].sq_vtx_word2 = 0x00080000;
-	batch.inputs.drm.elements[1].buffer_id = 1;
-	batch.inputs.drm.elements[1].semantic = 2;
-	batch.inputs.drm.elements[1].sq_vtx_word0 = 0x7C000000;
-	batch.inputs.drm.elements[1].sq_vtx_word1 = 0x18ED1000;
-	batch.inputs.drm.elements[1].sq_vtx_word2 = 0x00080000;
+	batch.inputs.nelements = 2;
+	batch.inputs.nbuffers = 2;
+	batch.inputs.buffers[0].handle = vbo1->handle;
+	batch.inputs.buffers[0].sq_vtx_constant_word0 = 0x00000000;
+	batch.inputs.buffers[0].sq_vtx_constant_word2 = 0x03001C00;
+	batch.inputs.buffers[0].sq_vtx_constant_word3 = 0x00000001;
+	batch.inputs.buffers[1].handle = vbo1->handle;
+	batch.inputs.buffers[1].sq_vtx_constant_word0 = 0x0000000C;
+	batch.inputs.buffers[1].sq_vtx_constant_word2 = 0x02301C00;
+	batch.inputs.buffers[1].sq_vtx_constant_word3 = 0x00000001;
+	batch.inputs.elements[0].buffer_id = 0;
+	batch.inputs.elements[0].semantic = 1;
+	batch.inputs.elements[0].sq_vtx_word0 = 0x7C000000;
+	batch.inputs.elements[0].sq_vtx_word1 = 0x1C351000;
+	batch.inputs.elements[0].sq_vtx_word2 = 0x00080000;
+	batch.inputs.elements[1].buffer_id = 1;
+	batch.inputs.elements[1].semantic = 2;
+	batch.inputs.elements[1].sq_vtx_word0 = 0x7C000000;
+	batch.inputs.elements[1].sq_vtx_word1 = 0x18ED1000;
+	batch.inputs.elements[1].sq_vtx_word2 = 0x00080000;
+	rq.type = 0;
+	rq.data = &batch;
+	rq.bo[0] = vbo1;
+	rq.nbo = 1;
 
 	/* batch */
-	r =  radeon_batches_queue(rdev, &batch);
+	r =  r600_batches_queue(rdev, &rq);
 	if (r)
 		return r;
-	r = radeon_batches_flush(rdev);
+	r = r600_batches_flush(rdev);
 	radeon_device_release(rdev);
 	radeon_bo_unref(vbo1);
 	return r;