summaryrefslogtreecommitdiff
path: root/i965
diff options
context:
space:
mode:
authorLuc Verhaegen <libv@skynet.be>2010-03-14 05:01:55 +0100
committerLuc Verhaegen <libv@skynet.be>2010-03-14 05:01:55 +0100
commitda7796a7d40ca2cd4a03e99ddf38e3a7256a401f (patch)
treef692761e1ce757ce13112c602efc8228d422b78d /i965
parentcdb0aeec165a846b647e6f4e722c95c09e185ce1 (diff)
Import i915 and i965 dri drivers from mesa 7.5.0.7.5.0
Diffstat (limited to 'i965')
-rw-r--r--i965/Makefile.am4
-rw-r--r--i965/brw_cc.c2
-rw-r--r--i965/brw_context.c9
-rw-r--r--i965/brw_context.h105
-rw-r--r--i965/brw_curbe.c136
-rw-r--r--i965/brw_defines.h28
-rw-r--r--i965/brw_draw.c20
-rw-r--r--i965/brw_draw_upload.c90
-rw-r--r--i965/brw_eu.h32
-rw-r--r--i965/brw_eu_debug.c5
-rw-r--r--i965/brw_eu_emit.c252
-rw-r--r--i965/brw_fallback.c2
-rw-r--r--i965/brw_misc_state.c9
-rw-r--r--i965/brw_program.c38
-rw-r--r--i965/brw_sf_state.c58
-rw-r--r--i965/brw_state.h1
-rw-r--r--i965/brw_state_batch.c2
-rw-r--r--i965/brw_state_dump.c28
-rw-r--r--i965/brw_structs.h6
-rw-r--r--i965/brw_tex.c9
-rw-r--r--i965/brw_vs.h5
-rw-r--r--i965/brw_vs_constval.c50
-rw-r--r--i965/brw_vs_emit.c335
-rw-r--r--i965/brw_vs_state.c8
-rw-r--r--i965/brw_vtbl.c106
-rw-r--r--i965/brw_wm.c139
-rw-r--r--i965/brw_wm.h23
-rw-r--r--i965/brw_wm_emit.c34
-rw-r--r--i965/brw_wm_fp.c136
-rw-r--r--i965/brw_wm_glsl.c924
-rw-r--r--i965/brw_wm_pass0.c55
-rw-r--r--i965/brw_wm_pass1.c29
-rw-r--r--i965/brw_wm_pass2.c49
-rw-r--r--i965/brw_wm_sampler_state.c13
-rw-r--r--i965/brw_wm_state.c37
-rw-r--r--i965/brw_wm_surface_state.c445
-rw-r--r--i965/intel_state.c233
37 files changed, 2333 insertions, 1124 deletions
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 69abe6c..5a118af 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -15,7 +15,7 @@ i965_dri_la_SOURCES = \
../shared/intel_clear.c \
../shared/intel_context.c \
../shared/intel_decode.c \
- ../shared/intel_depthstencil.c \
+ ../shared/intel_extensions.c \
../shared/intel_fbo.c \
../shared/intel_mipmap_tree.c \
../shared/intel_regions.c \
@@ -25,7 +25,7 @@ i965_dri_la_SOURCES = \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
../shared/intel_pixel_draw.c \
- intel_state.c \
+ ../shared/intel_state.c \
../shared/intel_swapbuffers.c \
../shared/intel_tex.c \
../shared/intel_tex_copy.c \
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index 8237016..c724218 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
memset(key, 0, sizeof(*key));
- key->stencil = ctx->Stencil.Enabled;
+ key->stencil = ctx->Stencil._Enabled;
key->stencil_two_side = ctx->Stencil._TestTwoSide;
if (key->stencil) {
diff --git a/i965/brw_context.c b/i965/brw_context.c
index 4357100..4dbe551 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -111,14 +111,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
ctx->Const.MaxTextureImageUnits);
ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
- /* Advertise the full hardware capabilities. The new memory
- * manager should cope much better with overload situations:
+ /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
*/
- ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.MaxTextureLevels = 13;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
- ctx->Const.MaxTextureRectSize = (1<<11);
+ ctx->Const.MaxTextureRectSize = (1<<12);
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
/* if conformance mode is set, swrast can handle any size AA point */
ctx->Const.MaxPointSizeAA = 255.0;
diff --git a/i965/brw_context.h b/i965/brw_context.h
index df90c20..577497b 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -46,7 +46,7 @@
*
* CURBE - constant URB entry. An urb region (entry) used to hold
* constant values which the fixed function units can be instructed to
- * preload into the GRF when spawining a thread.
+ * preload into the GRF when spawning a thread.
*
* VUE - vertex URB entry. An urb entry holding a vertex and usually
* a vertex header. The header contains control information and
@@ -63,7 +63,7 @@
* special and may be overwritten.
*
* MRF - message register file. Threads communicate (and terminate)
- * by sending messages. Message parameters are placed in contigous
+ * by sending messages. Message parameters are placed in contiguous
* MRF registers. All program output is via these messages. URB
* entries are populated by sending a message to the shared URB
* function containing the new data, together with a control word,
@@ -141,7 +141,8 @@ struct brw_context;
#define BRW_NEW_BATCH 0x10000
/** brw->depth_region updated */
#define BRW_NEW_DEPTH_BUFFER 0x20000
-#define BRW_NEW_NR_SURFACES 0x40000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -154,19 +155,25 @@ struct brw_state_flags {
GLuint cache;
};
+
+/** Subclass of Mesa vertex program */
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
};
-
+/** Subclass of Mesa fragment program */
struct brw_fragment_program {
struct gl_fragment_program program;
- GLuint id;
-};
-
+ GLuint id; /**< serial no. to identify frag progs, never re-used */
+ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+};
/* Data about a particular attempt to compile a program. Note that
@@ -182,7 +189,7 @@ struct brw_wm_prog_data {
GLuint total_grf;
GLuint total_scratch;
- GLuint nr_params;
+ GLuint nr_params; /**< number of float params/constants */
GLboolean error;
/* Pointer to tracked values (only valid once
@@ -221,6 +228,7 @@ struct brw_vs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
GLuint outputs_written;
+ GLuint nr_params; /**< number of float params/constants */
GLuint inputs_read;
@@ -237,8 +245,35 @@ struct brw_vs_ouput_sizes {
};
+/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
-#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces and current texture
+ * objects and shader constant buffers (+2).
+ */
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define SURF_INDEX_DRAW(d) (d)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
enum brw_cache_id {
BRW_CC_VP,
@@ -418,8 +453,8 @@ struct brw_context
struct brw_tracked_state **atoms;
GLuint nr_atoms;
- GLuint nr_draw_regions;
- struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
+ GLuint nr_color_regions;
+ struct intel_region *color_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
/**
@@ -512,8 +547,8 @@ struct brw_context
/* BRW_NEW_CURBE_OFFSETS:
*/
struct {
- GLuint wm_start;
- GLuint wm_size;
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
GLuint clip_start;
GLuint clip_size;
GLuint vs_start;
@@ -545,6 +580,11 @@ struct brw_context
dri_bo *prog_bo;
dri_bo *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ dri_bo *bind_bo;
+ dri_bo *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
} vs;
struct {
@@ -576,9 +616,10 @@ struct brw_context
struct brw_wm_prog_data *prog_data;
struct brw_wm_compile *compile_data;
- /* Input sizes, calculated from active vertex program:
+ /** Input sizes, calculated from active vertex program.
+ * One bit per fragment program input attribute.
*/
- GLuint input_size_masks[4];
+ GLbitfield input_size_masks[4];
/** Array of surface default colors (texture border color) */
dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
@@ -587,7 +628,7 @@ struct brw_context
GLuint nr_surfaces;
GLuint max_threads;
- dri_bo *scratch_buffer;
+ dri_bo *scratch_bo;
GLuint sampler_count;
dri_bo *sampler_bo;
@@ -627,8 +668,6 @@ struct brw_context
* brw_vtbl.c
*/
void brwInitVtbl( struct brw_context *brw );
-void brw_do_flush( struct brw_context *brw,
- GLuint flags );
/*======================================================================
* brw_context.c
@@ -670,7 +709,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
*/
void brw_upload_urb_fence(struct brw_context *brw);
-void brw_upload_constant_buffer_state(struct brw_context *brw);
+/* brw_curbe.c
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw);
/*======================================================================
@@ -683,6 +724,32 @@ brw_context( GLcontext *ctx )
return (struct brw_context *)ctx;
}
+static INLINE struct brw_vertex_program *
+brw_vertex_program(struct gl_vertex_program *p)
+{
+ return (struct brw_vertex_program *) p;
+}
+
+static INLINE const struct brw_vertex_program *
+brw_vertex_program_const(const struct gl_vertex_program *p)
+{
+ return (const struct brw_vertex_program *) p;
+}
+
+static INLINE struct brw_fragment_program *
+brw_fragment_program(struct gl_fragment_program *p)
+{
+ return (struct brw_fragment_program *) p;
+}
+
+static INLINE const struct brw_fragment_program *
+brw_fragment_program_const(const struct gl_fragment_program *p)
+{
+ return (const struct brw_fragment_program *) p;
+}
+
+
+
#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
#endif
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index 4eaaa5f..9197fed 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -38,23 +38,28 @@
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
+#include "intel_regions.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
-/* Partition the CURBE between the various users of constant values:
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers. We no longer allocatea block of the GRF for
+ * constants. That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
/* CACHE_NEW_WM_PROG */
- GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16;
+ const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
@@ -138,24 +143,24 @@ const struct brw_tracked_state brw_curbe_offsets = {
* fixed-function hardware in a double-buffering scheme to avoid a
* pipeline stall each time the contents of the curbe is changed.
*/
-void brw_upload_constant_buffer_state(struct brw_context *brw)
+void brw_upload_cs_urb_state(struct brw_context *brw)
{
- struct brw_constant_buffer_state cbs;
- memset(&cbs, 0, sizeof(cbs));
+ struct brw_cs_urb_state cs_urb;
+ memset(&cs_urb, 0, sizeof(cs_urb));
/* It appears that this is the state packet for the CS unit, ie. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
- cbs.header.opcode = CMD_CONST_BUFFER_STATE;
- cbs.header.length = sizeof(cbs)/4 - 2;
+ cs_urb.header.opcode = CMD_CS_URB_STATE;
+ cs_urb.header.length = sizeof(cs_urb)/4 - 2;
/* BRW_NEW_URB_FENCE */
- cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
- cbs.bits0.urb_entry_size = brw->urb.csize - 1;
+ cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
assert(brw->urb.nr_cs_entries);
- BRW_CACHED_BATCH_STRUCT(brw, &cbs);
+ BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
}
static GLfloat fixed_plane[6][4] = {
@@ -174,10 +179,12 @@ static GLfloat fixed_plane[6][4] = {
static void prepare_constant_buffer(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
- GLuint sz = brw->curbe.total_size;
- GLuint bufsz = sz * 16 * sizeof(GLfloat);
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+ const GLuint sz = brw->curbe.total_size;
+ const GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
GLuint i;
@@ -189,27 +196,25 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
if (sz == 0) {
-
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
-
return;
}
- buf = (GLfloat *)malloc(bufsz);
-
- memset(buf, 0, bufsz);
+ buf = (GLfloat *) _mesa_calloc(bufsz);
+ /* fragment shader constants */
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+ /* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
- buf[offset + i] = brw->wm.prog_data->param[i][0];
+ buf[offset + i] = *brw->wm.prog_data->param[i];
}
@@ -244,18 +249,20 @@ static void prepare_constant_buffer(struct brw_context *brw)
}
}
-
+ /* vertex shader constants */
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
- GLuint nr = vp->program.Base.Parameters->NumParameters;
+ GLuint nr = brw->vs.prog_data->nr_params / 4;
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+ /* XXX just use a memcpy here */
for (i = 0; i < nr; i++) {
- buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
- buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
- buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
- buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
+ const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
+ buf[offset + i * 4 + 0] = value[0];
+ buf[offset + i * 4 + 1] = value[1];
+ buf[offset + i * 4 + 2] = value[2];
+ buf[offset + i * 4 + 3] = value[3];
}
}
@@ -274,11 +281,14 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
- free(buf);
+ /* constants have not changed */
+ _mesa_free(buf);
}
else {
+ /* constants have changed */
if (brw->curbe.last_buf)
- free(brw->curbe.last_buf);
+ _mesa_free(brw->curbe.last_buf);
+
brw->curbe.last_buf = buf;
brw->curbe.last_bufsz = bufsz;
@@ -326,11 +336,77 @@ static void prepare_constant_buffer(struct brw_context *brw)
}
+/**
+ * Copy Mesa program parameters into given constant buffer.
+ */
+static void
+update_constant_buffer(struct brw_context *brw,
+ const struct gl_program_parameter_list *params,
+ dri_bo *const_buffer)
+{
+ struct intel_context *intel = &brw->intel;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+ /* copy Mesa program constants into the buffer */
+ if (const_buffer && size > 0) {
+
+ assert(const_buffer);
+ assert(const_buffer->size >= size);
+
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(const_buffer);
+ memcpy(const_buffer->virtual, params->ParameterValues, size);
+ drm_intel_gem_bo_unmap_gtt(const_buffer);
+ }
+ else {
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+ }
+
+ if (0) {
+ int i;
+ for (i = 0; i < params->NumParameters; i++) {
+ float *p = params->ParameterValues[i];
+ printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
+ }
+ }
+ }
+}
+
+
+/** Copy current vertex program's parameters into the constant buffer */
+static void
+update_vertex_constant_buffer(struct brw_context *brw)
+{
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ if (0) {
+ printf("update VS constants in buffer %p\n", vp->const_buffer);
+ printf("program %u\n", vp->program.Base.Id);
+ }
+ if (vp->use_const_buffer)
+ update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer);
+}
+
+
+/** Copy current fragment program's parameters into the constant buffer */
+static void
+update_fragment_constant_buffer(struct brw_context *brw)
+{
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ if (fp->use_const_buffer)
+ update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
+}
+
+
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
+ update_vertex_constant_buffer(brw);
+ update_fragment_constant_buffer(brw);
+
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index 39c3225..98fc909 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -225,6 +225,24 @@
#define BRW_RASTRULE_UPPER_LEFT 0
#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
@@ -349,9 +367,10 @@
#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
-#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
-#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
-#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
@@ -368,6 +387,7 @@
#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
@@ -734,7 +754,7 @@
#define CMD_URB_FENCE 0x6000
-#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CS_URB_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
#define CMD_STATE_BASE_ADDRESS 0x6101
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index 0b64999..5342622 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw,
uint32_t hw_prim)
{
struct brw_3d_primitive prim_packet;
+ struct intel_context *intel = &brw->intel;
if (INTEL_DEBUG & DEBUG_PRIMS)
_mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
@@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw,
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the besides the draw code.
+ */
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
+
brw->no_batch_wrap = GL_FALSE;
}
@@ -393,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
retval = GL_TRUE;
}
+ if (intel->always_flush_batch)
+ intel_batchbuffer_flush(intel->batch);
out:
UNLOCK_HARDWARE(intel);
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index 73d6dea..b91b20b 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -156,7 +156,13 @@ static GLuint byte_types_scale[5] = {
};
-static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appopriate hardware surface type.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */
+static GLuint get_surface_type( GLenum type, GLuint size,
+ GLenum format, GLboolean normalized )
{
if (INTEL_DEBUG & DEBUG_VERTS)
_mesa_printf("type %s size %d normalized %d\n",
@@ -171,11 +177,20 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
case GL_BYTE: return byte_types_norm[size];
case GL_UNSIGNED_INT: return uint_types_norm[size];
case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
- case GL_UNSIGNED_BYTE: return ubyte_types_norm[size];
+ case GL_UNSIGNED_BYTE:
+ if (format == GL_BGRA) {
+ /* See GL_EXT_vertex_array_bgra */
+ assert(size == 4);
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ }
+ else {
+ return ubyte_types_norm[size];
+ }
default: assert(0); return 0;
}
}
else {
+ assert(format == GL_RGBA); /* sanity check */
switch (type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
@@ -262,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw,
struct brw_vertex_element *element,
GLuint dst_stride)
{
+ struct intel_context *intel = &brw->intel;
GLuint size = element->count * dst_stride;
get_space(brw, size, &element->bo, &element->offset);
@@ -274,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw,
}
if (dst_stride == element->glarray->StrideB) {
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- element->glarray->Ptr);
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(element->bo);
+ memcpy((char *)element->bo->virtual + element->offset,
+ element->glarray->Ptr, size);
+ drm_intel_gem_bo_unmap_gtt(element->bo);
+ } else {
+ dri_bo_subdata(element->bo,
+ element->offset,
+ size,
+ element->glarray->Ptr);
+ }
} else {
- void *data;
char *dest;
- const char *src = element->glarray->Ptr;
+ const unsigned char *src = element->glarray->Ptr;
int i;
- data = _mesa_malloc(dst_stride * element->count);
- dest = data;
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(element->bo);
+ dest = element->bo->virtual;
+ dest += element->offset;
- dri_bo_subdata(element->bo,
- element->offset,
- size,
- data);
- _mesa_free(data);
+ for (i = 0; i < element->count; i++) {
+ memcpy(dest, src, dst_stride);
+ src += element->glarray->StrideB;
+ dest += dst_stride;
+ }
+
+ drm_intel_gem_bo_unmap_gtt(element->bo);
+ } else {
+ void *data;
+
+ data = _mesa_malloc(dst_stride * element->count);
+ dest = data;
+ for (i = 0; i < element->count; i++) {
+ memcpy(dest, src, dst_stride);
+ src += element->glarray->StrideB;
+ dest += dst_stride;
+ }
+
+ dri_bo_subdata(element->bo,
+ element->offset,
+ size,
+ data);
+
+ _mesa_free(data);
+ }
}
}
@@ -484,6 +523,7 @@ static void brw_emit_vertices(struct brw_context *brw)
struct brw_vertex_element *input = enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
+ input->glarray->Format,
input->glarray->Normalized);
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
@@ -547,9 +587,15 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Straight upload
*/
- dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(bo);
+ memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+ drm_intel_gem_bo_unmap_gtt(bo);
+ } else {
+ dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+ }
} else {
- offset = (GLuint)index_buffer->ptr;
+ offset = (GLuint) (unsigned long) index_buffer->ptr;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
index b36a197..003332f 100644
--- a/i965/brw_eu.h
+++ b/i965/brw_eu.h
@@ -97,7 +97,7 @@ struct brw_glsl_call;
#define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 1200
+#define BRW_EU_MAX_INSN 4000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
@@ -170,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file,
GLuint writemask )
{
struct brw_reg reg;
+ if (type == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < 128);
+ else if (type == BRW_MESSAGE_REGISTER_FILE)
+ assert(nr < 9);
+ else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_IP);
+
reg.type = type;
reg.file = file;
reg.nr = nr;
@@ -723,6 +730,13 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset
return ptr;
}
+/** Do two brw_regs refer to the same register? */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+ return r1.file == r2.file && r1.nr == r2.nr;
+}
+
static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
{
return &p->store[p->nr_insn];
@@ -841,12 +855,24 @@ void brw_math( struct brw_compile *p,
void brw_dp_READ_16( struct brw_compile *p,
struct brw_reg dest,
- GLuint msg_reg_nr,
GLuint scratch_offset );
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
- GLuint msg_reg_nr,
GLuint scratch_offset );
/* If/else/endif. Works by manipulating the execution flags on each
diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c
index 91dbbd5..29f3f6d 100644
--- a/i965/brw_eu_debug.c
+++ b/i965/brw_eu_debug.c
@@ -65,6 +65,7 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.width == BRW_WIDTH_8 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* vector register */
_mesa_printf("vec%d", hwreg.nr);
}
else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
@@ -72,8 +73,12 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.width == BRW_WIDTH_1 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* "scalar" register */
_mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
}
+ else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+ _mesa_printf("imm %f", hwreg.dw1.f);
+ }
else {
_mesa_printf("%s%d.%d<%d;%d,%d>:%s",
file[hwreg.file],
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 4e099b5..2a147fb 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn,
static void brw_set_dest( struct brw_instruction *insn,
struct brw_reg dest )
{
+ if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
@@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn,
}
static void brw_set_src0( struct brw_instruction *insn,
- struct brw_reg reg )
+ struct brw_reg reg )
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
@@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn,
void brw_set_src1( struct brw_instruction *insn,
- struct brw_reg reg )
+ struct brw_reg reg )
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+ assert(reg.nr < 128);
+
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
insn->bits3.da1.src1_abs = reg.abs;
@@ -312,24 +320,25 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.dp_read.binding_table_index = binding_table_index;
- insn->bits3.dp_read.msg_control = msg_control;
- insn->bits3.dp_read.msg_type = msg_type;
- insn->bits3.dp_read.target_cache = target_cache;
- insn->bits3.dp_read.response_length = response_length;
- insn->bits3.dp_read.msg_length = msg_length;
- insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits3.dp_read.end_of_thread = end_of_thread;
+ insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+ insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+ insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read.pad1 = 0; /*28:30*/
+ insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
}
static void brw_set_sampler_message(struct brw_context *brw,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint sampler,
- GLuint msg_type,
- GLuint response_length,
- GLuint msg_length,
- GLboolean eot)
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot)
{
brw_set_src1(insn, brw_imm_d(0));
@@ -407,7 +416,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
* Convenience routines.
*/
#define ALU1(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
@@ -415,7 +424,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \
}
#define ALU2(OP) \
-struct brw_instruction *brw_##OP(struct brw_compile *p, \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
@@ -469,9 +478,9 @@ void brw_NOP(struct brw_compile *p)
*/
struct brw_instruction *brw_JMPI(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg src0,
- struct brw_reg src1)
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
@@ -674,7 +683,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
struct brw_instruction *brw_WHILE(struct brw_compile *p,
- struct brw_instruction *do_insn)
+ struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
@@ -762,7 +771,7 @@ void brw_CMP(struct brw_compile *p,
* Helpers for the various SEND message types:
*/
-/* Invert 8 values
+/** Extended math function, float[8].
*/
void brw_math( struct brw_compile *p,
struct brw_reg dest,
@@ -794,7 +803,9 @@ void brw_math( struct brw_compile *p,
data_type);
}
-/* Use 2 send instructions to invert 16 elements
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
*/
void brw_math_16( struct brw_compile *p,
struct brw_reg dest,
@@ -847,22 +858,26 @@ void brw_math_16( struct brw_compile *p,
}
-
-
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
- GLuint msg_reg_nr,
GLuint scratch_offset )
{
+ GLuint msg_reg_nr = 1;
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ /* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
brw_imm_d(scratch_offset));
-
+
brw_pop_insn_state(p);
}
@@ -879,7 +894,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
brw_set_src0(insn, src);
brw_set_dp_write_message(insn,
- 255, /* bti */
+ 255, /* binding table index (255=stateless) */
BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
msg_length,
@@ -887,24 +902,29 @@ void brw_dp_WRITE_16( struct brw_compile *p,
0, /* response_length */
0); /* eot */
}
-
}
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
void brw_dp_READ_16( struct brw_compile *p,
struct brw_reg dest,
- GLuint msg_reg_nr,
GLuint scratch_offset )
{
+ GLuint msg_reg_nr = 1;
{
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
+ /* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
brw_imm_d(scratch_offset));
-
+
brw_pop_insn_state(p);
}
@@ -919,10 +939,10 @@ void brw_dp_READ_16( struct brw_compile *p,
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
brw_set_dp_read_message(insn,
- 255, /* bti */
- 3, /* msg_control */
+ 255, /* binding table index (255=stateless) */
+ 3, /* msg_control (3 means 4 Owords) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 1, /* target cache */
+ 1, /* target cache (render/scratch) */
1, /* msg_length */
2, /* response_length */
0); /* eot */
@@ -930,14 +950,143 @@ void brw_dp_READ_16( struct brw_compile *p,
}
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * If relAddr is true, we'll do an indirect fetch using the address register.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index )
+{
+ /* XXX: relAddr not implemented */
+ GLuint msg_reg_nr = 1;
+ {
+ struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ brw_MOV(p, b, brw_imm_ud(location));
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(insn,
+ bind_table_index,
+ 0, /* msg_control (0 means 1 Oword) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index)
+{
+ GLuint msg_reg_nr = 1;
+
+ assert(oword < 2);
+ /*
+ printf("vs const read msg, location %u, msg_reg_nr %d\n",
+ location, msg_reg_nr);
+ */
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ {
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(insn,
+ bind_table_index,
+ oword, /* 0 = lower Oword, 1 = upper Oword */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+
void brw_fb_WRITE(struct brw_compile *p,
- struct brw_reg dest,
- GLuint msg_reg_nr,
- struct brw_reg src0,
- GLuint binding_table_index,
- GLuint msg_length,
- GLuint response_length,
- GLboolean eot)
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
@@ -958,7 +1107,11 @@ void brw_fb_WRITE(struct brw_compile *p,
}
-
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ */
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -973,8 +1126,8 @@ void brw_SAMPLE(struct brw_compile *p,
{
GLboolean need_stall = 0;
- if(writemask == 0) {
-/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+ if (writemask == 0) {
+ /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
return;
}
@@ -1006,7 +1159,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (newmask != writemask) {
need_stall = 1;
-/* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+ /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
}
else {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
@@ -1047,14 +1200,13 @@ void brw_SAMPLE(struct brw_compile *p,
eot);
}
- if (need_stall)
- {
+ if (need_stall) {
struct brw_reg reg = vec8(offset(dest, response_length-1));
/* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
*/
brw_push_insn_state(p);
- brw_set_compression_control(p, GL_FALSE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, reg, reg);
brw_pop_insn_state(p);
}
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 5f4f2d5..2993574 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -75,7 +75,7 @@ static GLboolean do_check_fallback(struct brw_context *brw)
/* _NEW_STENCIL
*/
- if (ctx->Stencil.Enabled &&
+ if (ctx->Stencil._Enabled &&
(ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) {
DBG("FALLBACK: stencil\n");
return GL_TRUE;
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index f311663..9bc5c35 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = {
static void prepare_binding_table_pointers(struct brw_context *brw)
{
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
brw_add_validated_bo(brw, brw->wm.bind_bo);
}
@@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- OUT_BATCH(0); /* vs */
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_RELOC(brw->wm.bind_bo,
- I915_GEM_DOMAIN_SAMPLER, 0,
- 0);
+ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
ADVANCE_BATCH();
}
@@ -178,7 +177,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
- brw_upload_constant_buffer_state(brw);
+ brw_upload_cs_urb_state(brw);
}
const struct brw_tracked_state brw_psp_urb_cbs = {
diff --git a/i965/brw_program.c b/i965/brw_program.c
index 0c86911..bac6918 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -38,6 +38,7 @@
#include "brw_context.h"
#include "brw_util.h"
+#include "brw_wm.h"
static void brwBindProgram( GLcontext *ctx,
GLenum target,
@@ -94,7 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx,
static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
-
+ if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+ dri_bo_unreference(brw_fprog->const_buffer);
+ }
+
_mesa_delete_program( ctx, prog );
}
@@ -110,30 +116,36 @@ static void brwProgramStringNotify( GLcontext *ctx,
GLenum target,
struct gl_program *prog )
{
+ struct brw_context *brw = brw_context(ctx);
+
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
- struct brw_context *brw = brw_context(ctx);
- struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
- struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+ struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+ const struct brw_fragment_program *curFP =
+ brw_fragment_program_const(brw->fragment_program);
+
if (fprog->FogOption) {
_mesa_append_fog_code(ctx, fprog);
fprog->FogOption = GL_NONE;
}
- if (p == fp)
+ if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
- p->id = brw->program_id++;
+ newFP->id = brw->program_id++;
+ newFP->isGLSL = brw_wm_is_glsl(fprog);
}
else if (target == GL_VERTEX_PROGRAM_ARB) {
- struct brw_context *brw = brw_context(ctx);
- struct brw_vertex_program *p = (struct brw_vertex_program *)prog;
- struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
- if (p == vp)
+ struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+ struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+ const struct brw_vertex_program *curVP =
+ brw_vertex_program_const(brw->vertex_program);
+
+ if (newVP == curVP)
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
- if (p->program.IsPositionInvariant) {
- _mesa_insert_mvp_code(ctx, &p->program);
+ if (newVP->program.IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, &newVP->program);
}
- p->id = brw->program_id++;
+ newVP->id = brw->program_id++;
/* Also tell tnl about it:
*/
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index e22d080..c999187 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -43,10 +43,12 @@ static void upload_sf_vp(struct brw_context *brw)
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport sfv;
GLfloat y_scale, y_bias;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
memset(&sfv, 0, sizeof(sfv));
- if (intel_rendering_to_texture(ctx)) {
+ if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
}
@@ -57,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw)
/* _NEW_VIEWPORT */
- const GLfloat *v = ctx->Viewport._WindowMap.m;
-
sfv.viewport.m00 = v[MAT_SX];
sfv.viewport.m11 = v[MAT_SY] * y_scale;
sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
@@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw)
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
- /* _NEW_SCISSOR */
+ /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+ * for DrawBuffer->_[XY]{min,max}
+ */
/* The scissor only needs to handle the intersection of drawable and
* scissor rect. Clipping to the boundaries of static shared buffers
@@ -75,7 +77,7 @@ static void upload_sf_vp(struct brw_context *brw)
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
*/
- if (intel_rendering_to_texture(ctx)) {
+ if (render_to_fbo) {
/* texmemory: Y=0=bottom */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
@@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw)
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
.mesa = (_NEW_VIEWPORT |
- _NEW_SCISSOR),
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
.brw = 0,
.cache = 0
},
@@ -111,10 +114,13 @@ struct brw_sf_unit_key {
unsigned int nr_urb_entries, urb_size, sfsize;
GLenum front_face, cull_face;
- GLboolean scissor, line_smooth, point_sprite, point_attenuated;
+ unsigned scissor:1;
+ unsigned line_smooth:1;
+ unsigned point_sprite:1;
+ unsigned point_attenuated:1;
+ unsigned render_to_fbo:1;
float line_width;
float point_size;
- GLboolean render_to_texture;
};
static void
@@ -144,10 +150,10 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->line_smooth = ctx->Line.SmoothFlag;
key->point_sprite = ctx->Point.PointSprite;
- key->point_size = ctx->Point.Size;
+ key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
key->point_attenuated = ctx->Point._Attenuated;
- key->render_to_texture = intel_rendering_to_texture(&brw->intel.ctx);
+ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
static dri_bo *
@@ -194,10 +200,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
- /* The viewport is inverted for rendering to texture, and that inverts
+ /* The viewport is inverted for rendering to a FBO, and that inverts
* polygon front/back orientation.
*/
- sf.sf5.front_winding ^= key->render_to_texture;
+ sf.sf5.front_winding ^= key->render_to_fbo;
switch (key->cull_face) {
case GL_FRONT:
@@ -228,7 +234,33 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.sf6.line_width = 0;
/* _NEW_POINT */
- sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */
+ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+ if (!key->render_to_fbo) {
+ /* Rendering to an OpenGL window */
+ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+ }
+ else {
+ /* If rendering to an FBO, the pixel coordinate system is
+ * inverted with respect to the normal OpenGL coordinate
+ * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
+ * But this value is listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * It does work on at least some devices, if not all;
+ * if devices that don't support it can be identified,
+ * the likely failure case is that points are rasterized
+ * incorrectly, which is no worse than occurs without
+ * the value, so we're using it here.
+ */
+ sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+ }
/* XXX clamp max depends on AA vs. non-AA */
sf.sf7.sprite_point = key->point_sprite;
diff --git a/i965/brw_state.h b/i965/brw_state.h
index df839c5..81b0a45 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp;
const struct brw_tracked_state brw_check_fallback;
const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_constant_buffer_state;
const struct brw_tracked_state brw_constant_buffer;
const struct brw_tracked_state brw_curbe_offsets;
const struct brw_tracked_state brw_invarient_state;
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index dc87859..811940e 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw )
{
clear_batch_cache(brw);
-/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
-
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
index b28c57c..a713262 100644
--- a/i965/brw_state_dump.c
+++ b/i965/brw_state_dump.c
@@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype)
}
}
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+ switch (surface_format) {
+ case 0x000: return "r32g32b32a32_float";
+ case 0x0c1: return "b8g8r8a8_unorm";
+ case 0x100: return "b5g6r5_unorm";
+ case 0x102: return "b5g5r5a1_unorm";
+ case 0x104: return "b4g4r4a4_unorm";
+ default: return "unknown";
+ }
+}
+
static void dump_wm_surface_state(struct brw_context *brw)
{
int i;
@@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
char name[20];
if (surf_bo == NULL) {
- fprintf(stderr, "WM SS%d: NULL\n", i);
+ fprintf(stderr, " WM SS%d: NULL\n", i);
continue;
}
dri_bo_map(surf_bo, GL_FALSE);
@@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw)
surf = (struct brw_surface_state *)(surf_bo->virtual);
sprintf(name, "WM SS%d", i);
- state_out(name, surf, surfoff, 0, "%s\n",
- get_965_surfacetype(surf->ss0.surface_type));
+ state_out(name, surf, surfoff, 0, "%s %s\n",
+ get_965_surfacetype(surf->ss0.surface_type),
+ get_965_surface_format(surf->ss0.surface_format));
state_out(name, surf, surfoff, 1, "offset\n");
state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
@@ -162,6 +176,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog)
fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
name, (unsigned int)prog->offset + i * 4 * 4,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+ /* Stop at the end of the program. It'd be nice to keep track of the actual
+ * intended program size instead of guessing like this.
+ */
+ if (data[i * 4 + 0] == 0 &&
+ data[i * 4 + 1] == 0 &&
+ data[i * 4 + 2] == 0 &&
+ data[i * 4 + 3] == 0)
+ break;
}
dri_bo_unmap(prog);
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index 4e577d0..89e2981 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -439,7 +439,7 @@ struct brw_urb_fence
} bits1;
};
-struct brw_constant_buffer_state /* previously brw_command_streamer */
+struct brw_cs_urb_state
{
struct header header;
@@ -1031,10 +1031,10 @@ struct brw_surface_state
GLuint writedisable_green:1;
GLuint writedisable_red:1;
GLuint writedisable_alpha:1;
- GLuint surface_format:9;
+ GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */
GLuint data_return_format:1;
GLuint pad0:1;
- GLuint surface_type:3;
+ GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
} ss0;
struct {
diff --git a/i965/brw_tex.c b/i965/brw_tex.c
index ef99e9c..71bff16 100644
--- a/i965/brw_tex.c
+++ b/i965/brw_tex.c
@@ -32,21 +32,12 @@
#include "main/glheader.h"
#include "main/mtypes.h"
-#include "main/imports.h"
-#include "main/simple_list.h"
-#include "main/enums.h"
-#include "main/image.h"
#include "main/teximage.h"
-#include "main/texstore.h"
-#include "main/texformat.h"
-
-#include "texmem.h"
#include "intel_context.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
-#include "brw_defines.h"
void brw_FrameBufferTexInit( struct brw_context *brw,
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index 99d0e93..1e4f660 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -75,6 +75,11 @@ struct brw_vs_compile {
struct brw_reg userplane[6];
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
};
void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c
index 9977677..249a800 100644
--- a/i965/brw_vs_constval.c
+++ b/i965/brw_vs_constval.c
@@ -39,8 +39,8 @@
*/
struct tracker {
GLboolean twoside;
- GLubyte active[PROGRAM_OUTPUT+1][128];
- GLuint size_masks[4];
+ GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS];
+ GLbitfield size_masks[4]; /**< one bit per fragment program input attrib */
};
@@ -53,8 +53,10 @@ static void set_active_component( struct tracker *t,
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
case PROGRAM_OUTPUT:
+ assert(file < PROGRAM_OUTPUT + 1);
+ assert(index < Elements(t->active[0]));
t->active[file][index] |= active;
-
+ break;
default:
break;
}
@@ -96,7 +98,7 @@ static GLubyte get_active( struct tracker *t,
struct prog_src_register src )
{
GLuint i;
- GLubyte active = src.NegateBase; /* NOTE! */
+ GLubyte active = src.Negate; /* NOTE! */
if (src.RelAddr)
return 0xf;
@@ -108,10 +110,15 @@ static GLubyte get_active( struct tracker *t,
return active;
}
+/**
+ * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx.
+ */
static GLubyte get_output_size( struct tracker *t,
GLuint idx )
{
- GLubyte active = t->active[PROGRAM_OUTPUT][idx];
+ GLubyte active;
+ assert(idx < VERT_RESULT_MAX);
+ active = t->active[PROGRAM_OUTPUT][idx];
if (active & (1<<3)) return 4;
if (active & (1<<2)) return 3;
if (active & (1<<1)) return 2;
@@ -123,7 +130,7 @@ static GLubyte get_output_size( struct tracker *t,
*/
static void calc_sizes( struct tracker *t )
{
- GLuint i;
+ GLint vertRes;
if (t->twoside) {
t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |=
@@ -133,12 +140,27 @@ static void calc_sizes( struct tracker *t )
t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1];
}
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- switch (get_output_size(t, i)) {
- case 4: t->size_masks[4-1] |= 1<<i;
- case 3: t->size_masks[3-1] |= 1<<i;
- case 2: t->size_masks[2-1] |= 1<<i;
- case 1: t->size_masks[1-1] |= 1<<i;
+ /* Examine vertex program output sizes to set the size_masks[] info
+ * which describes the fragment program input sizes.
+ */
+ for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) {
+ GLint fragAttrib;
+
+ /* map vertex program output index to fragment program input index */
+ if (vertRes <= VERT_RESULT_TEX7)
+ fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0;
+ else if (vertRes >= VERT_RESULT_VAR0)
+ fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0;
+ else
+ continue;
+ assert(fragAttrib >= FRAG_ATTRIB_TEX0);
+ assert(fragAttrib <= FRAG_ATTRIB_MAX);
+
+ switch (get_output_size(t, vertRes)) {
+ case 4: t->size_masks[4-1] |= 1 << fragAttrib;
+ case 3: t->size_masks[3-1] |= 1 << fragAttrib;
+ case 2: t->size_masks[2-1] |= 1 << fragAttrib;
+ case 1: t->size_masks[1-1] |= 1 << fragAttrib;
break;
}
}
@@ -170,8 +192,8 @@ static void calc_wm_input_sizes( struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
/* BRW_NEW_VERTEX_PROGRAM */
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *)brw->vertex_program;
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
/* BRW_NEW_INPUT_DIMENSIONS */
struct tracker t;
GLuint insn;
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index 235f826..b69616d 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -38,18 +38,49 @@
#include "brw_vs.h"
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
-/* Do things as simply as possible. Allocate and populate all regs
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
- GLuint nr_params;
+
+#if 0
+ if (c->vp->program.Base.Parameters->NumParameters >= 6)
+ c->vp->use_const_buffer = 1;
+ else
+#endif
+ c->vp->use_const_buffer = GL_FALSE;
+ /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
/* r0 -- reserved as usual
*/
- c->r0 = brw_vec8_grf(reg, 0); reg++;
+ c->r0 = brw_vec8_grf(reg, 0);
+ reg++;
/* User clip planes from curbe:
*/
@@ -60,24 +91,33 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Deal with curbe alignment:
*/
- reg += ((6+c->key.nr_userclip+3)/4)*2;
+ reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
}
/* Vertex program parameters from curbe:
*/
- nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params+1)/2;
+ if (c->vp->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ }
+ else {
+ /* use a section of the GRF for constants */
+ GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
- c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = nr_params * 4;
+ }
/* Allocate input regs:
*/
c->nr_inputs = 0;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (c->prog_data.inputs_read & (1<<i)) {
+ if (c->prog_data.inputs_read & (1 << i)) {
c->nr_inputs++;
c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
reg++;
@@ -91,7 +131,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->first_output = reg;
mrf = 4;
for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1<<i)) {
+ if (c->prog_data.outputs_written & (1 << i)) {
c->nr_outputs++;
if (i == VERT_RESULT_HPOS) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
@@ -132,17 +172,24 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
for (i = 0; i < 128; i++) {
- if (c->output_regs[i].used_in_src) {
- c->output_regs[i].reg = brw_vec8_grf(reg, 0);
- reg++;
- }
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
}
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
reg += 2;
-
-
+
/* Some opcodes need an internal temporary:
*/
c->first_tmp = reg;
@@ -152,35 +199,23 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* urb_read_length is the number of registers read from *each*
* vertex urb, so is half the amount:
*/
- c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
+ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
- c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
+ c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
c->prog_data.total_grf = reg;
-}
-
-
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
- struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
- if (++c->last_tmp > c->prog_data.total_grf)
- c->prog_data.total_grf = c->last_tmp;
-
- return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
- if (tmp.nr == c->last_tmp-1)
- c->last_tmp--;
-}
-
-static void release_tmps( struct brw_vs_compile *c )
-{
- c->last_tmp = c->first_tmp;
+ if (INTEL_DEBUG & DEBUG_VS) {
+ _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+ _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+ _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+ }
}
+/**
+ * If an instruction uses a temp reg both as a src and the dest, we
+ * sometimes need to allocate an intermediate temporary.
+ */
static void unalias1( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
@@ -200,6 +235,10 @@ static void unalias1( struct brw_vs_compile *c,
}
}
+/**
+ * \sa unalias2
+ * Checkes if 2-operand instruction needs an intermediate temporary.
+ */
static void unalias2( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
@@ -222,6 +261,10 @@ static void unalias2( struct brw_vs_compile *c,
}
}
+/**
+ * \sa unalias2
+ * Checkes if 3-operand instruction needs an intermediate temporary.
+ */
static void unalias3( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
@@ -615,6 +658,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
}
brw_ENDIF(p, if_insn);
+
+ release_tmp(c, tmp);
}
static void emit_lrp_noalias(struct brw_vs_compile *c,
@@ -655,13 +700,83 @@ static void emit_nrm( struct brw_vs_compile *c,
}
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ c->current_const[argIndex].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ src->RelAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (src->RelAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ src->RelAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (src->RelAddr) {
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
- GLuint file,
+ gl_register_file file,
GLuint index )
{
-
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@@ -690,13 +805,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -717,10 +836,67 @@ static struct brw_reg deref( struct brw_vs_compile *c,
brw_pop_insn_state(p);
}
+ /* NOTE: tmp not released */
return vec8(tmp);
}
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ if (c->vp->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
static void emit_arl( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -732,30 +908,31 @@ static void emit_arl( struct brw_vs_compile *c,
if (need_tmp)
tmp = get_tmp(c);
- brw_RNDD(p, tmp, arg0);
- brw_MUL(p, dst, tmp, brw_imm_d(16));
+ brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
if (need_tmp)
release_tmp(c, tmp);
}
-/* Will return mangled results for SWZ op. The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- struct prog_src_register *src )
+ const struct prog_instruction *inst,
+ GLuint argIndex )
{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr)
- reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- else
- reg = get_reg(c, src->File, src->Index);
+ reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -766,16 +943,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
/* Note this is ok for non-swizzle instructions:
*/
- reg.negate = src->NegateBase ? 1 : 0;
+ reg.negate = src->Negate ? 1 : 0;
return reg;
}
+/**
+ * Get brw register for the given program dest register.
+ */
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
- struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_ADDRESS:
+ assert(dst.Index == 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
reg.dw1.bits.writemask = dst.WriteMask;
@@ -785,14 +984,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
- struct prog_src_register src )
+ const struct prog_instruction *inst)
{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
GLuint src_mask = 0;
GLubyte src_swz[4];
- GLboolean need_tmp = (src.NegateBase &&
+ GLboolean need_tmp = (src.Negate &&
dst.file != BRW_GENERAL_REGISTER_FILE);
struct brw_reg tmp = dst;
GLuint i;
@@ -826,10 +1027,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src_mask) {
struct brw_reg arg0;
- if (src.RelAddr)
- arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
- else
- arg0 = get_reg(c, src.File, src.Index);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@@ -844,8 +1042,8 @@ static void emit_swz( struct brw_vs_compile *c,
if (ones_mask)
brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
- if (src.NegateBase)
- brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp));
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
if (need_tmp) {
brw_MOV(p, dst, tmp);
@@ -975,7 +1173,7 @@ post_vs_emit( struct brw_vs_compile *c,
}
-/* Emit the fragment program instructions here.
+/* Emit the vertex program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
{
@@ -1025,6 +1223,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
struct brw_reg args[3], dst;
GLuint i;
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
/* Get argument regs. SWZ is special and does this itself.
*/
if (inst->Opcode != OPCODE_SWZ)
@@ -1032,10 +1235,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
- if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src)
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
- args[i] = get_arg(c, src);
+ args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@@ -1163,7 +1366,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
- emit_swz(c, dst, inst->SrcReg[0] );
+ emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index 1a63766..3d29538 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -44,6 +44,8 @@ struct brw_vs_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
};
static void
@@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
key->nr_urb_entries = brw->urb.nr_vs_entries;
key->urb_size = brw->urb.vsize;
+ /* BRW_NEW_NR_VS_SURFACES */
+ key->nr_surfaces = brw->vs.nr_surfaces;
+
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
/* Note that we read in the userclip planes as well, hence
@@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
vs.thread3.dispatch_grf_start_reg = 1;
@@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index e69d4c5..ba03afd 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -23,14 +23,12 @@
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
+**********************************************************************/
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
#include "main/glheader.h"
#include "main/mtypes.h"
@@ -44,12 +42,11 @@
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
-
#include "brw_draw.h"
#include "brw_state.h"
#include "brw_fallback.h"
#include "brw_vs.h"
-#include <stdarg.h>
+
static void
dri_bo_release(dri_bo **bo)
@@ -58,7 +55,9 @@ dri_bo_release(dri_bo **bo)
*bo = NULL;
}
-/* called from intelDestroyContext()
+
+/**
+ * called from intelDestroyContext()
*/
static void brw_destroy_context( struct intel_context *intel )
{
@@ -68,16 +67,19 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
+ _mesa_free(brw->wm.compile_data);
+
brw_FrameBufferTexDestroy( brw );
- for (i = 0; i < brw->state.nr_draw_regions; i++)
- intel_region_release(&brw->state.draw_regions[i]);
- brw->state.nr_draw_regions = 0;
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
+ brw->state.nr_color_regions = 0;
intel_region_release(&brw->state.depth_region);
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
+ dri_bo_release(&brw->vs.bind_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
@@ -91,6 +93,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->wm.bind_bo);
for (i = 0; i < BRW_WM_MAX_SURF; i++)
dri_bo_release(&brw->wm.surf_bo[i]);
+ dri_bo_release(&brw->wm.sampler_bo);
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->cc.prog_bo);
@@ -98,37 +101,46 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->cc.vp_bo);
}
-/* called from intelDrawBuffer()
+
+/**
+ * called from intelDrawBuffer()
*/
static void brw_set_draw_region( struct intel_context *intel,
- struct intel_region *draw_regions[],
- struct intel_region *depth_region,
- GLuint num_regions)
+ struct intel_region *color_regions[],
+ struct intel_region *depth_region,
+ GLuint num_color_regions)
{
struct brw_context *brw = brw_context(&intel->ctx);
- int i;
+ GLuint i;
+
+ /* release old color/depth regions */
if (brw->state.depth_region != depth_region)
brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
- for (i = 0; i < brw->state.nr_draw_regions; i++)
- intel_region_release(&brw->state.draw_regions[i]);
+ for (i = 0; i < brw->state.nr_color_regions; i++)
+ intel_region_release(&brw->state.color_regions[i]);
intel_region_release(&brw->state.depth_region);
- for (i = 0; i < num_regions; i++)
- intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]);
+
+ /* reference new color/depth regions */
+ for (i = 0; i < num_color_regions; i++)
+ intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
intel_region_reference(&brw->state.depth_region, depth_region);
- brw->state.nr_draw_regions = num_regions;
+ brw->state.nr_color_regions = num_color_regions;
}
-/* called from intel_batchbuffer_flush and children before sending a
+
+/**
+ * called from intel_batchbuffer_flush and children before sending a
* batchbuffer off.
*/
static void brw_finish_batch(struct intel_context *intel)
{
struct brw_context *brw = brw_context(&intel->ctx);
-
brw_emit_query_end(brw);
}
-/* called from intelFlushBatchLocked
+
+/**
+ * called from intelFlushBatchLocked
*/
static void brw_new_batch( struct intel_context *intel )
{
@@ -159,39 +171,20 @@ static void brw_new_batch( struct intel_context *intel )
}
}
-static void brw_note_fence( struct intel_context *intel,
- GLuint fence )
+
+static void brw_note_fence( struct intel_context *intel, GLuint fence )
{
brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
}
-
+
+
static void brw_note_unlock( struct intel_context *intel )
{
struct brw_context *brw = brw_context(&intel->ctx);
-
brw_state_cache_check_size(brw);
}
-void brw_do_flush( struct brw_context *brw,
- GLuint flags )
-{
- struct brw_mi_flush flush;
- memset(&flush, 0, sizeof(flush));
- flush.opcode = CMD_MI_FLUSH;
- flush.flags = flags;
- BRW_BATCH_STRUCT(brw, &flush);
-}
-
-
-static void brw_emit_flush( struct intel_context *intel,
- GLuint unused )
-{
- brw_do_flush(brw_context(&intel->ctx),
- BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE);
-}
-
-
/* called from intelWaitForIdle() and intelFlush()
*
* For now, just flush everything. Could be smarter later.
@@ -205,6 +198,7 @@ static GLuint brw_flush_cmd( void )
return *(GLuint *)&flush;
}
+
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
{
/* nothing */
@@ -214,20 +208,18 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state
void brwInitVtbl( struct brw_context *brw )
{
brw->intel.vtbl.check_vertex_size = 0;
- brw->intel.vtbl.emit_state = 0;
- brw->intel.vtbl.reduced_primitive_state = 0;
+ brw->intel.vtbl.emit_state = 0;
+ brw->intel.vtbl.reduced_primitive_state = 0;
brw->intel.vtbl.render_start = 0;
- brw->intel.vtbl.update_texture_state = 0;
+ brw->intel.vtbl.update_texture_state = 0;
- brw->intel.vtbl.invalidate_state = brw_invalidate_state;
- brw->intel.vtbl.note_fence = brw_note_fence;
- brw->intel.vtbl.note_unlock = brw_note_unlock;
+ brw->intel.vtbl.invalidate_state = brw_invalidate_state;
+ brw->intel.vtbl.note_fence = brw_note_fence;
+ brw->intel.vtbl.note_unlock = brw_note_unlock;
brw->intel.vtbl.new_batch = brw_new_batch;
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.flush_cmd = brw_flush_cmd;
- brw->intel.vtbl.emit_flush = brw_emit_flush;
brw->intel.vtbl.debug_batch = brw_debug_batch;
}
-
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index c6791da..8a3b7df 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -82,6 +82,58 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
}
+/**
+ * Do GPU code generation for non-GLSL shader. non-GLSL shaders have
+ * no flow control instructions so we can more readily do SSA-style
+ * optimizations.
+ */
+static void
+brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ /* Augment fragment program. Add instructions for pre- and
+ * post-fragment-program tasks such as interpolation and fogging.
+ */
+ brw_wm_pass_fp(c);
+
+ /* Translate to intermediate representation. Build register usage
+ * chains.
+ */
+ brw_wm_pass0(c);
+
+ /* Dead code removal.
+ */
+ brw_wm_pass1(c);
+
+ /* Register allocation.
+ * Divide by two because we operate on 16 pixels at a time and require
+ * two GRF entries for each logical shader register.
+ */
+ c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+ brw_wm_pass2(c);
+
+ /* how many general-purpose registers are used */
+ c->prog_data.total_grf = c->max_wm_grf;
+
+ /* Scratch space is used for register spilling */
+ if (c->last_scratch) {
+ c->prog_data.total_scratch = c->last_scratch + 0x40;
+ }
+ else {
+ c->prog_data.total_scratch = 0;
+ }
+
+ /* Emit GEN4 code.
+ */
+ brw_wm_emit(c);
+}
+
+
+/**
+ * All Mesa program -> GPU code generation goes through this function.
+ * Depending on the instructions used (i.e. flow control instructions)
+ * we'll use one of two code generators.
+ */
static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
@@ -92,52 +144,39 @@ static void do_wm_prog( struct brw_context *brw,
c = brw->wm.compile_data;
if (c == NULL) {
- brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
- c = brw->wm.compile_data;
+ brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+ c = brw->wm.compile_data;
+ if (c == NULL) {
+ /* Ouch - big out of memory problem. Can't continue
+ * without triggering a segfault, no way to signal,
+ * so just return.
+ */
+ return;
+ }
} else {
- memset(c, 0, sizeof(*brw->wm.compile_data));
+ memset(c, 0, sizeof(*brw->wm.compile_data));
}
memcpy(&c->key, key, sizeof(*key));
c->fp = fp;
c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
- brw_init_compile(brw, &c->func);
- if (brw_wm_is_glsl(&c->fp->program)) {
- brw_wm_glsl_emit(brw, c);
- } else {
- /* Augment fragment program. Add instructions for pre- and
- * post-fragment-program tasks such as interpolation and fogging.
- */
- brw_wm_pass_fp(c);
-
- /* Translate to intermediate representation. Build register usage
- * chains.
- */
- brw_wm_pass0(c);
-
- /* Dead code removal.
- */
- brw_wm_pass1(c);
-
- /* Register allocation.
- */
- c->grf_limit = BRW_WM_MAX_GRF/2;
-
- brw_wm_pass2(c);
-
- c->prog_data.total_grf = c->max_wm_grf;
- if (c->last_scratch) {
- c->prog_data.total_scratch =
- c->last_scratch + 0x40;
- } else {
- c->prog_data.total_scratch = 0;
- }
-
- /* Emit GEN4 code.
- */
- brw_wm_emit(c);
+ brw_init_compile(brw, &c->func);
+
+ /* temporary sanity check assertion */
+ ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+
+ /*
+ * Shader which use GLSL features such as flow control are handled
+ * differently from "simple" shaders.
+ */
+ if (fp->isGLSL) {
+ brw_wm_glsl_emit(brw, c);
}
+ else {
+ brw_wm_non_glsl_emit(brw, c);
+ }
+
if (INTEL_DEBUG & DEBUG_WM)
fprintf(stderr, "\n");
@@ -161,7 +200,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
{
GLcontext *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
- struct brw_fragment_program *fp =
+ const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
GLuint lookup = 0;
GLuint line_aa;
@@ -176,7 +215,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
- if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+ if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
/* _NEW_DEPTH */
@@ -188,7 +227,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
/* _NEW_STENCIL */
- if (ctx->Stencil.Enabled) {
+ if (ctx->Stencil._Enabled) {
lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
if (ctx->Stencil.WriteMask[0] ||
@@ -228,7 +267,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* BRW_NEW_WM_INPUT_DIMENSIONS */
- key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS);
+ key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
@@ -245,6 +284,11 @@ static void brw_wm_populate_key( struct brw_context *brw,
if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
key->yuvtex_swap_mask |= 1 << i;
}
+
+ key->tex_swizzles[i] = t->_Swizzle;
+ }
+ else {
+ key->tex_swizzles[i] = SWIZZLE_NOOP;
}
}
@@ -275,10 +319,11 @@ static void brw_wm_populate_key( struct brw_context *brw,
key->drawable_height = brw->intel.driDrawable->h;
}
- /* Extra info:
- */
- key->program_string_id = fp->id;
+ /* CACHE_NEW_VS_PROG */
+ key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+ /* The unique fragment program ID */
+ key->program_string_id = fp->id;
}
@@ -302,8 +347,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
}
-/* See brw_wm.c:
- */
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
@@ -317,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_WM_INPUT_DIMENSIONS |
BRW_NEW_REDUCED_PRIMITIVE),
- .cache = 0
+ .cache = CACHE_NEW_VS_PROG,
},
.prepare = brw_prepare_wm_prog
};
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 3cbdf81..295fed8 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -65,15 +65,17 @@ struct brw_wm_prog_key {
GLuint flat_shade:1;
GLuint runtime_check_aads_emit:1;
- GLuint projtex_mask:16;
+ GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swaped */
- // GLuint pad1:16;
+
+ GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
GLuint program_string_id:32;
GLuint origin_x, origin_y;
GLuint drawable_height;
+ GLuint vp_outputs_written;
};
@@ -142,13 +144,12 @@ struct brw_wm_instruction {
GLuint writemask:4;
GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+ GLuint tex_shadow:1; /* do shadow comparison? */
GLuint eot:1; /* End of thread indicator for FB_WRITE*/
GLuint target:10; /* target binding table index for FB_WRITE*/
};
-#define PROGRAM_INTERNAL_PARAM
-
#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
#define BRW_WM_MAX_GRF 128 /* hardware limit */
#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
@@ -240,17 +241,25 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
+ /** Mapping from Mesa registers to hardware registers */
struct {
- GLboolean inited;
- struct brw_reg reg;
+ GLboolean inited;
+ struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
+
struct brw_reg stack;
struct brw_reg emit_mask_reg;
- GLuint reg_index;
+ GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
};
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index bc8e8c9..a870e75 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -699,7 +699,6 @@ static void emit_tex( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
GLuint msgLength, responseLength;
- GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
GLuint i, nr;
GLuint emit;
@@ -721,7 +720,7 @@ static void emit_tex( struct brw_wm_compile *c,
break;
}
- if (shadow) {
+ if (inst->tex_shadow) {
nr = 4;
emit |= WRITEMASK_W;
}
@@ -743,10 +742,10 @@ static void emit_tex( struct brw_wm_compile *c,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
- (shadow ?
+ (inst->tex_shadow ?
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
responseLength,
@@ -792,7 +791,7 @@ static void emit_txb( struct brw_wm_compile *c,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
+ SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
@@ -914,6 +913,9 @@ static void emit_aa( struct brw_wm_compile *c,
/* Post-fragment-program processing. Send the results to the
* framebuffer.
+ * \param arg0 the fragment color
+ * \param arg1 the pass-through depth value
+ * \param arg2 the shader-computed depth value
*/
static void emit_fb_write( struct brw_wm_compile *c,
struct brw_reg *arg0,
@@ -1022,8 +1024,8 @@ static void emit_fb_write( struct brw_wm_compile *c,
}
-/* Post-fragment-program processing. Send the results to the
- * framebuffer.
+/**
+ * Move a GPR to scratch memory.
*/
static void emit_spill( struct brw_wm_compile *c,
struct brw_reg reg,
@@ -1042,11 +1044,13 @@ static void emit_spill( struct brw_wm_compile *c,
*/
brw_dp_WRITE_16(p,
retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
- 1,
slot);
}
+/**
+ * Load a GPR from scratch memory.
+ */
static void emit_unspill( struct brw_wm_compile *c,
struct brw_reg reg,
GLuint slot )
@@ -1067,13 +1071,13 @@ static void emit_unspill( struct brw_wm_compile *c,
brw_dp_READ_16(p,
retype(vec16(reg), BRW_REGISTER_TYPE_UW),
- 1,
slot);
}
/**
- * Retrieve upto 4 GEN4 register pairs for the given wm reg:
+ * Retrieve up to 4 GEN4 register pairs for the given wm reg:
+ * Args with unspill_reg != 0 will be loaded from scratch memory.
*/
static void get_argument_regs( struct brw_wm_compile *c,
struct brw_wm_ref *arg[],
@@ -1083,13 +1087,12 @@ static void get_argument_regs( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (arg[i]) {
-
- if (arg[i]->unspill_reg)
- emit_unspill(c,
+ if (arg[i]->unspill_reg)
+ emit_unspill(c,
brw_vec8_grf(arg[i]->unspill_reg, 0),
arg[i]->value->spill_slot);
- regs[i] = arg[i]->hw_reg;
+ regs[i] = arg[i]->hw_reg;
}
else {
regs[i] = brw_null_reg();
@@ -1098,6 +1101,9 @@ static void get_argument_regs( struct brw_wm_compile *c,
}
+/**
+ * For values that have a spill_slot!=0, write those regs to scratch memory.
+ */
static void spill_values( struct brw_wm_compile *c,
struct brw_wm_value *values,
GLuint nr )
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 7ebe5b9..49aad28 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx)
reg.Index = idx;
reg.Swizzle = SWIZZLE_NOOP;
reg.RelAddr = 0;
- reg.NegateBase = 0;
+ reg.Negate = NEGATE_NONE;
reg.Abs = 0;
- reg.NegateAbs = 0;
return reg;
}
@@ -112,6 +111,12 @@ static struct prog_src_register src_swizzle1( struct prog_src_register reg, int
return src_swizzle(reg, x, x, x, x);
}
+static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
+{
+ reg.Swizzle = swizzle;
+ return reg;
+}
+
/***********************************************************************
* Dest regs
@@ -187,6 +192,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
GLuint saturate,
GLuint tex_src_unit,
GLuint tex_src_target,
+ GLuint tex_shadow,
struct prog_src_register src0,
struct prog_src_register src1,
struct prog_src_register src2 )
@@ -200,6 +206,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
inst->SaturateMode = saturate;
inst->TexSrcUnit = tex_src_unit;
inst->TexSrcTarget = tex_src_target;
+ inst->TexShadow = tex_shadow;
inst->SrcReg[0] = src0;
inst->SrcReg[1] = src1;
inst->SrcReg[2] = src2;
@@ -216,7 +223,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
struct prog_src_register src2 )
{
return emit_tex_op(c, op, dest, saturate,
- 0, 0, /* tex unit, target */
+ 0, 0, 0, /* tex unit, target, shadow */
src0, src1, src2);
}
@@ -282,8 +289,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
struct prog_dst_register pixel_w = get_temp(c);
struct prog_src_register deltas = get_delta_xy(c);
struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
-
-
+
/* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
*/
emit_op(c,
@@ -548,7 +554,6 @@ static void precalc_dst( struct brw_wm_compile *c,
src_undef());
}
-
if (dst.WriteMask & WRITEMASK_XZ) {
struct prog_instruction *swz;
GLuint z = GET_SWZ(src0.Swizzle, Z);
@@ -563,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c,
src_undef(),
src_undef());
/* Avoid letting negation flag of src0 affect our 1 constant. */
- swz->SrcReg[0].NegateBase &= ~NEGATE_X;
+ swz->SrcReg[0].Negate &= ~NEGATE_X;
}
if (dst.WriteMask & WRITEMASK_W) {
/* dst.w = mov src1.w
@@ -598,10 +603,9 @@ static void precalc_lit( struct brw_wm_compile *c,
src_undef(),
src_undef());
/* Avoid letting the negation flag of src0 affect our 1 constant. */
- swz->SrcReg[0].NegateBase = 0;
+ swz->SrcReg[0].Negate = NEGATE_NONE;
}
-
if (dst.WriteMask & WRITEMASK_YZ) {
emit_op(c,
OPCODE_LIT,
@@ -646,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c,
src0,
src_undef(),
src_undef());
- out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Negate = NEGATE_NONE;
out->SrcReg[0].Abs = 1;
/* tmp0 = MAX(coord.X, coord.Y) */
@@ -745,6 +749,7 @@ static void precalc_tex( struct brw_wm_compile *c,
inst->SaturateMode,
unit,
inst->TexSrcTarget,
+ inst->TexShadow,
coord,
src_undef(),
src_undef());
@@ -805,21 +810,40 @@ static void precalc_tex( struct brw_wm_compile *c,
inst->SaturateMode,
unit,
inst->TexSrcTarget,
+ inst->TexShadow,
coord,
src_undef(),
src_undef());
}
+ /* For GL_EXT_texture_swizzle: */
+ if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
+ /* swizzle the result of the TEX instruction */
+ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
+ emit_op(c, OPCODE_SWZ,
+ inst->DstReg,
+ SATURATE_OFF, /* saturate already done above */
+ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
+ src_undef(),
+ src_undef());
+ }
+
if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
(inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
release_temp(c, tmpcoord);
}
+/**
+ * Check if the given TXP instruction really needs the divide-by-W step.
+ */
static GLboolean projtex( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
- struct prog_src_register src = inst->SrcReg[0];
+ const struct prog_src_register src = inst->SrcReg[0];
+ GLboolean retVal;
+
+ assert(inst->Opcode == OPCODE_TXP);
/* Only try to detect the simplest cases. Could detect (later)
* cases where we are trying to emit code like RCP {1.0}, MUL x,
@@ -829,16 +853,21 @@ static GLboolean projtex( struct brw_wm_compile *c,
* user-provided fragment programs anyway:
*/
if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
- return 0; /* ut2004 gun rendering !?! */
+ retVal = GL_FALSE; /* ut2004 gun rendering !?! */
else if (src.File == PROGRAM_INPUT &&
GET_SWZ(src.Swizzle, W) == W &&
- (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
- return 0;
+ (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
+ retVal = GL_FALSE;
else
- return 1;
+ retVal = GL_TRUE;
+
+ return retVal;
}
+/**
+ * Emit code for TXP.
+ */
static void precalc_txp( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
@@ -889,42 +918,41 @@ static void precalc_txp( struct brw_wm_compile *c,
static void emit_fb_write( struct brw_wm_compile *c )
{
struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
- struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+ struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
struct prog_src_register outcolor;
GLuint i;
struct prog_instruction *inst, *last_inst;
struct brw_context *brw = c->func.brw;
- /* inst->Sampler is not used by backend,
- use it for fb write target and eot */
-
- if (brw->state.nr_draw_regions > 1) {
- for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
- last_inst = inst = emit_op(c,
- WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
- outcolor, payload_r0_depth, outdepth);
- inst->Sampler = (i<<1);
- if (c->fp_fragcolor_emitted) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
- 0, outcolor, payload_r0_depth, outdepth);
- inst->Sampler = (i<<1);
- }
- }
- last_inst->Sampler |= 1; //eot
+ /* The inst->Aux field is used for FB write target and the EOT marker */
+
+ if (brw->state.nr_color_regions > 1) {
+ for (i = 0 ; i < brw->state.nr_color_regions; i++) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
+ last_inst = inst = emit_op(c,
+ WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
+ outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ if (c->fp_fragcolor_emitted) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ }
+ }
+ last_inst->Aux |= 1; //eot
}
else {
/* if gl_FragData[0] is written, use it, else use gl_FragColor */
if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
else
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
- 0, outcolor, payload_r0_depth, outdepth);
- inst->Sampler = 1|(0<<1);
+ inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = 1|(0<<1);
}
}
@@ -955,9 +983,9 @@ static void validate_dst_regs( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
if (inst->DstReg.File == PROGRAM_OUTPUT) {
- GLuint idx = inst->DstReg.Index;
- if (idx == FRAG_RESULT_COLR)
- c->fp_fragcolor_emitted = 1;
+ GLuint idx = inst->DstReg.Index;
+ if (idx == FRAG_RESULT_COLOR)
+ c->fp_fragcolor_emitted = 1;
}
}
@@ -981,6 +1009,11 @@ static void print_insns( const struct prog_instruction *insn,
}
}
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ */
void brw_wm_pass_fp( struct brw_wm_compile *c )
{
struct brw_fragment_program *fp = c->fp;
@@ -997,15 +1030,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
c->pixel_w = src_undef();
c->nr_fp_insns = 0;
- /* Emit preamble instructions:
+ /* Emit preamble instructions. This is where special instructions such as
+ * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+ * compute shader inputs from varying vars.
*/
-
-
for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
validate_src_regs(c, inst);
validate_dst_regs(c, inst);
}
+
+ /* Loop over all instructions doing assorted simplifications and
+ * transformations.
+ */
for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
struct prog_instruction *out;
@@ -1014,7 +1051,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
* necessary:
*/
-
switch (inst->Opcode) {
case OPCODE_SWZ:
out = emit_insn(c, inst);
@@ -1024,14 +1060,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
case OPCODE_ABS:
out = emit_insn(c, inst);
out->Opcode = OPCODE_MOV;
- out->SrcReg[0].NegateBase = 0;
+ out->SrcReg[0].Negate = NEGATE_NONE;
out->SrcReg[0].Abs = 1;
break;
case OPCODE_SUB:
out = emit_insn(c, inst);
out->Opcode = OPCODE_ADD;
- out->SrcReg[1].NegateBase ^= 0xf;
+ out->SrcReg[1].Negate ^= NEGATE_XYZW;
break;
case OPCODE_SCS:
@@ -1094,9 +1130,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
}
if (INTEL_DEBUG & DEBUG_WM) {
- _mesa_printf("pass_fp:\n");
- print_insns( c->prog_instructions, c->nr_fp_insns );
- _mesa_printf("\n");
+ _mesa_printf("pass_fp:\n");
+ print_insns( c->prog_instructions, c->nr_fp_insns );
+ _mesa_printf("\n");
}
}
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index 5a5497e..a907e1b 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -8,12 +8,17 @@ enum _subroutine {
SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
};
-/* Only guess, need a flag in gl_fragment_program later */
+
+/**
+ * Determine if the given fragment program uses GLSL features such
+ * as flow conditionals, loops, subroutines.
+ * Some GLSL shaders may use these features, others might not.
+ */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
for (i = 0; i < fp->Base.NumInstructions; i++) {
- struct prog_instruction *inst = &fp->Base.Instructions[i];
+ const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
case OPCODE_TRUNC:
@@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
return GL_FALSE;
}
+
+/**
+ * Record the mapping of a Mesa register to a hardware register.
+ */
static void set_reg(struct brw_wm_compile *c, int file, int index,
int component, struct brw_reg reg)
{
@@ -43,7 +52,11 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
c->wm_regs[file][index][component].inited = GL_TRUE;
}
-static int get_scalar_dst_index(struct prog_instruction *inst)
+/**
+ * Examine instruction's write mask to find index of first component
+ * enabled for writing.
+ */
+static int get_scalar_dst_index(const struct prog_instruction *inst)
{
int i;
for (i = 0; i < 4; i++)
@@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
return reg;
}
+/**
+ * Save current temp register info.
+ * There must be a matching call to release_tmps().
+ */
static int mark_tmps(struct brw_wm_compile *c)
{
return c->tmp_index;
@@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark)
c->tmp_index = mark;
}
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers. We allocate the hardware registers as needed here.
+ *
+ * \param file register file, one of PROGRAM_x
+ * \param index register number
+ * \param component src component (X=0, Y=1, Z=2, W=3)
+ * \param nr not used?!?
+ * \param neg negate value?
+ * \param abs take absolute value?
+ */
static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+ int nr, GLuint neg, GLuint abs)
{
struct brw_reg reg;
switch (file) {
@@ -99,12 +130,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
return brw_null_reg();
}
- if(c->wm_regs[file][index][component].inited)
+ /* see if we've already allocated a HW register for this Mesa register */
+ if (c->wm_regs[file][index][component].inited) {
+ /* yes, re-use */
reg = c->wm_regs[file][index][component].reg;
- else
+ }
+ else {
+ /* no, allocate new register */
reg = brw_vec8_grf(c->reg_index, 0);
+ }
- if(!c->wm_regs[file][index][component].inited) {
+ /* if this is a new register allocation, record it in the table */
+ if (!c->wm_regs[file][index][component].inited) {
set_reg(c, file, index, component, reg);
c->reg_index++;
}
@@ -113,7 +150,7 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
/* ran out of temporary registers! */
#if 1
/* This is a big hack for now.
- * Return bad register index, but don't just crash hange the GPU.
+ * Return bad register index, just don't hang the GPU.
*/
_mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
c->reg_index = BRW_WM_MAX_GRF - 13;
@@ -130,78 +167,273 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
return reg;
}
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ */
static void prealloc_reg(struct brw_wm_compile *c)
{
int i, j;
struct brw_reg reg;
- int nr_interp_regs = 0;
+ int urb_read_length = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
for (i = 0; i < 4; i++) {
- reg = (i < c->key.nr_depth_regs)
- ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2*c->key.nr_depth_regs;
+ c->reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
{
- int nr_params = c->fp->program.Base.Parameters->NumParameters;
- struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
- c->prog_data.nr_params = 4*nr_params;
- for (i = 0; i < nr_params; i++) {
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index/8,
- index%8);
- c->prog_data.param[index] =
- &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- c->nr_creg = 2*((4*nr_params+15)/16);
- c->reg_index += c->nr_creg;
+ const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+
+ /* use a real constant buffer, or just use a section of the GRF? */
+ c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
+
+ if (c->fp->use_const_buffer) {
+ /* We'll use a real constant buffer and fetch constants from
+ * it with a dataport read message.
+ */
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+ }
+ else {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ c->reg_index += c->nr_creg;
+ }
}
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (inputs & (1<<i)) {
- nr_interp_regs++;
- reg = brw_vec8_grf(c->reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- c->reg_index += 2;
- }
+ /* fragment shader inputs */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = c->reg_index;
+ reg = brw_vec8_grf(c->reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.vp_outputs_written & (1 << i)) {
+ c->reg_index += 2;
+ }
}
+
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
- c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
c->reg_index++;
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
c->reg_index += 2;
+
+ /* An instruction may reference up to three constants.
+ * They'll be found in these registers.
+ * XXX alloc these on demand!
+ */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = alloc_tmp(c);
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
+#endif
}
+
+/**
+ * Check if any of the instruction's src registers are constants, uniforms,
+ * or statevars. If so, fetch any constants that we don't already have in
+ * the three GRF slots.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ /* loop over instruction src regs */
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ if (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM) {
+ c->current_const[i].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
+ }
+ }
+}
+
+
+/**
+ * Convert Mesa dst register to brw register.
+ */
static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- struct prog_instruction *inst, int component, int nr)
+ const struct prog_instruction *inst,
+ GLuint component)
{
+ const int nr = 1;
return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
0, 0);
}
+
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint component)
+{
+ /* We should have already fetched the constant from the constant
+ * buffer in fetch_constants(). Now we just have to return a
+ * register description that extracts the needed component and
+ * smears it across all eight vector components.
+ */
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ struct brw_reg const_reg;
+
+ assert(component < 4);
+ assert(srcRegIndex < 3);
+ assert(c->current_const[srcRegIndex].index != -1);
+ const_reg = c->current_const[srcRegIndex].reg;
+
+ /* extract desired float from the const_reg, and smear */
+ const_reg = stride(const_reg, 0, 1, 0);
+ const_reg.subnr = component * 4;
+
+ if (src->Negate & (1 << component))
+ const_reg = negate(const_reg);
+ if (src->Abs)
+ const_reg = brw_abs(const_reg);
+
+#if 0
+ printf(" form const[%d].%d for arg %d, reg %d\n",
+ c->current_const[srcRegIndex].index,
+ component,
+ srcRegIndex,
+ const_reg.nr);
+#endif
+
+ return const_reg;
+}
+
+
+/**
+ * Convert Mesa src register to brw register.
+ */
static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- struct prog_src_register *src, int index, int nr)
-{
- int component = GET_SWZ(src->Swizzle, index);
- return get_reg(c, src->File, src->Index, component, nr,
- src->NegateBase, src->Abs);
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ const GLuint nr = 1;
+ const GLuint component = GET_SWZ(src->Swizzle, channel);
+
+ if (c->fp->use_const_buffer &&
+ (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM)) {
+ return get_src_reg_const(c, inst, srcRegIndex, component);
+ }
+ else {
+ /* other type of source register */
+ return get_reg(c, src->File, src->Index, component, nr,
+ src->Negate, src->Abs);
+ }
}
-/* Subroutines are minimal support for resusable instruction sequences.
- They are implemented as simply as possible to minimise overhead: there
- is no explicit support for communication between the caller and callee
- other than saving the return address in a temporary register, nor is
- there any automatic local storage. This implies that great care is
- required before attempting reentrancy or any kind of nested
- subroutine invocations. */
+
+/**
+ * Same as \sa get_src_reg() but if the register is a literal, emit
+ * a brw_reg encoding the literal.
+ * Note that a brw instruction only allows one src operand to be a literal.
+ * For instructions with more than one operand, only the second can be a
+ * literal. This means that we treat some literals as constants/uniforms
+ * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
+ *
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ if (src->File == PROGRAM_CONSTANT) {
+ /* a literal */
+ const int component = GET_SWZ(src->Swizzle, channel);
+ const GLfloat *param =
+ c->fp->program.Base.Parameters->ParameterValues[src->Index];
+ GLfloat value = param[component];
+ if (src->Negate & (1 << channel))
+ value = -value;
+ if (src->Abs)
+ value = FABSF(value);
+#if 0
+ printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+ return brw_imm_f(value);
+ }
+ else {
+ return get_src_reg(c, inst, srcRegIndex, channel);
+ }
+}
+
+
+/**
+ * Subroutines are minimal support for resusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
static void invoke_subroutine( struct brw_wm_compile *c,
enum _subroutine subroutine,
void (*emit)( struct brw_wm_compile * ) )
@@ -258,7 +490,7 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
static void emit_abs( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -266,8 +498,8 @@ static void emit_abs( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (inst->DstReg.WriteMask & (1<<i)) {
struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, inst, 0, i);
brw_MOV(p, dst, brw_abs(src));
}
}
@@ -275,7 +507,7 @@ static void emit_abs( struct brw_wm_compile *c,
}
static void emit_trunc( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -284,8 +516,8 @@ static void emit_trunc( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1) ;
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, inst, 0, i);
brw_RNDZ(p, dst, src);
}
}
@@ -293,7 +525,7 @@ static void emit_trunc( struct brw_wm_compile *c,
}
static void emit_mov( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -302,8 +534,10 @@ static void emit_mov( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ /* XXX some moves from immediate value don't work reliably!!! */
+ /*src = get_src_reg_imm(c, inst, 0, i);*/
+ src = get_src_reg(c, inst, 0, i);
brw_MOV(p, dst, src);
}
}
@@ -311,7 +545,7 @@ static void emit_mov( struct brw_wm_compile *c,
}
static void emit_pixel_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -320,8 +554,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
@@ -338,21 +572,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
stride(suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
}
static void emit_delta_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg dst0, dst1, src0, src1;
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
+ src1 = get_src_reg(c, inst, 0, 1);
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers.
*/
@@ -370,10 +603,8 @@ static void emit_delta_xy(struct brw_wm_compile *c,
negate(suboffset(r1,1)));
}
-
}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -404,7 +635,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
}
static void emit_fb_write(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
int nr = 2;
@@ -416,38 +647,64 @@ static void emit_fb_write(struct brw_wm_compile *c,
*/
if (c->key.aa_dest_stencil_reg)
nr += 1;
- {
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
+
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, inst, 0, channel);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
}
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
- {
- if (c->key.computes_depth) {
- src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- } else {
- src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
+ if (c->key.source_depth_to_render_target) {
+ if (c->key.computes_depth) {
+ src0 = get_src_reg(c, inst, 2, 2);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+ else {
+ src0 = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+
+ nr += 2;
+ }
- nr += 2;
+ if (c->key.dest_depth_reg) {
+ GLuint comp = c->key.dest_depth_reg / 2;
+ GLuint off = c->key.dest_depth_reg % 2;
+
+ assert(comp == 1);
+ assert(off == 0);
+#if 0
+ /* XXX do we need this code? comp always 1, off always 0, it seems */
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_pop_insn_state(p);
+ }
+ else
+#endif
+ {
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
+ }
+ nr += 2;
}
- target = inst->Sampler >> 1;
- eot = inst->Sampler & 1;
+
+ target = inst->Aux >> 1;
+ eot = inst->Aux & 1;
fire_fb_write(c, 0, nr, target, eot);
}
static void emit_pixel_w( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -455,10 +712,10 @@ static void emit_pixel_w( struct brw_wm_compile *c,
struct brw_reg dst, src0, delta0, delta1;
struct brw_reg interp3;
- dst = get_dst_reg(c, inst, 3, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ dst = get_dst_reg(c, inst, 3);
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
interp3 = brw_vec1_grf(src0.nr+1, 4);
/* Calc 1/w - just linterp wpos[3] optimized by putting the
@@ -477,19 +734,19 @@ static void emit_pixel_w( struct brw_wm_compile *c,
}
static void emit_linterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -498,7 +755,7 @@ static void emit_linterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_LINE(p, brw_null_reg(), interp[i], delta0);
brw_MAC(p, dst, suboffset(interp[i],1), delta1);
}
@@ -506,17 +763,17 @@ static void emit_linterp(struct brw_wm_compile *c,
}
static void emit_cinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg interp[4];
struct brw_reg dst, src0;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -525,14 +782,14 @@ static void emit_cinterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, suboffset(interp[i],3));
}
}
}
static void emit_pinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -540,13 +797,13 @@ static void emit_pinterp(struct brw_wm_compile *c,
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0, w;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ w = get_src_reg(c, inst, 2, 3);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -555,7 +812,7 @@ static void emit_pinterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_LINE(p, brw_null_reg(), interp[i], delta0);
brw_MAC(p, dst, suboffset(interp[i],1),
delta1);
@@ -566,7 +823,7 @@ static void emit_pinterp(struct brw_wm_compile *c,
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
static void emit_frontfacing(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
@@ -576,7 +833,7 @@ static void emit_frontfacing(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, brw_imm_f(0.0));
}
}
@@ -587,7 +844,7 @@ static void emit_frontfacing(struct brw_wm_compile *c,
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, brw_imm_f(1.0));
}
}
@@ -595,7 +852,7 @@ static void emit_frontfacing(struct brw_wm_compile *c,
}
static void emit_xpd(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -605,12 +862,12 @@ static void emit_xpd(struct brw_wm_compile *c,
GLuint i1 = (i+1)%3;
if (mask & (1<<i)) {
struct brw_reg src0, src1, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
- src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, inst, 0, i2));
+ src1 = get_src_reg_imm(c, inst, 1, i1);
brw_MUL(p, brw_null_reg(), src0, src1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
+ src0 = get_src_reg(c, inst, 0, i1);
+ src1 = get_src_reg_imm(c, inst, 1, i2);
brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
brw_MAC(p, dst, src0, src1);
brw_set_saturate(p, 0);
@@ -620,17 +877,17 @@ static void emit_xpd(struct brw_wm_compile *c,
}
static void emit_dp3(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[3], src1[3], dst;
int i;
struct brw_compile *p = &c->func;
for (i = 0; i < 3; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -639,16 +896,16 @@ static void emit_dp3(struct brw_wm_compile *c,
}
static void emit_dp4(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
@@ -658,16 +915,16 @@ static void emit_dp4(struct brw_wm_compile *c,
}
static void emit_dph(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, dst, src0[2], src1[2]);
@@ -682,7 +939,7 @@ static void emit_dph(struct brw_wm_compile *c,
* register's X, Y, Z and W channels (subject to writemasking of course).
*/
static void emit_math1(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint func)
+ const struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst, tmp;
@@ -692,7 +949,7 @@ static void emit_math1(struct brw_wm_compile *c,
tmp = alloc_tmp(c);
/* Get first component of source register */
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
/* tmp = func(src0) */
brw_MOV(p, brw_message_reg(2), src0);
@@ -710,7 +967,7 @@ static void emit_math1(struct brw_wm_compile *c,
/* replicate tmp value across enabled dest channels */
for (i = 0; i < 4; i++) {
if (inst->DstReg.WriteMask & (1 << i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, tmp);
}
}
@@ -719,43 +976,43 @@ static void emit_math1(struct brw_wm_compile *c,
}
static void emit_rcp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
}
static void emit_rsq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
}
static void emit_sin(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
}
static void emit_cos(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
}
static void emit_ex2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
}
static void emit_lg2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
}
static void emit_add(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -764,17 +1021,30 @@ static void emit_add(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_ADD(p, dst, src0, src1);
}
}
brw_set_saturate(p, 0);
}
+static void emit_arl(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, addr_reg;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS, 0);
+ src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
+ brw_MOV(p, addr_reg, src0);
+ brw_set_saturate(p, 0);
+}
+
static void emit_sub(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -783,9 +1053,9 @@ static void emit_sub(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_ADD(p, dst, src0, negate(src1));
}
}
@@ -793,7 +1063,7 @@ static void emit_sub(struct brw_wm_compile *c,
}
static void emit_mul(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -802,9 +1072,9 @@ static void emit_mul(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_MUL(p, dst, src0, src1);
}
}
@@ -812,7 +1082,7 @@ static void emit_mul(struct brw_wm_compile *c,
}
static void emit_frc(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -821,8 +1091,8 @@ static void emit_frc(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
brw_FRC(p, dst, src0);
}
}
@@ -831,7 +1101,7 @@ static void emit_frc(struct brw_wm_compile *c,
}
static void emit_flr(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -840,78 +1110,71 @@ static void emit_flr(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
brw_RNDD(p, dst, src0);
}
}
brw_set_saturate(p, 0);
}
-static void emit_max(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- }
- }
- brw_pop_insn_state(p);
-}
-static void emit_min(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_min_max(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
+ const GLuint mask = inst->DstReg.WriteMask;
+ const int mark = mark_tmps(c);
int i;
brw_push_insn_state(p);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ struct brw_reg real_dst = get_dst_reg(c, inst, i);
+ struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+ struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+ struct brw_reg dst;
+ /* if dst==src0 or dst==src1 we need to use a temp reg */
+ GLboolean use_temp = brw_same_reg(dst, src0) ||
+ brw_same_reg(dst, src1);
+ if (use_temp)
+ dst = alloc_tmp(c);
+ else
+ dst = real_dst;
+
+ /*
+ printf(" Min/max: dst %d src0 %d src1 %d\n",
+ dst.nr, src0.nr, src1.nr);
+ */
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
brw_MOV(p, dst, src0);
brw_set_saturate(p, 0);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ if (inst->Opcode == OPCODE_MIN)
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ else
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_MOV(p, dst, src1);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
+ if (use_temp)
+ brw_MOV(p, real_dst, dst);
}
}
brw_pop_insn_state(p);
+ release_tmps(c, mark);
}
static void emit_pow(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst, src0, src1;
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ src0 = get_src_reg_imm(c, inst, 0, 0);
+ src1 = get_src_reg_imm(c, inst, 1, 0);
brw_MOV(p, brw_message_reg(2), src0);
brw_MOV(p, brw_message_reg(3), src1);
@@ -927,7 +1190,7 @@ static void emit_pow(struct brw_wm_compile *c,
}
static void emit_lrp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -936,10 +1199,10 @@ static void emit_lrp(struct brw_wm_compile *c,
int mark = mark_tmps(c);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src1 = get_src_reg_imm(c, inst, 1, i);
if (src1.nr == dst.nr) {
tmp1 = alloc_tmp(c);
@@ -947,7 +1210,7 @@ static void emit_lrp(struct brw_wm_compile *c,
} else
tmp1 = src1;
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ src2 = get_src_reg(c, inst, 2, i);
if (src2.nr == dst.nr) {
tmp2 = alloc_tmp(c);
brw_MOV(p, tmp2, src2);
@@ -980,7 +1243,7 @@ static void emit_kil(struct brw_wm_compile *c)
}
static void emit_mad(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -989,10 +1252,10 @@ static void emit_mad(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ src2 = get_src_reg_imm(c, inst, 2, i);
brw_MUL(p, dst, src0, src1);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -1003,7 +1266,7 @@ static void emit_mad(struct brw_wm_compile *c,
}
static void emit_sop(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint cond)
+ const struct prog_instruction *inst, GLuint cond)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1012,9 +1275,9 @@ static void emit_sop(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), cond, src0, src1);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
@@ -1027,43 +1290,43 @@ static void emit_sop(struct brw_wm_compile *c,
}
static void emit_slt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_L);
}
static void emit_sle(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_LE);
}
static void emit_sgt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_G);
}
static void emit_sge(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_GE);
}
static void emit_seq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_EQ);
}
static void emit_sne(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
}
static void emit_ddx(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1071,8 +1334,8 @@ static void emit_ddx(struct brw_wm_compile *c,
struct brw_reg dst;
struct brw_reg src0, w;
GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
+ w = get_src_reg(c, inst, 1, 3);
nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -1081,7 +1344,7 @@ static void emit_ddx(struct brw_wm_compile *c,
brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, interp[i]);
brw_MUL(p, dst, dst, w);
}
@@ -1090,7 +1353,7 @@ static void emit_ddx(struct brw_wm_compile *c,
}
static void emit_ddy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -1099,9 +1362,9 @@ static void emit_ddy(struct brw_wm_compile *c,
struct brw_reg src0, w;
GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
nr = src0.nr;
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+ w = get_src_reg(c, inst, 1, 3);
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
interp[2] = brw_vec1_grf(nr+1, 0);
@@ -1109,7 +1372,7 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, suboffset(interp[i], 1));
brw_MUL(p, dst, dst, w);
}
@@ -1117,23 +1380,23 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static __inline struct brw_reg high_words( struct brw_reg reg )
+static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
0, 8, 2 );
}
-static __inline struct brw_reg low_words( struct brw_reg reg )
+static INLINE struct brw_reg low_words( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
}
-static __inline struct brw_reg even_bytes( struct brw_reg reg )
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
}
-static __inline struct brw_reg odd_bytes( struct brw_reg reg )
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
0, 16, 2 );
@@ -1233,7 +1496,7 @@ static void noise1_sub( struct brw_wm_compile *c ) {
}
static void emit_noise1( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src, param, dst;
@@ -1243,7 +1506,7 @@ static void emit_noise1( struct brw_wm_compile *c,
assert( mark == 0 );
- src = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src = get_src_reg( c, inst, 0, 0 );
param = alloc_tmp( c );
@@ -1255,7 +1518,7 @@ static void emit_noise1( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param );
}
}
@@ -1403,7 +1666,7 @@ static void noise2_sub( struct brw_wm_compile *c ) {
}
static void emit_noise2( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, param0, param1, dst;
@@ -1413,8 +1676,8 @@ static void emit_noise2( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -1428,7 +1691,7 @@ static void emit_noise2( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -1438,9 +1701,11 @@ static void emit_noise2( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* The three-dimensional case is much like the one- and two- versions above,
- but since the number of corners is rapidly growing we now pack 16 16-bit
- hashes into each register to extract more parallelism from the EUs. */
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ */
static void noise3_sub( struct brw_wm_compile *c ) {
struct brw_compile *p = &c->func;
@@ -1704,7 +1969,7 @@ static void noise3_sub( struct brw_wm_compile *c ) {
}
static void emit_noise3( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, param0, param1, param2, dst;
@@ -1714,9 +1979,9 @@ static void emit_noise3( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
- src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -1732,7 +1997,7 @@ static void emit_noise3( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -1742,13 +2007,15 @@ static void emit_noise3( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* For the four-dimensional case, the little micro-optimisation benefits
- we obtain by unrolling all the loops aren't worth the massive bloat it
- now causes. Instead, we loop twice around performing a similar operation
- to noise3, once for the w=0 cube and once for the w=1, with a bit more
- code to glue it all together. */
-static void noise4_sub( struct brw_wm_compile *c ) {
-
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes. Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
struct brw_compile *p = &c->func;
struct brw_reg param[ 4 ],
x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
@@ -2125,7 +2392,7 @@ static void noise4_sub( struct brw_wm_compile *c ) {
}
static void emit_noise4( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
@@ -2135,10 +2402,10 @@ static void emit_noise4( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
- src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
- src3 = get_src_reg( c, inst->SrcReg, 3, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
+ src3 = get_src_reg( c, inst, 0, 3 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -2156,7 +2423,7 @@ static void emit_noise4( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -2167,17 +2434,17 @@ static void emit_noise4( struct brw_wm_compile *c,
}
static void emit_wpos_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg src0[2], dst[2];
- dst[0] = get_dst_reg(c, inst, 0, 1);
- dst[1] = get_dst_reg(c, inst, 1, 1);
+ dst[0] = get_dst_reg(c, inst, 0);
+ dst[1] = get_dst_reg(c, inst, 1);
- src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+ src0[0] = get_src_reg(c, inst, 0, 0);
+ src0[1] = get_src_reg(c, inst, 0, 1);
/* Calculate the pixel offset from window bottom left into destination
* X and Y channels.
@@ -2200,27 +2467,28 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
/* TODO
- BIAS on SIMD8 not workind yet...
+ BIAS on SIMD8 not working yet...
*/
static void emit_txb(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
GLuint i;
+
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
+ dst[i] = get_dst_reg(c, inst, i);
for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src[i] = get_src_reg(c, inst, 0, i);
switch (inst->TexSrcTarget) {
case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
@@ -2234,28 +2502,28 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(4), src[2]);
break;
}
- brw_MOV(p, brw_message_reg(5), src[3]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- unit + MAX_DRAW_BUFFERS, /* surface */
- unit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
- 4,
- 4,
- 0);
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */
+ 4, /* response_length */
+ 4, /* msg_length */
+ 0); /* eot */
}
+
static void emit_tex(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
GLuint msg_len;
GLuint i, nr;
GLuint emit;
@@ -2264,10 +2532,9 @@ static void emit_tex(struct brw_wm_compile *c,
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
+ dst[i] = get_dst_reg(c, inst, i);
for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
+ src[i] = get_src_reg(c, inst, 0, i);
switch (inst->TexSrcTarget) {
case TEXTURE_1D_INDEX:
@@ -2286,6 +2553,7 @@ static void emit_tex(struct brw_wm_compile *c,
}
msg_len = 1;
+ /* move/load S, T, R coords */
for (i = 0; i < nr; i++) {
static const GLuint swz[4] = {0,1,2,2};
if (emit & (1<<i))
@@ -2296,26 +2564,27 @@ static void emit_tex(struct brw_wm_compile *c,
}
if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), src[2]);
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
+ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- unit + MAX_DRAW_BUFFERS, /* surface */
- unit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
- 4,
- shadow ? 6 : 4,
- 0);
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */
+ 4, /* response_length */
+ shadow ? 6 : 4, /* msg_length */
+ 0); /* eot */
if (shadow)
brw_MOV(p, dst[3], brw_imm_f(1.0));
}
+
/**
* Resolve subroutine calls after code emit is done.
*/
@@ -2340,7 +2609,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
for (i = 0; i < c->nr_fp_insns; i++) {
- struct prog_instruction *inst = &c->prog_instructions[i];
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+
+#if 0
+ _mesa_printf("Inst %d: ", i);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* fetch any constants that this instruction needs */
+ if (c->fp->use_const_buffer)
+ fetch_constants(c, inst);
if (inst->CondUpdate)
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
@@ -2381,6 +2659,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_ADD:
emit_add(c, inst);
break;
+ case OPCODE_ARL:
+ emit_arl(c, inst);
+ break;
case OPCODE_SUB:
emit_sub(c, inst);
break;
@@ -2397,6 +2678,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_trunc(c, inst);
break;
case OPCODE_MOV:
+ case OPCODE_SWZ:
emit_mov(c, inst);
break;
case OPCODE_DP3:
@@ -2429,11 +2711,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LG2:
emit_lg2(c, inst);
break;
- case OPCODE_MAX:
- emit_max(c, inst);
- break;
case OPCODE_MIN:
- emit_min(c, inst);
+ case OPCODE_MAX:
+ emit_min_max(c, inst);
break;
case OPCODE_DDX:
emit_ddx(c, inst);
@@ -2531,6 +2811,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
break;
case OPCODE_BGNLOOP:
+ /* XXX may need to invalidate the current_constant regs */
loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
@@ -2574,10 +2855,27 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control. Other shaders will be compiled with the
+ */
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("brw_wm_glsl_emit:\n");
+ }
+
+ /* initial instruction translation/simplification */
brw_wm_pass_fp(c);
+
+ /* actual code generation */
brw_wm_emit_glsl(brw, c);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_glsl_emit done");
+ }
+
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_scratch = 0;
}
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index fca7b7a..9214276 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -51,6 +51,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c)
return &c->vreg[c->nr_vreg++];
}
+/** return pointer to a newly allocated instruction */
static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
{
assert(c->nr_insns < BRW_WM_MAX_INSN);
@@ -60,6 +61,7 @@ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
/***********************************************************************
*/
+/** Init the "undef" register */
static void pass0_init_undef( struct brw_wm_compile *c)
{
struct brw_wm_ref *ref = &c->undef_ref;
@@ -69,6 +71,7 @@ static void pass0_init_undef( struct brw_wm_compile *c)
ref->prevuse = NULL;
}
+/** Set a FP register to a value */
static void pass0_set_fpreg_value( struct brw_wm_compile *c,
GLuint file,
GLuint idx,
@@ -83,6 +86,7 @@ static void pass0_set_fpreg_value( struct brw_wm_compile *c,
c->pass0_fp_reg[file][idx][component] = ref;
}
+/** Set a FP register to a ref */
static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
GLuint file,
GLuint idx,
@@ -115,12 +119,13 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
ref->value = &c->creg[i/16];
ref->insn = 0;
ref->prevuse = NULL;
-
+
return ref;
}
}
+/** Return a ref to a constant/literal value */
static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
const GLfloat *constval )
{
@@ -142,7 +147,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
*/
c->constref[i].constval = *constval;
c->constref[i].ref = get_param_ref(c, constval);
-
+
return c->constref[i].ref;
}
else {
@@ -187,7 +192,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
/* There's something really hokey about parameters parsed in
* arb programs - they all end up in here, whether they be
- * state values, paramters or constants. This duplicates the
+ * state values, parameters or constants. This duplicates the
* structure above & also seems to subvert the limits set for
* each type of constant/param.
*/
@@ -198,7 +203,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
*/
ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
break;
-
+
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
/* These may change from run to run:
@@ -229,14 +234,13 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
-
/***********************************************************************
* Straight translation to internal instruction format
*/
static void pass0_set_dst( struct brw_wm_compile *c,
- struct brw_wm_instruction *out,
- const struct prog_instruction *inst,
+ struct brw_wm_instruction *out,
+ const struct prog_instruction *inst,
GLuint writemask )
{
const struct prog_dst_register *dst = &inst->DstReg;
@@ -245,18 +249,17 @@ static void pass0_set_dst( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (writemask & (1<<i)) {
out->dst[i] = get_value(c);
-
pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
}
}
-
+
out->writemask = writemask;
}
static void pass0_set_dst_scalar( struct brw_wm_compile *c,
- struct brw_wm_instruction *out,
- const struct prog_instruction *inst,
+ struct brw_wm_instruction *out,
+ const struct prog_instruction *inst,
GLuint writemask )
{
if (writemask) {
@@ -282,7 +285,6 @@ static void pass0_set_dst_scalar( struct brw_wm_compile *c,
}
-
static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
struct prog_src_register src,
GLuint i )
@@ -292,14 +294,13 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
static const GLfloat const_zero = 0.0;
static const GLfloat const_one = 1.0;
-
if (component == SWIZZLE_ZERO)
src_ref = get_const_ref(c, &const_zero);
else if (component == SWIZZLE_ONE)
src_ref = get_const_ref(c, &const_one);
else
src_ref = pass0_get_reg(c, src.File, src.Index, component);
-
+
return src_ref;
}
@@ -311,19 +312,19 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
{
const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
struct brw_wm_ref *newref = get_ref(c);
-
+
newref->value = ref->value;
newref->hw_reg = ref->hw_reg;
- if (insn) {
+ if (insn) {
newref->insn = insn - c->instruction;
newref->prevuse = newref->value->lastuse;
newref->value->lastuse = newref;
}
- if (src.NegateBase & (1<<i))
+ if (src.Negate & (1 << i))
newref->hw_reg.negate ^= 1;
-
+
if (src.Abs) {
newref->hw_reg.negate = 0;
newref->hw_reg.abs = 1;
@@ -333,9 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
}
-
-static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
+static void
+translate_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
{
struct brw_wm_instruction *out = get_instruction(c);
GLuint writemask = inst->DstReg.WriteMask;
@@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
out->saturate = (inst->SaturateMode != SATURATE_OFF);
out->tex_unit = inst->TexSrcUnit;
out->tex_idx = inst->TexSrcTarget;
- out->eot = inst->Sampler & 1;
- out->target = inst->Sampler>>1;
+ out->tex_shadow = inst->TexShadow;
+ out->eot = inst->Aux & 1;
+ out->target = inst->Aux >> 1;
/* Args:
*/
@@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
pass0_set_dst_scalar(c, out, inst, writemask);
else
pass0_set_dst(c, out, inst, writemask);
-
- return out;
}
@@ -426,6 +426,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
&c->payload.input_interp[i] );
}
+
/***********************************************************************
* PASS 0
*
@@ -448,7 +449,6 @@ void brw_wm_pass0( struct brw_wm_compile *c )
for (insn = 0; insn < c->nr_fp_insns; insn++) {
const struct prog_instruction *inst = &c->prog_instructions[insn];
-
/* Optimize away moves, otherwise emit translated instruction:
*/
switch (inst->Opcode) {
@@ -461,8 +461,6 @@ void brw_wm_pass0( struct brw_wm_compile *c )
translate_insn(c, inst);
}
break;
-
-
default:
translate_insn(c, inst);
break;
@@ -473,4 +471,3 @@ void brw_wm_pass0( struct brw_wm_compile *c )
brw_wm_print_program(c, "pass0");
}
}
-
diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c
index a1fea6f..ab9aa2f 100644
--- a/i965/brw_wm_pass1.c
+++ b/i965/brw_wm_pass1.c
@@ -58,7 +58,8 @@ static void unlink_ref(struct brw_wm_ref *ref)
if (ref == value->lastuse) {
value->lastuse = ref->prevuse;
- } else {
+ }
+ else {
struct brw_wm_ref *i = value->lastuse;
while (i->prevuse != ref) i = i->prevuse;
i->prevuse = ref->prevuse;
@@ -75,8 +76,9 @@ static void track_arg(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
struct brw_wm_ref *ref = inst->src[arg][i];
if (ref) {
- if (readmask & (1<<i))
+ if (readmask & (1<<i)) {
ref->value->contributes_to_output = 1;
+ }
else {
unlink_ref(ref);
inst->src[arg][i] = NULL;
@@ -88,15 +90,21 @@ static void track_arg(struct brw_wm_compile *c,
static GLuint get_texcoord_mask( GLuint tex_idx )
{
switch (tex_idx) {
- case TEXTURE_1D_INDEX: return WRITEMASK_X;
- case TEXTURE_2D_INDEX: return WRITEMASK_XY;
- case TEXTURE_3D_INDEX: return WRITEMASK_XYZ;
- case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ;
- case TEXTURE_RECT_INDEX: return WRITEMASK_XY;
+ case TEXTURE_1D_INDEX:
+ return WRITEMASK_X;
+ case TEXTURE_2D_INDEX:
+ return WRITEMASK_XY;
+ case TEXTURE_3D_INDEX:
+ return WRITEMASK_XYZ;
+ case TEXTURE_CUBE_INDEX:
+ return WRITEMASK_XYZ;
+ case TEXTURE_RECT_INDEX:
+ return WRITEMASK_XY;
default: return 0;
}
}
+
/* Step two: Basically this is dead code elimination.
*
* Iterate backwards over instructions, noting which values
@@ -202,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c )
break;
case OPCODE_TEX:
+ case OPCODE_TXP:
read0 = get_texcoord_mask(inst->tex_idx);
- if (c->key.shadowtex_mask & (1<<inst->tex_unit))
+ if (inst->tex_shadow)
read0 |= WRITEMASK_Z;
break;
@@ -259,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
break;
case OPCODE_DST:
- case OPCODE_TXP:
case WM_FRONTFACING:
default:
break;
@@ -274,6 +282,3 @@ void brw_wm_pass1( struct brw_wm_compile *c )
brw_wm_print_program(c, "pass1");
}
}
-
-
-
diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c
index 6fca9ad..6faea01 100644
--- a/i965/brw_wm_pass2.c
+++ b/i965/brw_wm_pass2.c
@@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c,
*/
static void init_registers( struct brw_wm_compile *c )
{
- struct brw_context *brw = c->func.brw;
- GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS);
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
@@ -84,18 +82,22 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < FRAG_ATTRIB_MAX; j++)
- if (inputs & (1<<j)) {
- /* index for vs output and ps input are not the same
- in shader varying */
- GLuint index;
- if (j > FRAG_ATTRIB_VAR0)
- index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
+ if (c->key.vp_outputs_written & (1<<j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
else
- index = j;
+ fp_index = -1;
+
nr_interp_regs++;
- prealloc_reg(c, &c->payload.input_interp[index], i++);
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
}
+ }
assert(nr_interp_regs >= 1);
@@ -120,7 +122,7 @@ static void update_register_usage(struct brw_wm_compile *c,
/* Only search those which can change:
*/
if (grf->nextuse < thisinsn) {
- struct brw_wm_ref *ref = grf->value->lastuse;
+ const struct brw_wm_ref *ref = grf->value->lastuse;
/* Has last use of value been passed?
*/
@@ -148,7 +150,7 @@ static void spill_value(struct brw_wm_compile *c,
/* Allocate a spill slot. Note that allocations start from 0x40 -
* the first slot is reserved to mean "undef" in brw_wm_emit.c
*/
- if (!value->spill_slot) {
+ if (!value->spill_slot) {
c->last_scratch += 0x40;
value->spill_slot = c->last_scratch;
}
@@ -189,7 +191,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c,
if (grf[i+j].nextuse < group_nextuse)
group_nextuse = grf[i+j].nextuse;
}
-
+
if (group_nextuse > furthest) {
furthest = group_nextuse;
reg = i;
@@ -197,7 +199,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c,
}
assert(furthest != thisinsn);
-
+
/* Any non-empty regs will need to be spilled:
*/
for (j = 0; j < nr; j++)
@@ -243,7 +245,7 @@ static void alloc_contiguous_dest(struct brw_wm_compile *c,
static void load_args(struct brw_wm_compile *c,
struct brw_wm_instruction *inst)
-{
+{
GLuint thisinsn = inst - c->instruction;
GLuint i,j;
@@ -258,17 +260,17 @@ static void load_args(struct brw_wm_compile *c,
* register allocation and mark the ref as requiring a fill.
*/
GLuint reg = search_contiguous_regs(c, 1, thisinsn);
-
+
c->pass2_grf[reg].value = ref->value;
c->pass2_grf[reg].nextuse = thisinsn;
-
+
ref->value->resident = &c->pass2_grf[reg];
/* Note that a fill is required:
*/
ref->unspill_reg = reg*2;
}
-
+
/* Adjust the hw_reg to point at the value's current location:
*/
assert(ref->value == ref->value->resident->value);
@@ -294,7 +296,7 @@ void brw_wm_pass2( struct brw_wm_compile *c )
for (insn = 0; insn < c->nr_insns; insn++) {
struct brw_wm_instruction *inst = &c->instruction[insn];
-
+
/* Update registers' nextuse values:
*/
update_register_usage(c, insn);
@@ -322,11 +324,11 @@ void brw_wm_pass2( struct brw_wm_compile *c )
break;
}
- if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY)
+ if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
for (i = 0; i < 4; i++)
if (inst->dst[i])
spill_value(c, inst->dst[i]);
-
+ }
}
if (INTEL_DEBUG & DEBUG_WM) {
@@ -339,6 +341,3 @@ void brw_wm_pass2( struct brw_wm_compile *c )
brw_wm_print_program(c, "pass2/done");
}
}
-
-
-
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index 68a9296..3fc18ff 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -152,7 +152,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
if (key->max_aniso > 2.0) {
- sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2,
+ sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
BRW_ANISORATIO_16);
}
}
@@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
}
+ else if (key->tex_target == GL_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
else {
sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
@@ -217,6 +227,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
}
+
/** Sets up the cache key for sampler state for all texture units */
static void
brw_wm_sampler_populate_key(struct brw_context *brw,
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index 1844eba..67b4117 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
GLcontext *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
+ const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
- (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
+ (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = brw_wm_is_glsl(fp);
+ key->is_glsl = bfp->isGLSL;
+
+ /* temporary sanity check assertion */
+ ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
/* _NEW_DEPTH */
key->stats_wm = intel->stats_wm;
@@ -121,6 +125,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->offset_factor = ctx->Polygon.OffsetFactor;
}
+/**
+ * Setup wm hardware state. See page 225 of Volume 2
+ */
static dri_bo *
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
dri_bo **reloc_bufs)
@@ -138,7 +145,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
- brw->wm.scratch_buffer->offset >> 10; /* reloc */
+ brw->wm.scratch_bo->offset >> 10; /* reloc */
wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
} else {
wm.thread2.scratch_space_base_pointer = 0;
@@ -147,9 +154,9 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ wm.thread3.urb_entry_read_offset = 0;
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- wm.thread3.urb_entry_read_offset = 0;
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
if (brw->wm.sampler_bo != NULL) {
@@ -216,7 +223,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
0, 0,
wm.thread2.per_thread_scratch_space,
offsetof(struct brw_wm_unit_state, thread2),
- brw->wm.scratch_buffer);
+ brw->wm.scratch_bo);
}
/* Emit sampler state relocation */
@@ -247,20 +254,20 @@ static void upload_wm_unit( struct brw_context *brw )
if (key.total_scratch) {
GLuint total = key.total_scratch * key.max_threads;
- if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) {
- dri_bo_unreference(brw->wm.scratch_buffer);
- brw->wm.scratch_buffer = NULL;
+ if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
+ dri_bo_unreference(brw->wm.scratch_bo);
+ brw->wm.scratch_bo = NULL;
}
- if (brw->wm.scratch_buffer == NULL) {
- brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
- "wm scratch",
- total,
- 4096);
+ if (brw->wm.scratch_bo == NULL) {
+ brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ total,
+ 4096);
}
}
reloc_bufs[0] = brw->wm.prog_bo;
- reloc_bufs[1] = brw->wm.scratch_buffer;
+ reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;
dri_bo_unreference(brw->wm.state_bo);
@@ -283,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
- BRW_NEW_NR_SURFACES),
+ BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 3487b85..805df8a 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -33,11 +33,12 @@
#include "main/mtypes.h"
#include "main/texformat.h"
#include "main/texstore.h"
+#include "shader/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
-
+#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
@@ -69,7 +70,8 @@ static GLuint translate_tex_target( GLenum target )
}
-static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
+static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+ GLenum depth_mode )
{
switch( mesa_format ) {
case MESA_FORMAT_L8:
@@ -89,10 +91,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
case MESA_FORMAT_ARGB8888:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
case MESA_FORMAT_RGBA8888_REV:
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
case MESA_FORMAT_RGB565:
return BRW_SURFACEFORMAT_B5G6R5_UNORM;
@@ -133,13 +141,34 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
case MESA_FORMAT_RGBA_DXT5:
return BRW_SURFACEFORMAT_BC3_UNORM;
- case MESA_FORMAT_SRGBA8:
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB;
+ case MESA_FORMAT_SARGB8:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+ case MESA_FORMAT_SLA8:
+ return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+ case MESA_FORMAT_SL8:
+ return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
case MESA_FORMAT_SRGB_DXT1:
return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
case MESA_FORMAT_S8_Z24:
- return BRW_SURFACEFORMAT_I24X8_UNORM;
+ /* XXX: these different surface formats don't seem to
+ * make any difference for shadow sampler/compares.
+ */
+ if (depth_mode == GL_INTENSITY)
+ return BRW_SURFACEFORMAT_I24X8_UNORM;
+ else if (depth_mode == GL_ALPHA)
+ return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_L24X8_UNORM;
+
+ case MESA_FORMAT_DUDV8:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
default:
assert(0);
@@ -147,10 +176,14 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
}
}
-struct brw_wm_surface_key {
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
GLenum target, depthmode;
dri_bo *bo;
- GLint format;
+ GLint format, internal_format;
GLint first_level, last_level;
GLint width, height, depth;
GLint pitch, cpp;
@@ -158,6 +191,7 @@ struct brw_wm_surface_key {
GLuint offset;
};
+
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@@ -179,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
static dri_bo *
brw_create_texture_surface( struct brw_context *brw,
- struct brw_wm_surface_key *key )
+ struct brw_surface_key *key )
{
struct brw_surface_state surf;
dri_bo *bo;
@@ -188,9 +222,11 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf.ss0.surface_type = translate_tex_target(key->target);
-
- if (key->bo)
- surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);
+ if (key->bo) {
+ surf.ss0.surface_format = translate_tex_format(key->format,
+ key->internal_format,
+ key->depthmode);
+ }
else {
switch (key->depth) {
case 32:
@@ -256,7 +292,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
- struct brw_wm_surface_key key;
+ struct brw_surface_key key;
+ const GLuint surf = SURF_INDEX_TEXTURE(unit);
memset(&key, 0, sizeof(key));
@@ -267,6 +304,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.offset = intelObj->textureOffset;
} else {
key.format = firstImage->TexFormat->MesaFormat;
+ key.internal_format = firstImage->InternalFormat;
key.pitch = intelObj->mt->pitch;
key.depth = firstImage->Depth;
key.bo = intelObj->mt->region->buffer;
@@ -282,34 +320,207 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.cpp = intelObj->mt->cpp;
key.tiling = intelObj->mt->region->tiling;
- dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
- brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) {
- brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key);
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
}
+
+
+/**
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+static dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key )
+{
+ const GLint w = key->width - 1;
+ struct brw_surface_state surf;
+ dri_bo *bo;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ assert(key->bo);
+ if (key->bo)
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
+ else
+ surf.ss1.base_addr = key->offset;
+
+ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
+ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
+ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+ bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ key, sizeof(*key),
+ &key->bo, key->bo ? 1 : 0,
+ &surf, sizeof(surf),
+ NULL, NULL);
+
+ if (key->bo) {
+ /* Emit relocation to surface contents */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ key->bo);
+ }
+
+ return bo;
+}
+
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static dri_bo *
+brw_update_wm_constant_surface( GLcontext *ctx,
+ GLuint surf,
+ dri_bo *const_buffer,
+ const struct gl_program_parameter_list *params)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct intel_context *intel = &brw->intel;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+ /* free old const buffer if too small */
+ if (const_buffer && const_buffer->size < size) {
+ dri_bo_unreference(const_buffer);
+ const_buffer = NULL;
+ }
+
+ /* alloc new buffer if needed */
+ if (!const_buffer) {
+ const_buffer =
+ drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+
+ return const_buffer;
+}
+
+
+/**
+ * Update the surface state for a VS constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static dri_bo *
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf,
+ dri_bo *const_buffer,
+ const struct gl_program_parameter_list *params)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct intel_context *intel = &brw->intel;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+ assert(surf == 0);
+
+ /* free old const buffer if too small */
+ if (const_buffer && const_buffer->size < size) {
+ dri_bo_unreference(const_buffer);
+ const_buffer = NULL;
+ }
+
+ /* alloc new buffer if needed */
+ if (!const_buffer) {
+ const_buffer =
+ drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ dri_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+
+ return const_buffer;
+}
+
+
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
* usable for further buffers when doing ARB_draw_buffer support.
*/
static void
-brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
- unsigned int unit, GLboolean cached)
+brw_update_renderbuffer_surface(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ unsigned int unit, GLboolean cached)
{
GLcontext *ctx = &brw->intel.ctx;
dri_bo *region_bo = NULL;
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_region *region = irb ? irb->region : NULL;
struct {
unsigned int surface_type;
unsigned int surface_format;
- unsigned int width, height, cpp;
+ unsigned int width, height, pitch, cpp;
GLubyte color_mask[4];
GLboolean color_blend;
uint32_t tiling;
+ uint32_t draw_offset;
} key;
memset(&key, 0, sizeof(key));
@@ -318,14 +529,29 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
region_bo = region->buffer;
key.surface_type = BRW_SURFACE_2D;
- if (region->cpp == 4)
+ switch (irb->texformat->MesaFormat) {
+ case MESA_FORMAT_ARGB8888:
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- else
+ break;
+ case MESA_FORMAT_RGB565:
key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+ break;
+ default:
+ _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+ irb->texformat->MesaFormat);
+ }
key.tiling = region->tiling;
- key.width = region->pitch; /* XXX: not really! */
+ key.width = region->width;
key.height = region->height;
+ key.pitch = region->pitch;
key.cpp = region->cpp;
+ key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
@@ -333,6 +559,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
key.width = 1;
key.height = 1;
key.cpp = 4;
+ key.draw_offset = 0;
}
memcpy(key.color_mask, ctx->Color.ColorMask,
sizeof(key.color_mask));
@@ -354,13 +581,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
+ surf.ss1.base_addr = key.draw_offset;
if (region_bo != NULL)
- surf.ss1.base_addr = region_bo->offset; /* reloc */
+ surf.ss1.base_addr += region_bo->offset; /* reloc */
surf.ss2.width = key.width - 1;
surf.ss2.height = key.height - 1;
brw_set_surface_tiling(&surf, key.tiling);
- surf.ss3.pitch = (key.width * key.cpp) - 1;
+ surf.ss3.pitch = (key.pitch * key.cpp) - 1;
/* _NEW_COLOR */
surf.ss0.color_blend = key.color_blend;
@@ -371,7 +599,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
/* Key size will never match key size for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
+ &key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
NULL, NULL);
@@ -380,12 +608,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
* them both. We might be able to figure out from other state
* a more restrictive relocation to emit.
*/
- dri_bo_emit_reloc(brw->wm.surf_bo[unit],
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct brw_surface_state, ss1),
- region_bo);
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+ offsetof(struct brw_surface_state, ss1),
+ region_bo,
+ key.draw_offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
}
}
}
@@ -400,6 +628,8 @@ brw_wm_get_binding_table(struct brw_context *brw)
{
dri_bo *bind_bo;
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
@@ -446,54 +676,159 @@ static void prepare_wm_surfaces(struct brw_context *brw )
GLuint i;
int old_nr_surfaces;
- if (brw->state.nr_draw_regions > 1) {
- for (i = 0; i < brw->state.nr_draw_regions; i++) {
- brw_update_region_surface(brw, brw->state.draw_regions[i], i,
- GL_FALSE);
+ /* _NEW_BUFFERS */
+ /* Update surfaces for drawing buffers */
+ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ brw_update_renderbuffer_surface(brw,
+ ctx->DrawBuffer->_ColorDrawBuffers[i],
+ i,
+ GL_FALSE);
}
- }else {
- brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
+ } else {
+ brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE);
}
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+ /* Update surface / buffer for fragment shader constant buffer */
+ {
+ const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ fp->const_buffer =
+ brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
+ fp->program.Base.Parameters);
+
+ brw->wm.nr_surfaces = surf + 1;
+ }
+
+ /* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+ const GLuint surf = SURF_INDEX_TEXTURE(i);
/* _NEW_TEXTURE, BRW_NEW_TEXDATA */
- if(texUnit->_ReallyEnabled) {
+ if (texUnit->_ReallyEnabled) {
if (texUnit->_Current == intel->frame_buffer_texobj) {
- dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
- brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0];
- dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
- brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
+ /* render to texture */
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw->wm.surf_bo[0];
+ dri_bo_reference(brw->wm.surf_bo[surf]);
+ brw->wm.nr_surfaces = surf + 1;
} else {
+ /* regular texture */
brw_update_texture_surface(ctx, i);
- brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
+ brw->wm.nr_surfaces = surf + 1;
}
} else {
- dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
- brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL;
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
}
-
}
dri_bo_unreference(brw->wm.bind_bo);
brw->wm.bind_bo = brw_wm_get_binding_table(brw);
if (brw->wm.nr_surfaces != old_nr_surfaces)
- brw->state.dirty.brw |= BRW_NEW_NR_SURFACES;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
+
+ bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, brw->vs.nr_surfaces,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
+ uint32_t *data = malloc(data_size);
+ int i;
+
+ for (i = 0; i < brw->vs.nr_surfaces; i++)
+ if (brw->vs.surf_bo[i])
+ data[i] = brw->vs.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, brw->vs.nr_surfaces,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ dri_bo_emit_reloc(bind_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ i * sizeof(GLuint),
+ brw->vs.surf_bo[i]);
+ }
+ }
+
+ free(data);
+ }
+
+ return bind_bo;
+}
+
+
+/**
+ * Vertex shader surfaces. Just constant buffer for now. Could add vertex
+ * shader textures in the future.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ /* Update surface / buffer for vertex shader constant buffer */
+ {
+ const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ vp->const_buffer =
+ brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
+ vp->program.Base.Parameters);
+
+ brw->vs.nr_surfaces = 1;
+ }
+
+ dri_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+
+ if (1)
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+}
+
+
+static void
+prepare_surfaces(struct brw_context *brw)
+{
+ prepare_wm_surfaces(brw);
+ prepare_vs_surfaces(brw);
}
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
+ .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
- .prepare = prepare_wm_surfaces,
+ .prepare = prepare_surfaces,
};
diff --git a/i965/intel_state.c b/i965/intel_state.c
deleted file mode 100644
index 0c9c670..0000000
--- a/i965/intel_state.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "main/colormac.h"
-#include "main/dd.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_regions.h"
-#include "swrast/swrast.h"
-
-int
-intel_translate_shadow_compare_func( GLenum func )
-{
- switch(func) {
- case GL_NEVER:
- return COMPAREFUNC_ALWAYS;
- case GL_LESS:
- return COMPAREFUNC_LEQUAL;
- case GL_LEQUAL:
- return COMPAREFUNC_LESS;
- case GL_GREATER:
- return COMPAREFUNC_GEQUAL;
- case GL_GEQUAL:
- return COMPAREFUNC_GREATER;
- case GL_NOTEQUAL:
- return COMPAREFUNC_EQUAL;
- case GL_EQUAL:
- return COMPAREFUNC_NOTEQUAL;
- case GL_ALWAYS:
- return COMPAREFUNC_NEVER;
- }
-
- fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
- return COMPAREFUNC_NEVER;
-}
-
-int
-intel_translate_compare_func( GLenum func )
-{
- switch(func) {
- case GL_NEVER:
- return COMPAREFUNC_NEVER;
- case GL_LESS:
- return COMPAREFUNC_LESS;
- case GL_LEQUAL:
- return COMPAREFUNC_LEQUAL;
- case GL_GREATER:
- return COMPAREFUNC_GREATER;
- case GL_GEQUAL:
- return COMPAREFUNC_GEQUAL;
- case GL_NOTEQUAL:
- return COMPAREFUNC_NOTEQUAL;
- case GL_EQUAL:
- return COMPAREFUNC_EQUAL;
- case GL_ALWAYS:
- return COMPAREFUNC_ALWAYS;
- }
-
- fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
- return COMPAREFUNC_ALWAYS;
-}
-
-int
-intel_translate_stencil_op( GLenum op )
-{
- switch(op) {
- case GL_KEEP:
- return STENCILOP_KEEP;
- case GL_ZERO:
- return STENCILOP_ZERO;
- case GL_REPLACE:
- return STENCILOP_REPLACE;
- case GL_INCR:
- return STENCILOP_INCRSAT;
- case GL_DECR:
- return STENCILOP_DECRSAT;
- case GL_INCR_WRAP:
- return STENCILOP_INCR;
- case GL_DECR_WRAP:
- return STENCILOP_DECR;
- case GL_INVERT:
- return STENCILOP_INVERT;
- default:
- return STENCILOP_ZERO;
- }
-}
-
-int
-intel_translate_blend_factor( GLenum factor )
-{
- switch(factor) {
- case GL_ZERO:
- return BLENDFACT_ZERO;
- case GL_SRC_ALPHA:
- return BLENDFACT_SRC_ALPHA;
- case GL_ONE:
- return BLENDFACT_ONE;
- case GL_SRC_COLOR:
- return BLENDFACT_SRC_COLR;
- case GL_ONE_MINUS_SRC_COLOR:
- return BLENDFACT_INV_SRC_COLR;
- case GL_DST_COLOR:
- return BLENDFACT_DST_COLR;
- case GL_ONE_MINUS_DST_COLOR:
- return BLENDFACT_INV_DST_COLR;
- case GL_ONE_MINUS_SRC_ALPHA:
- return BLENDFACT_INV_SRC_ALPHA;
- case GL_DST_ALPHA:
- return BLENDFACT_DST_ALPHA;
- case GL_ONE_MINUS_DST_ALPHA:
- return BLENDFACT_INV_DST_ALPHA;
- case GL_SRC_ALPHA_SATURATE:
- return BLENDFACT_SRC_ALPHA_SATURATE;
- case GL_CONSTANT_COLOR:
- return BLENDFACT_CONST_COLOR;
- case GL_ONE_MINUS_CONSTANT_COLOR:
- return BLENDFACT_INV_CONST_COLOR;
- case GL_CONSTANT_ALPHA:
- return BLENDFACT_CONST_ALPHA;
- case GL_ONE_MINUS_CONSTANT_ALPHA:
- return BLENDFACT_INV_CONST_ALPHA;
- }
-
- fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
- return BLENDFACT_ZERO;
-}
-
-int
-intel_translate_logic_op( GLenum opcode )
-{
- switch(opcode) {
- case GL_CLEAR:
- return LOGICOP_CLEAR;
- case GL_AND:
- return LOGICOP_AND;
- case GL_AND_REVERSE:
- return LOGICOP_AND_RVRSE;
- case GL_COPY:
- return LOGICOP_COPY;
- case GL_COPY_INVERTED:
- return LOGICOP_COPY_INV;
- case GL_AND_INVERTED:
- return LOGICOP_AND_INV;
- case GL_NOOP:
- return LOGICOP_NOOP;
- case GL_XOR:
- return LOGICOP_XOR;
- case GL_OR:
- return LOGICOP_OR;
- case GL_OR_INVERTED:
- return LOGICOP_OR_INV;
- case GL_NOR:
- return LOGICOP_NOR;
- case GL_EQUIV:
- return LOGICOP_EQUIV;
- case GL_INVERT:
- return LOGICOP_INV;
- case GL_OR_REVERSE:
- return LOGICOP_OR_RVRSE;
- case GL_NAND:
- return LOGICOP_NAND;
- case GL_SET:
- return LOGICOP_SET;
- default:
- return LOGICOP_SET;
- }
-}
-
-
-static void
-intelClearColor(GLcontext *ctx, const GLfloat color[4])
-{
- struct intel_context *intel = intel_context(ctx);
- GLubyte clear[4];
-
- CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
- CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
- CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
- CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
-
- /* compute both 32 and 16-bit clear values */
- intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1],
- clear[2], clear[3]);
- intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]);
-}
-
-
-/* Fallback to swrast for select and feedback.
- */
-static void
-intelRenderMode( GLcontext *ctx, GLenum mode )
-{
- struct intel_context *intel = intel_context(ctx);
- FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
-}
-
-
-void
-intelInitStateFuncs( struct dd_function_table *functions )
-{
- functions->RenderMode = intelRenderMode;
- functions->ClearColor = intelClearColor;
-}