diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:01:55 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 05:01:55 +0100 |
commit | da7796a7d40ca2cd4a03e99ddf38e3a7256a401f (patch) | |
tree | f692761e1ce757ce13112c602efc8228d422b78d /i965 | |
parent | cdb0aeec165a846b647e6f4e722c95c09e185ce1 (diff) |
Import i915 and i965 dri drivers from mesa 7.5.0.7.5.0
Diffstat (limited to 'i965')
37 files changed, 2333 insertions, 1124 deletions
diff --git a/i965/Makefile.am b/i965/Makefile.am index 69abe6c..5a118af 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -15,7 +15,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_clear.c \ ../shared/intel_context.c \ ../shared/intel_decode.c \ - ../shared/intel_depthstencil.c \ + ../shared/intel_extensions.c \ ../shared/intel_fbo.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_regions.c \ @@ -25,7 +25,7 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_bitmap.c \ ../shared/intel_pixel_copy.c \ ../shared/intel_pixel_draw.c \ - intel_state.c \ + ../shared/intel_state.c \ ../shared/intel_swapbuffers.c \ ../shared/intel_tex.c \ ../shared/intel_tex_copy.c \ diff --git a/i965/brw_cc.c b/i965/brw_cc.c index 8237016..c724218 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(key, 0, sizeof(*key)); - key->stencil = ctx->Stencil.Enabled; + key->stencil = ctx->Stencil._Enabled; key->stencil_two_side = ctx->Stencil._TestTwoSide; if (key->stencil) { diff --git a/i965/brw_context.c b/i965/brw_context.c index 4357100..4dbe551 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -111,14 +111,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - /* Advertise the full hardware capabilities. The new memory - * manager should cope much better with overload situations: + /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ - ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxTextureLevels = 13; ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<11); + ctx->Const.MaxTextureRectSize = (1<<12); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; diff --git a/i965/brw_context.h b/i965/brw_context.h index df90c20..577497b 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -46,7 +46,7 @@ * * CURBE - constant URB entry. An urb region (entry) used to hold * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. + * preload into the GRF when spawning a thread. * * VUE - vertex URB entry. An urb entry holding a vertex and usually * a vertex header. The header contains control information and @@ -63,7 +63,7 @@ * special and may be overwritten. * * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous + * by sending messages. Message parameters are placed in contiguous * MRF registers. All program output is via these messages. URB * entries are populated by sending a message to the shared URB * function containing the new data, together with a control word, @@ -141,7 +141,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -154,19 +155,25 @@ struct brw_state_flags { GLuint cache; }; + +/** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; - +/** Subclass of Mesa fragment program */ struct brw_fragment_program { struct gl_fragment_program program; - GLuint id; -}; - + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; /* Data about a particular attempt to compile a program. Note that @@ -182,7 +189,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -221,6 +228,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -237,8 +245,35 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table for the WM. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffers (+2). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -418,8 +453,8 @@ struct brw_context struct brw_tracked_state **atoms; GLuint nr_atoms; - GLuint nr_draw_regions; - struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -512,8 +547,8 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; @@ -545,6 +580,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { @@ -576,9 +616,10 @@ struct brw_context struct brw_wm_prog_data *prog_data; struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. */ - GLuint input_size_masks[4]; + GLbitfield input_size_masks[4]; /** Array of surface default colors (texture border color) */ dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; @@ -587,7 +628,7 @@ struct brw_context GLuint nr_surfaces; GLuint max_threads; - dri_bo *scratch_buffer; + dri_bo *scratch_bo; GLuint sampler_count; dri_bo *sampler_bo; @@ -627,8 +668,6 @@ struct brw_context * brw_vtbl.c */ void brwInitVtbl( struct brw_context *brw ); -void brw_do_flush( struct brw_context *brw, - GLuint flags ); /*====================================================================== * brw_context.c @@ -670,7 +709,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -void brw_upload_constant_buffer_state(struct brw_context *brw); +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); /*====================================================================== @@ -683,6 +724,32 @@ brw_context( GLcontext *ctx ) return (struct brw_context *)ctx; } +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + #define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) #endif diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index 4eaaa5f..9197fed 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -38,23 +38,28 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -138,24 +143,24 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. */ -void brw_upload_constant_buffer_state(struct brw_context *brw) +void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); } static GLfloat fixed_plane[6][4] = { @@ -174,10 +179,12 @@ static GLfloat fixed_plane[6][4] = { static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLuint sz = brw->curbe.total_size; - GLuint bufsz = sz * 16 * sizeof(GLfloat); + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; @@ -189,27 +196,25 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; if (sz == 0) { - if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; } - buf = (GLfloat *)malloc(bufsz); - - memset(buf, 0, bufsz); + buf = (GLfloat *) _mesa_calloc(bufsz); + /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + buf[offset + i] = *brw->wm.prog_data->param[i]; } @@ -244,18 +249,20 @@ static void prepare_constant_buffer(struct brw_context *brw) } } - + /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; } } @@ -274,11 +281,14 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); + /* constants have not changed */ + _mesa_free(buf); } else { + /* constants have changed */ if (brw->curbe.last_buf) - free(brw->curbe.last_buf); + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -326,11 +336,77 @@ static void prepare_constant_buffer(struct brw_context *brw) } +/** + * Copy Mesa program parameters into given constant buffer. + */ +static void +update_constant_buffer(struct brw_context *brw, + const struct gl_program_parameter_list *params, + dri_bo *const_buffer) +{ + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* copy Mesa program constants into the buffer */ + if (const_buffer && size > 0) { + + assert(const_buffer); + assert(const_buffer->size >= size); + + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(const_buffer); + memcpy(const_buffer->virtual, params->ParameterValues, size); + drm_intel_gem_bo_unmap_gtt(const_buffer); + } + else { + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + } + + if (0) { + int i; + for (i = 0; i < params->NumParameters; i++) { + float *p = params->ParameterValues[i]; + printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); + } + } + } +} + + +/** Copy current vertex program's parameters into the constant buffer */ +static void +update_vertex_constant_buffer(struct brw_context *brw) +{ + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + if (0) { + printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("program %u\n", vp->program.Base.Id); + } + if (vp->use_const_buffer) + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); +} + + +/** Copy current fragment program's parameters into the constant buffer */ +static void +update_fragment_constant_buffer(struct brw_context *brw) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + if (fp->use_const_buffer) + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + update_vertex_constant_buffer(brw); + update_fragment_constant_buffer(brw); + BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); diff --git a/i965/brw_defines.h b/i965/brw_defines.h index 39c3225..98fc909 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -225,6 +225,24 @@ #define BRW_RASTRULE_UPPER_LEFT 0 #define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 #define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 #define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 @@ -349,9 +367,10 @@ #define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 #define BRW_SURFACEFORMAT_I16_FLOAT 0x115 #define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A #define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B #define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C #define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D @@ -368,6 +387,7 @@ #define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 @@ -734,7 +754,7 @@ #define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CS_URB_STATE 0x6001 #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 0b64999..5342622 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw, /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + brw->no_batch_wrap = GL_FALSE; } @@ -393,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, retval = GL_TRUE; } + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); out: UNLOCK_HARDWARE(intel); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index 73d6dea..b91b20b 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -156,7 +156,13 @@ static GLuint byte_types_scale[5] = { }; -static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) +/** + * Given vertex array type/size/format/normalized info, return + * the appopriate hardware surface type. + * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. + */ +static GLuint get_surface_type( GLenum type, GLuint size, + GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) _mesa_printf("type %s size %d normalized %d\n", @@ -171,11 +177,20 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) case GL_BYTE: return byte_types_norm[size]; case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_norm[size]; + case GL_UNSIGNED_BYTE: + if (format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + assert(size == 4); + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + else { + return ubyte_types_norm[size]; + } default: assert(0); return 0; } } else { + assert(format == GL_RGBA); /* sanity check */ switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; @@ -262,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { + struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -274,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } } else { - void *data; char *dest; - const char *src = element->glarray->Ptr; + const unsigned char *src = element->glarray->Ptr; int i; - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - dri_bo_subdata(element->bo, - element->offset, - size, - data); - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + void *data; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + dri_bo_subdata(element->bo, + element->offset, + size, + data); + + _mesa_free(data); + } } } @@ -484,6 +523,7 @@ static void brw_emit_vertices(struct brw_context *brw) struct brw_vertex_element *input = enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, + input->glarray->Format, input->glarray->Normalized); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; @@ -547,9 +587,15 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } } else { - offset = (GLuint)index_buffer->ptr; + offset = (GLuint) (unsigned long) index_buffer->ptr; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. diff --git a/i965/brw_eu.h b/i965/brw_eu.h index b36a197..003332f 100644 --- a/i965/brw_eu.h +++ b/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 +#define BRW_EU_MAX_INSN 4000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -170,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < 128); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < 9); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + reg.type = type; reg.file = file; reg.nr = nr; @@ -723,6 +730,13 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset return ptr; } +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + static INLINE struct brw_instruction *current_insn( struct brw_compile *p) { return &p->store[p->nr_insn]; @@ -841,12 +855,24 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c index 91dbbd5..29f3f6d 100644 --- a/i965/brw_eu_debug.c +++ b/i965/brw_eu_debug.c @@ -65,6 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_8 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ _mesa_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && @@ -72,8 +73,12 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_1 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + _mesa_printf("imm %f", hwreg.dw1.f); + } else { _mesa_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 4e099b5..2a147fb 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn, } static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn, void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -312,24 +320,25 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLboolean eot) + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot) { brw_set_src1(insn, brw_imm_d(0)); @@ -407,7 +416,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, * Convenience routines. */ #define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -415,7 +424,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -469,9 +478,9 @@ void brw_NOP(struct brw_compile *p) */ struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -674,7 +683,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) + struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -762,7 +771,7 @@ void brw_CMP(struct brw_compile *p, * Helpers for the various SEND message types: */ -/* Invert 8 values +/** Extended math function, float[8]. */ void brw_math( struct brw_compile *p, struct brw_reg dest, @@ -794,7 +803,9 @@ void brw_math( struct brw_compile *p, data_type); } -/* Use 2 send instructions to invert 16 elements +/** + * Extended math function, float[16]. + * Use 2 send instructions. */ void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -847,22 +858,26 @@ void brw_math_16( struct brw_compile *p, } - - +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -879,7 +894,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_set_src0(insn, src); brw_set_dp_write_message(insn, - 255, /* bti */ + 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, @@ -887,24 +902,29 @@ void brw_dp_WRITE_16( struct brw_compile *p, 0, /* response_length */ 0); /* eot */ } - } +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -919,10 +939,10 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ + 1, /* target cache (render/scratch) */ 1, /* msg_length */ 2, /* response_length */ 0); /* eot */ @@ -930,14 +950,143 @@ void brw_dp_READ_16( struct brw_compile *p, } +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - GLboolean eot) + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); @@ -958,7 +1107,11 @@ void brw_fb_WRITE(struct brw_compile *p, } - +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -973,8 +1126,8 @@ void brw_SAMPLE(struct brw_compile *p, { GLboolean need_stall = 0; - if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1006,7 +1159,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1047,14 +1200,13 @@ void brw_SAMPLE(struct brw_compile *p, eot); } - if (need_stall) - { + if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); - brw_set_compression_control(p, GL_FALSE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 5f4f2d5..2993574 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -75,7 +75,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled && + if (ctx->Stencil._Enabled && (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { DBG("FALLBACK: stencil\n"); return GL_TRUE; diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index f311663..9bc5c35 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } @@ -178,7 +177,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); + brw_upload_cs_urb_state(brw); } const struct brw_tracked_state brw_psp_urb_cbs = { diff --git a/i965/brw_program.c b/i965/brw_program.c index 0c86911..bac6918 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -38,6 +38,7 @@ #include "brw_context.h" #include "brw_util.h" +#include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -94,7 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -110,30 +116,36 @@ static void brwProgramStringNotify( GLcontext *ctx, GLenum target, struct gl_program *prog ) { + struct brw_context *brw = brw_context(ctx); + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_context *brw = brw_context(ctx); - struct brw_fragment_program *p = (struct brw_fragment_program *)prog; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + if (fprog->FogOption) { _mesa_append_fog_code(ctx, fprog); fprog->FogOption = GL_NONE; } - if (p == fp) + if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - p->id = brw->program_id++; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { - struct brw_context *brw = brw_context(ctx); - struct brw_vertex_program *p = (struct brw_vertex_program *)prog; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (p == vp) + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (p->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &p->program); + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); } - p->id = brw->program_id++; + newVP->id = brw->program_id++; /* Also tell tnl about it: */ diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index e22d080..c999187 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -43,10 +43,12 @@ static void upload_sf_vp(struct brw_context *brw) const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - if (intel_rendering_to_texture(ctx)) { + if (render_to_fbo) { y_scale = 1.0; y_bias = 0; } @@ -57,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv.viewport.m00 = v[MAT_SX]; sfv.viewport.m11 = v[MAT_SY] * y_scale; sfv.viewport.m22 = v[MAT_SZ] * depth_scale; @@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw) sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - /* _NEW_SCISSOR */ + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT + * for DrawBuffer->_[XY]{min,max} + */ /* The scissor only needs to handle the intersection of drawable and * scissor rect. Clipping to the boundaries of static shared buffers @@ -75,7 +77,7 @@ static void upload_sf_vp(struct brw_context *brw) * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - if (intel_rendering_to_texture(ctx)) { + if (render_to_fbo) { /* texmemory: Y=0=bottom */ sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; @@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw) const struct brw_tracked_state brw_sf_vp = { .dirty = { .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = 0, .cache = 0 }, @@ -111,10 +114,13 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; GLenum front_face, cull_face; - GLboolean scissor, line_smooth, point_sprite, point_attenuated; + unsigned scissor:1; + unsigned line_smooth:1; + unsigned point_sprite:1; + unsigned point_attenuated:1; + unsigned render_to_fbo:1; float line_width; float point_size; - GLboolean render_to_texture; }; static void @@ -144,10 +150,10 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->line_smooth = ctx->Line.SmoothFlag; key->point_sprite = ctx->Point.PointSprite; - key->point_size = ctx->Point.Size; + key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; - key->render_to_texture = intel_rendering_to_texture(&brw->intel.ctx); + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } static dri_bo * @@ -194,10 +200,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to texture, and that inverts + /* The viewport is inverted for rendering to a FBO, and that inverts * polygon front/back orientation. */ - sf.sf5.front_winding ^= key->render_to_texture; + sf.sf5.front_winding ^= key->render_to_fbo; switch (key->cull_face) { case GL_FRONT: @@ -228,7 +234,33 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.line_width = 0; /* _NEW_POINT */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + if (!key->render_to_fbo) { + /* Rendering to an OpenGL window */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + } + else { + /* If rendering to an FBO, the pixel coordinate system is + * inverted with respect to the normal OpenGL coordinate + * system, so BRW_RASTRULE_LOWER_RIGHT is correct. + * But this value is listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * It does work on at least some devices, if not all; + * if devices that don't support it can be identified, + * the likely failure case is that points are rasterized + * incorrectly, which is no worse than occurs without + * the value, so we're using it here. + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + } /* XXX clamp max depends on AA vs. non-AA */ sf.sf7.sprite_point = key->point_sprite; diff --git a/i965/brw_state.h b/i965/brw_state.h index df839c5..81b0a45 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index dc87859..811940e 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c index b28c57c..a713262 100644 --- a/i965/brw_state_dump.c +++ b/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); @@ -162,6 +176,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog) fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + /* Stop at the end of the program. It'd be nice to keep track of the actual + * intended program size instead of guessing like this. + */ + if (data[i * 4 + 0] == 0 && + data[i * 4 + 1] == 0 && + data[i * 4 + 2] == 0 && + data[i * 4 + 3] == 0) + break; } dri_bo_unmap(prog); diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 4e577d0..89e2981 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -439,7 +439,7 @@ struct brw_urb_fence } bits1; }; -struct brw_constant_buffer_state /* previously brw_command_streamer */ +struct brw_cs_urb_state { struct header header; @@ -1031,10 +1031,10 @@ struct brw_surface_state GLuint writedisable_green:1; GLuint writedisable_red:1; GLuint writedisable_alpha:1; - GLuint surface_format:9; + GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ GLuint data_return_format:1; GLuint pad0:1; - GLuint surface_type:3; + GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ } ss0; struct { diff --git a/i965/brw_tex.c b/i965/brw_tex.c index ef99e9c..71bff16 100644 --- a/i965/brw_tex.c +++ b/i965/brw_tex.c @@ -32,21 +32,12 @@ #include "main/glheader.h" #include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" #include "main/teximage.h" -#include "main/texstore.h" -#include "main/texformat.h" - -#include "texmem.h" #include "intel_context.h" #include "intel_regions.h" #include "intel_tex.h" #include "brw_context.h" -#include "brw_defines.h" void brw_FrameBufferTexInit( struct brw_context *brw, diff --git a/i965/brw_vs.h b/i965/brw_vs.h index 99d0e93..1e4f660 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -75,6 +75,11 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c index 9977677..249a800 100644 --- a/i965/brw_vs_constval.c +++ b/i965/brw_vs_constval.c @@ -39,8 +39,8 @@ */ struct tracker { GLboolean twoside; - GLubyte active[PROGRAM_OUTPUT+1][128]; - GLuint size_masks[4]; + GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS]; + GLbitfield size_masks[4]; /**< one bit per fragment program input attrib */ }; @@ -53,8 +53,10 @@ static void set_active_component( struct tracker *t, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: + assert(file < PROGRAM_OUTPUT + 1); + assert(index < Elements(t->active[0])); t->active[file][index] |= active; - + break; default: break; } @@ -96,7 +98,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; @@ -108,10 +110,15 @@ static GLubyte get_active( struct tracker *t, return active; } +/** + * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx. + */ static GLubyte get_output_size( struct tracker *t, GLuint idx ) { - GLubyte active = t->active[PROGRAM_OUTPUT][idx]; + GLubyte active; + assert(idx < VERT_RESULT_MAX); + active = t->active[PROGRAM_OUTPUT][idx]; if (active & (1<<3)) return 4; if (active & (1<<2)) return 3; if (active & (1<<1)) return 2; @@ -123,7 +130,7 @@ static GLubyte get_output_size( struct tracker *t, */ static void calc_sizes( struct tracker *t ) { - GLuint i; + GLint vertRes; if (t->twoside) { t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= @@ -133,12 +140,27 @@ static void calc_sizes( struct tracker *t ) t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - switch (get_output_size(t, i)) { - case 4: t->size_masks[4-1] |= 1<<i; - case 3: t->size_masks[3-1] |= 1<<i; - case 2: t->size_masks[2-1] |= 1<<i; - case 1: t->size_masks[1-1] |= 1<<i; + /* Examine vertex program output sizes to set the size_masks[] info + * which describes the fragment program input sizes. + */ + for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) { + GLint fragAttrib; + + /* map vertex program output index to fragment program input index */ + if (vertRes <= VERT_RESULT_TEX7) + fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0; + else if (vertRes >= VERT_RESULT_VAR0) + fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0; + else + continue; + assert(fragAttrib >= FRAG_ATTRIB_TEX0); + assert(fragAttrib <= FRAG_ATTRIB_MAX); + + switch (get_output_size(t, vertRes)) { + case 4: t->size_masks[4-1] |= 1 << fragAttrib; + case 3: t->size_masks[3-1] |= 1 << fragAttrib; + case 2: t->size_masks[2-1] |= 1 << fragAttrib; + case 1: t->size_masks[1-1] |= 1 << fragAttrib; break; } } @@ -170,8 +192,8 @@ static void calc_wm_input_sizes( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index 235f826..b69616d 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -38,18 +38,49 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} -/* Do things as simply as possible. Allocate and populate all regs +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + +#if 0 + if (c->vp->program.Base.Parameters->NumParameters >= 6) + c->vp->use_const_buffer = 1; + else +#endif + c->vp->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ - c->r0 = brw_vec8_grf(reg, 0); reg++; + c->r0 = brw_vec8_grf(reg, 0); + reg++; /* User clip planes from curbe: */ @@ -60,24 +91,33 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Deal with curbe alignment: */ - reg += ((6+c->key.nr_userclip+3)/4)*2; + reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params+1)/2; + if (c->vp->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ c->nr_inputs = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1<<i)) { + if (c->prog_data.inputs_read & (1 << i)) { c->nr_inputs++; c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -91,7 +131,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1<<i)) { + if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); @@ -132,17 +172,24 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } } c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); reg += 2; - - + /* Some opcodes need an internal temporary: */ c->first_tmp = reg; @@ -152,35 +199,23 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * urb_read_length is the number of registers read from *each* * vertex urb, so is half the amount: */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; -} - - -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + } } +/** + * If an instruction uses a temp reg both as a src and the dest, we + * sometimes need to allocate an intermediate temporary. + */ static void unalias1( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -200,6 +235,10 @@ static void unalias1( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 2-operand instruction needs an intermediate temporary. + */ static void unalias2( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -222,6 +261,10 @@ static void unalias2( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 3-operand instruction needs an intermediate temporary. + */ static void unalias3( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -615,6 +658,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -655,13 +700,83 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (src->RelAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (src->RelAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - GLuint file, + gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -690,13 +805,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -717,10 +836,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->vp->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -732,30 +908,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -766,16 +943,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -785,14 +984,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -826,10 +1027,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -844,8 +1042,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -975,7 +1173,7 @@ post_vs_emit( struct brw_vs_compile *c, } -/* Emit the fragment program instructions here. +/* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { @@ -1025,6 +1223,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ if (inst->Opcode != OPCODE_SWZ) @@ -1032,10 +1235,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1163,7 +1366,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index 1a63766..3d29538 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index e69d4c5..ba03afd 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -23,14 +23,12 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - +**********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ #include "main/glheader.h" #include "main/mtypes.h" @@ -44,12 +42,11 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" - #include "brw_draw.h" #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" -#include <stdarg.h> + static void dri_bo_release(dri_bo **bo) @@ -58,7 +55,9 @@ dri_bo_release(dri_bo **bo) *bo = NULL; } -/* called from intelDestroyContext() + +/** + * called from intelDestroyContext() */ static void brw_destroy_context( struct intel_context *intel ) { @@ -68,16 +67,19 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + _mesa_free(brw->wm.compile_data); + brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); - brw->state.nr_draw_regions = 0; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); @@ -91,6 +93,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->cc.prog_bo); @@ -98,37 +101,46 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.vp_bo); } -/* called from intelDrawBuffer() + +/** + * called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_regions[], - struct intel_region *depth_region, - GLuint num_regions) + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - int i; + GLuint i; + + /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); - for (i = 0; i < num_regions; i++) - intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_draw_regions = num_regions; + brw->state.nr_color_regions = num_color_regions; } -/* called from intel_batchbuffer_flush and children before sending a + +/** + * called from intel_batchbuffer_flush and children before sending a * batchbuffer off. */ static void brw_finish_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - brw_emit_query_end(brw); } -/* called from intelFlushBatchLocked + +/** + * called from intelFlushBatchLocked */ static void brw_new_batch( struct intel_context *intel ) { @@ -159,39 +171,20 @@ static void brw_new_batch( struct intel_context *intel ) } } -static void brw_note_fence( struct intel_context *intel, - GLuint fence ) + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) { brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - + + static void brw_note_unlock( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - brw_state_cache_check_size(brw); } -void brw_do_flush( struct brw_context *brw, - GLuint flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - - -static void brw_emit_flush( struct intel_context *intel, - GLuint unused ) -{ - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -205,6 +198,7 @@ static GLuint brw_flush_cmd( void ) return *(GLuint *)&flush; } + static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -214,20 +208,18 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state void brwInitVtbl( struct brw_context *brw ) { brw->intel.vtbl.check_vertex_size = 0; - brw->intel.vtbl.emit_state = 0; - brw->intel.vtbl.reduced_primitive_state = 0; + brw->intel.vtbl.emit_state = 0; + brw->intel.vtbl.reduced_primitive_state = 0; brw->intel.vtbl.render_start = 0; - brw->intel.vtbl.update_texture_state = 0; + brw->intel.vtbl.update_texture_state = 0; - brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; + brw->intel.vtbl.invalidate_state = brw_invalidate_state; + brw->intel.vtbl.note_fence = brw_note_fence; + brw->intel.vtbl.note_unlock = brw_note_unlock; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; - brw->intel.vtbl.emit_flush = brw_emit_flush; brw->intel.vtbl.debug_batch = brw_debug_batch; } - diff --git a/i965/brw_wm.c b/i965/brw_wm.c index c6791da..8a3b7df 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -82,6 +82,58 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -92,52 +144,39 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + brw_wm_glsl_emit(brw, c); } + else { + brw_wm_non_glsl_emit(brw, c); + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); @@ -161,7 +200,7 @@ static void brw_wm_populate_key( struct brw_context *brw, { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -176,7 +215,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -188,7 +227,7 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (ctx->Stencil.WriteMask[0] || @@ -228,7 +267,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); @@ -245,6 +284,11 @@ static void brw_wm_populate_key( struct brw_context *brw, if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } + + key->tex_swizzles[i] = t->_Swizzle; + } + else { + key->tex_swizzles[i] = SWIZZLE_NOOP; } } @@ -275,10 +319,11 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } - /* Extra info: - */ - key->program_string_id = fp->id; + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + /* The unique fragment program ID */ + key->program_string_id = fp->id; } @@ -302,8 +347,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | @@ -317,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog }; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 3cbdf81..295fed8 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -65,15 +65,17 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint runtime_check_aads_emit:1; - GLuint projtex_mask:16; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - // GLuint pad1:16; + + GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; GLuint origin_x, origin_y; GLuint drawable_height; + GLuint vp_outputs_written; }; @@ -142,13 +144,12 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; -#define PROGRAM_INTERNAL_PARAM - #define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) @@ -240,17 +241,25 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + /** Mapping from Mesa registers to hardware registers */ struct { - GLboolean inited; - struct brw_reg reg; + GLboolean inited; + struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; + GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index bc8e8c9..a870e75 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -699,7 +699,6 @@ static void emit_tex( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; - GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; GLuint i, nr; GLuint emit; @@ -721,7 +720,7 @@ static void emit_tex( struct brw_wm_compile *c, break; } - if (shadow) { + if (inst->tex_shadow) { nr = 4; emit |= WRITEMASK_W; } @@ -743,10 +742,10 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - (shadow ? + (inst->tex_shadow ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), responseLength, @@ -792,7 +791,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, @@ -914,6 +913,9 @@ static void emit_aa( struct brw_wm_compile *c, /* Post-fragment-program processing. Send the results to the * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, @@ -1022,8 +1024,8 @@ static void emit_fb_write( struct brw_wm_compile *c, } -/* Post-fragment-program processing. Send the results to the - * framebuffer. +/** + * Move a GPR to scratch memory. */ static void emit_spill( struct brw_wm_compile *c, struct brw_reg reg, @@ -1042,11 +1044,13 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } +/** + * Load a GPR from scratch memory. + */ static void emit_unspill( struct brw_wm_compile *c, struct brw_reg reg, GLuint slot ) @@ -1067,13 +1071,13 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } /** - * Retrieve upto 4 GEN4 register pairs for the given wm reg: + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. */ static void get_argument_regs( struct brw_wm_compile *c, struct brw_wm_ref *arg[], @@ -1083,13 +1087,12 @@ static void get_argument_regs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (arg[i]) { - - if (arg[i]->unspill_reg) - emit_unspill(c, + if (arg[i]->unspill_reg) + emit_unspill(c, brw_vec8_grf(arg[i]->unspill_reg, 0), arg[i]->value->spill_slot); - regs[i] = arg[i]->hw_reg; + regs[i] = arg[i]->hw_reg; } else { regs[i] = brw_null_reg(); @@ -1098,6 +1101,9 @@ static void get_argument_regs( struct brw_wm_compile *c, } +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ static void spill_values( struct brw_wm_compile *c, struct brw_wm_value *values, GLuint nr ) diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 7ebe5b9..49aad28 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -112,6 +111,12 @@ static struct prog_src_register src_swizzle1( struct prog_src_register reg, int return src_swizzle(reg, x, x, x, x); } +static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) +{ + reg.Swizzle = swizzle; + return reg; +} + /*********************************************************************** * Dest regs @@ -187,6 +192,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint saturate, GLuint tex_src_unit, GLuint tex_src_target, + GLuint tex_shadow, struct prog_src_register src0, struct prog_src_register src1, struct prog_src_register src2 ) @@ -200,6 +206,7 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, inst->SaturateMode = saturate; inst->TexSrcUnit = tex_src_unit; inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; @@ -216,7 +223,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, struct prog_src_register src2 ) { return emit_tex_op(c, op, dest, saturate, - 0, 0, /* tex unit, target */ + 0, 0, 0, /* tex unit, target, shadow */ src0, src1, src2); } @@ -282,8 +289,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, @@ -548,7 +554,6 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -563,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -598,10 +603,9 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } - if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, @@ -646,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -745,6 +749,7 @@ static void precalc_tex( struct brw_wm_compile *c, inst->SaturateMode, unit, inst->TexSrcTarget, + inst->TexShadow, coord, src_undef(), src_undef()); @@ -805,21 +810,40 @@ static void precalc_tex( struct brw_wm_compile *c, inst->SaturateMode, unit, inst->TexSrcTarget, + inst->TexShadow, coord, src_undef(), src_undef()); } + /* For GL_EXT_texture_swizzle: */ + if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { + /* swizzle the result of the TEX instruction */ + struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); + emit_op(c, OPCODE_SWZ, + inst->DstReg, + SATURATE_OFF, /* saturate already done above */ + src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), + src_undef(), + src_undef()); + } + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src = inst->SrcReg[0]; + const struct prog_src_register src = inst->SrcReg[0]; + GLboolean retVal; + + assert(inst->Opcode == OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -829,16 +853,21 @@ static GLboolean projtex( struct brw_wm_compile *c, * user-provided fragment programs anyway: */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - return 0; /* ut2004 gun rendering !?! */ + retVal = GL_FALSE; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) - return 0; + (c->key.proj_attrib_mask & (1 << src.Index)) == 0) + retVal = GL_FALSE; else - return 1; + retVal = GL_TRUE; + + return retVal; } +/** + * Emit code for TXP. + */ static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { @@ -889,42 +918,41 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); struct prog_src_register outcolor; GLuint i; struct prog_instruction *inst, *last_inst; struct brw_context *brw = c->func.brw; - /* inst->Sampler is not used by backend, - use it for fb write target and eot */ - - if (brw->state.nr_draw_regions > 1) { - for (i = 0 ; i < brw->state.nr_draw_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - } - } - last_inst->Sampler |= 1; //eot + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = 1|(0<<1); + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); } } @@ -955,9 +983,9 @@ static void validate_dst_regs( struct brw_wm_compile *c, const struct prog_instruction *inst ) { if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) - c->fp_fragcolor_emitted = 1; + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; } } @@ -981,6 +1009,11 @@ static void print_insns( const struct prog_instruction *insn, } } + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; @@ -997,15 +1030,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->pixel_w = src_undef(); c->nr_fp_insns = 0; - /* Emit preamble instructions: + /* Emit preamble instructions. This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. */ - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); validate_dst_regs(c, inst); } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; @@ -1014,7 +1051,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * necessary: */ - switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); @@ -1024,14 +1060,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: @@ -1094,9 +1130,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index 5a5497e..a907e1b 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -8,12 +8,17 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: case OPCODE_TRUNC: @@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,7 +52,11 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -static int get_scalar_dst_index(struct prog_instruction *inst) +/** + * Examine instruction's write mask to find index of first component + * enabled for writing. + */ +static int get_scalar_dst_index(const struct prog_instruction *inst) { int i; for (i = 0; i < 4; i++) @@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c) return reg; } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -99,12 +130,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ reg = c->wm_regs[file][index][component].reg; - else + } + else { + /* no, allocate new register */ reg = brw_vec8_grf(c->reg_index, 0); + } - if(!c->wm_regs[file][index][component].inited) { + /* if this is a new register allocation, record it in the table */ + if (!c->wm_regs[file][index][component].inited) { set_reg(c, file, index, component, reg); c->reg_index++; } @@ -113,7 +150,7 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL /* ran out of temporary registers! */ #if 1 /* This is a big hack for now. - * Return bad register index, but don't just crash hange the GPU. + * Return bad register index, just don't hang the GPU. */ _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); c->reg_index = BRW_WM_MAX_GRF - 13; @@ -130,78 +167,273 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; - int nr_interp_regs = 0; + int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + c->reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; + const int nr_params = c->fp->program.Base.Parameters->NumParameters; + + /* use a real constant buffer, or just use a section of the GRF? */ + c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + + if (c->fp->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - } + /* fragment shader inputs */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = c->reg_index; + reg = brw_vec8_grf(c->reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + c->reg_index += 2; + } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index += 2; + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = alloc_tmp(c); + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); +#endif } + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + c->current_const[i].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); + } + } +} + + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } + +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->Negate & (1 << component)) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", + c->current_const[srcRegIndex].index, + component, + srcRegIndex, + const_reg.nr); +#endif + + return const_reg; +} + + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) -{ - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + if (c->fp->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->Negate, src->Abs); + } } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->Negate & (1 << channel)) + value = -value; + if (src->Abs) + value = FABSF(value); +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } +} + + +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -258,7 +490,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -266,8 +498,8 @@ static void emit_abs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, brw_abs(src)); } } @@ -275,7 +507,7 @@ static void emit_abs( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -284,8 +516,8 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_RNDZ(p, dst, src); } } @@ -293,7 +525,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -302,8 +534,10 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -311,7 +545,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -320,8 +554,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -338,21 +572,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -370,10 +603,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -404,7 +635,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -416,38 +647,64 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } - nr += 2; + if (c->key.dest_depth_reg) { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + assert(comp == 1); + assert(off == 0); +#if 0 + /* XXX do we need this code? comp always 1, off always 0, it seems */ + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else +#endif + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -455,10 +712,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -477,19 +734,19 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -498,7 +755,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); } @@ -506,17 +763,17 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -525,14 +782,14 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i],3)); } } } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -540,13 +797,13 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -555,7 +812,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); @@ -566,7 +823,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ static void emit_frontfacing(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -576,7 +833,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, brw_imm_f(0.0)); } } @@ -587,7 +844,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, brw_imm_f(1.0)); } } @@ -595,7 +852,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -605,12 +862,12 @@ static void emit_xpd(struct brw_wm_compile *c, GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -620,17 +877,17 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -639,16 +896,16 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -658,16 +915,16 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -682,7 +939,7 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst, tmp; @@ -692,7 +949,7 @@ static void emit_math1(struct brw_wm_compile *c, tmp = alloc_tmp(c); /* Get first component of source register */ - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); @@ -710,7 +967,7 @@ static void emit_math1(struct brw_wm_compile *c, /* replicate tmp value across enabled dest channels */ for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, tmp); } } @@ -719,43 +976,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -764,17 +1021,30 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -783,9 +1053,9 @@ static void emit_sub(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, negate(src1)); } } @@ -793,7 +1063,7 @@ static void emit_sub(struct brw_wm_compile *c, } static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -802,9 +1072,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -812,7 +1082,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -821,8 +1091,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -831,7 +1101,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -840,78 +1110,71 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); } -static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} -static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_min_max(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); + release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -927,7 +1190,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -936,10 +1199,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -947,7 +1210,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -980,7 +1243,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -989,10 +1252,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -1003,7 +1266,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1012,9 +1275,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1027,43 +1290,43 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1071,8 +1334,8 @@ static void emit_ddx(struct brw_wm_compile *c, struct brw_reg dst; struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + w = get_src_reg(c, inst, 1, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -1081,7 +1344,7 @@ static void emit_ddx(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, interp[i]); brw_MUL(p, dst, dst, w); } @@ -1090,7 +1353,7 @@ static void emit_ddx(struct brw_wm_compile *c, } static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1099,9 +1362,9 @@ static void emit_ddy(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + w = get_src_reg(c, inst, 1, 3); interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -1109,7 +1372,7 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i], 1)); brw_MUL(p, dst, dst, w); } @@ -1117,23 +1380,23 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static __inline struct brw_reg high_words( struct brw_reg reg ) +static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), 0, 8, 2 ); } -static __inline struct brw_reg low_words( struct brw_reg reg ) +static INLINE struct brw_reg low_words( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -static __inline struct brw_reg even_bytes( struct brw_reg reg ) +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); } -static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), 0, 16, 2 ); @@ -1233,7 +1496,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1243,7 +1506,7 @@ static void emit_noise1( struct brw_wm_compile *c, assert( mark == 0 ); - src = get_src_reg( c, inst->SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1255,7 +1518,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param ); } } @@ -1403,7 +1666,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1413,8 +1676,8 @@ static void emit_noise2( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1428,7 +1691,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1438,9 +1701,11 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* The three-dimensional case is much like the one- and two- versions above, - but since the number of corners is rapidly growing we now pack 16 16-bit - hashes into each register to extract more parallelism from the EUs. */ +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs. + */ static void noise3_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; @@ -1704,7 +1969,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -1714,9 +1979,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1732,7 +1997,7 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1742,13 +2007,15 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* For the four-dimensional case, the little micro-optimisation benefits - we obtain by unrolling all the loops aren't worth the massive bloat it - now causes. Instead, we loop twice around performing a similar operation - to noise3, once for the w=0 cube and once for the w=1, with a bit more - code to glue it all together. */ -static void noise4_sub( struct brw_wm_compile *c ) { - +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ struct brw_compile *p = &c->func; struct brw_reg param[ 4 ], x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ @@ -2125,7 +2392,7 @@ static void noise4_sub( struct brw_wm_compile *c ) { } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2135,10 +2402,10 @@ static void emit_noise4( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); - src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -2156,7 +2423,7 @@ static void emit_noise4( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2167,17 +2434,17 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -2200,27 +2467,28 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -2234,28 +2502,28 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), src[2]); break; } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2264,10 +2532,9 @@ static void emit_tex(struct brw_wm_compile *c, payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -2286,6 +2553,7 @@ static void emit_tex(struct brw_wm_compile *c, } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) @@ -2296,26 +2564,27 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0); /* eot */ if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + /** * Resolve subroutine calls after code emit is done. */ @@ -2340,7 +2609,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; + const struct prog_instruction *inst = &c->prog_instructions[i]; + +#if 0 + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); +#endif + + /* fetch any constants that this instruction needs */ + if (c->fp->use_const_buffer) + fetch_constants(c, inst); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); @@ -2381,6 +2659,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2397,6 +2678,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: + case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2429,11 +2711,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_lg2(c, inst); break; - case OPCODE_MAX: - emit_max(c, inst); - break; case OPCODE_MIN: - emit_min(c, inst); + case OPCODE_MAX: + emit_min_max(c, inst); break; case OPCODE_DDX: emit_ddx(c, inst); @@ -2531,6 +2811,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: @@ -2574,10 +2855,27 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } } + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; } diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index fca7b7a..9214276 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -51,6 +51,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) return &c->vreg[c->nr_vreg++]; } +/** return pointer to a newly allocated instruction */ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); @@ -60,6 +61,7 @@ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) /*********************************************************************** */ +/** Init the "undef" register */ static void pass0_init_undef( struct brw_wm_compile *c) { struct brw_wm_ref *ref = &c->undef_ref; @@ -69,6 +71,7 @@ static void pass0_init_undef( struct brw_wm_compile *c) ref->prevuse = NULL; } +/** Set a FP register to a value */ static void pass0_set_fpreg_value( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -83,6 +86,7 @@ static void pass0_set_fpreg_value( struct brw_wm_compile *c, c->pass0_fp_reg[file][idx][component] = ref; } +/** Set a FP register to a ref */ static void pass0_set_fpreg_ref( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -115,12 +119,13 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, ref->value = &c->creg[i/16]; ref->insn = 0; ref->prevuse = NULL; - + return ref; } } +/** Return a ref to a constant/literal value */ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, const GLfloat *constval ) { @@ -142,7 +147,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, */ c->constref[i].constval = *constval; c->constref[i].ref = get_param_ref(c, constval); - + return c->constref[i].ref; } else { @@ -187,7 +192,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, /* There's something really hokey about parameters parsed in * arb programs - they all end up in here, whether they be - * state values, paramters or constants. This duplicates the + * state values, parameters or constants. This duplicates the * structure above & also seems to subvert the limits set for * each type of constant/param. */ @@ -198,7 +203,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, */ ref = get_const_ref(c, &plist->ParameterValues[idx][component]); break; - + case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: @@ -229,14 +234,13 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, - /*********************************************************************** * Straight translation to internal instruction format */ static void pass0_set_dst( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, GLuint writemask ) { const struct prog_dst_register *dst = &inst->DstReg; @@ -245,18 +249,17 @@ static void pass0_set_dst( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (writemask & (1<<i)) { out->dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); } } - + out->writemask = writemask; } static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, GLuint writemask ) { if (writemask) { @@ -282,7 +285,6 @@ static void pass0_set_dst_scalar( struct brw_wm_compile *c, } - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -292,14 +294,13 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, static const GLfloat const_zero = 0.0; static const GLfloat const_one = 1.0; - if (component == SWIZZLE_ZERO) src_ref = get_const_ref(c, &const_zero); else if (component == SWIZZLE_ONE) src_ref = get_const_ref(c, &const_one); else src_ref = pass0_get_reg(c, src.File, src.Index, component); - + return src_ref; } @@ -311,19 +312,19 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, { const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); struct brw_wm_ref *newref = get_ref(c); - + newref->value = ref->value; newref->hw_reg = ref->hw_reg; - if (insn) { + if (insn) { newref->insn = insn - c->instruction; newref->prevuse = newref->value->lastuse; newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; - + if (src.Abs) { newref->hw_reg.negate = 0; newref->hw_reg.abs = 1; @@ -333,9 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, } - -static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); GLuint writemask = inst->DstReg.WriteMask; @@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; - out->eot = inst->Sampler & 1; - out->target = inst->Sampler>>1; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; /* Args: */ @@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, pass0_set_dst_scalar(c, out, inst, writemask); else pass0_set_dst(c, out, inst, writemask); - - return out; } @@ -426,6 +426,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) &c->payload.input_interp[i] ); } + /*********************************************************************** * PASS 0 * @@ -448,7 +449,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_fp_insns; insn++) { const struct prog_instruction *inst = &c->prog_instructions[insn]; - /* Optimize away moves, otherwise emit translated instruction: */ switch (inst->Opcode) { @@ -461,8 +461,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) translate_insn(c, inst); } break; - - default: translate_insn(c, inst); break; @@ -473,4 +471,3 @@ void brw_wm_pass0( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass0"); } } - diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c index a1fea6f..ab9aa2f 100644 --- a/i965/brw_wm_pass1.c +++ b/i965/brw_wm_pass1.c @@ -58,7 +58,8 @@ static void unlink_ref(struct brw_wm_ref *ref) if (ref == value->lastuse) { value->lastuse = ref->prevuse; - } else { + } + else { struct brw_wm_ref *i = value->lastuse; while (i->prevuse != ref) i = i->prevuse; i->prevuse = ref->prevuse; @@ -75,8 +76,9 @@ static void track_arg(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { struct brw_wm_ref *ref = inst->src[arg][i]; if (ref) { - if (readmask & (1<<i)) + if (readmask & (1<<i)) { ref->value->contributes_to_output = 1; + } else { unlink_ref(ref); inst->src[arg][i] = NULL; @@ -88,15 +90,21 @@ static void track_arg(struct brw_wm_compile *c, static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { - case TEXTURE_1D_INDEX: return WRITEMASK_X; - case TEXTURE_2D_INDEX: return WRITEMASK_XY; - case TEXTURE_3D_INDEX: return WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: return WRITEMASK_XY; + case TEXTURE_1D_INDEX: + return WRITEMASK_X; + case TEXTURE_2D_INDEX: + return WRITEMASK_XY; + case TEXTURE_3D_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_CUBE_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_RECT_INDEX: + return WRITEMASK_XY; default: return 0; } } + /* Step two: Basically this is dead code elimination. * * Iterate backwards over instructions, noting which values @@ -202,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_TEX: + case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; @@ -259,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: - case OPCODE_TXP: case WM_FRONTFACING: default: break; @@ -274,6 +282,3 @@ void brw_wm_pass1( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass1"); } } - - - diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c index 6fca9ad..6faea01 100644 --- a/i965/brw_wm_pass2.c +++ b/i965/brw_wm_pass2.c @@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - struct brw_context *brw = c->func.brw; - GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -84,18 +82,22 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) - if (inputs & (1<<j)) { - /* index for vs output and ps input are not the same - in shader varying */ - GLuint index; - if (j > FRAG_ATTRIB_VAR0) - index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + for (j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (c->key.vp_outputs_written & (1<<j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; else - index = j; + fp_index = -1; + nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[index], i++); + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); } + } assert(nr_interp_regs >= 1); @@ -120,7 +122,7 @@ static void update_register_usage(struct brw_wm_compile *c, /* Only search those which can change: */ if (grf->nextuse < thisinsn) { - struct brw_wm_ref *ref = grf->value->lastuse; + const struct brw_wm_ref *ref = grf->value->lastuse; /* Has last use of value been passed? */ @@ -148,7 +150,7 @@ static void spill_value(struct brw_wm_compile *c, /* Allocate a spill slot. Note that allocations start from 0x40 - * the first slot is reserved to mean "undef" in brw_wm_emit.c */ - if (!value->spill_slot) { + if (!value->spill_slot) { c->last_scratch += 0x40; value->spill_slot = c->last_scratch; } @@ -189,7 +191,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, if (grf[i+j].nextuse < group_nextuse) group_nextuse = grf[i+j].nextuse; } - + if (group_nextuse > furthest) { furthest = group_nextuse; reg = i; @@ -197,7 +199,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, } assert(furthest != thisinsn); - + /* Any non-empty regs will need to be spilled: */ for (j = 0; j < nr; j++) @@ -243,7 +245,7 @@ static void alloc_contiguous_dest(struct brw_wm_compile *c, static void load_args(struct brw_wm_compile *c, struct brw_wm_instruction *inst) -{ +{ GLuint thisinsn = inst - c->instruction; GLuint i,j; @@ -258,17 +260,17 @@ static void load_args(struct brw_wm_compile *c, * register allocation and mark the ref as requiring a fill. */ GLuint reg = search_contiguous_regs(c, 1, thisinsn); - + c->pass2_grf[reg].value = ref->value; c->pass2_grf[reg].nextuse = thisinsn; - + ref->value->resident = &c->pass2_grf[reg]; /* Note that a fill is required: */ ref->unspill_reg = reg*2; } - + /* Adjust the hw_reg to point at the value's current location: */ assert(ref->value == ref->value->resident->value); @@ -294,7 +296,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; - + /* Update registers' nextuse values: */ update_register_usage(c, insn); @@ -322,11 +324,11 @@ void brw_wm_pass2( struct brw_wm_compile *c ) break; } - if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { for (i = 0; i < 4; i++) if (inst->dst[i]) spill_value(c, inst->dst[i]); - + } } if (INTEL_DEBUG & DEBUG_WM) { @@ -339,6 +341,3 @@ void brw_wm_pass2( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass2/done"); } } - - - diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index 68a9296..3fc18ff 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -152,7 +152,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } @@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } + else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } else { sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); @@ -217,6 +227,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } + /** Sets up the cache key for sampler state for all texture units */ static void brw_wm_sampler_populate_key(struct brw_context *brw, diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 1844eba..67b4117 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = brw_wm_is_glsl(fp); + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; @@ -121,6 +125,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->offset_factor = ctx->Polygon.OffsetFactor; } +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) @@ -138,7 +145,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_buffer->offset >> 10; /* reloc */ + brw->wm.scratch_bo->offset >> 10; /* reloc */ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -147,9 +154,9 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.thread3.urb_entry_read_offset = 0; wm.wm4.sampler_count = (key->sampler_count + 1) / 4; if (brw->wm.sampler_bo != NULL) { @@ -216,7 +223,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + brw->wm.scratch_bo); } /* Emit sampler state relocation */ @@ -247,20 +254,20 @@ static void upload_wm_unit( struct brw_context *brw ) if (key.total_scratch) { GLuint total = key.total_scratch * key.max_threads; - if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { + dri_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; } - if (brw->wm.scratch_buffer == NULL) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); } } reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; dri_bo_unreference(brw->wm.state_bo); @@ -283,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 3487b85..805df8a 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -33,11 +33,12 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -69,7 +70,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +91,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -133,13 +141,34 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) case MESA_FORMAT_RGBA_DXT5: return BRW_SURFACEFORMAT_BC3_UNORM; - case MESA_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SARGB8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case MESA_FORMAT_SLA8: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case MESA_FORMAT_SL8: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; case MESA_FORMAT_S8_Z24: - return BRW_SURFACEFORMAT_I24X8_UNORM; + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. + */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; default: assert(0); @@ -147,10 +176,14 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) } } -struct brw_wm_surface_key { + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -158,6 +191,7 @@ struct brw_wm_surface_key { GLuint offset; }; + static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -179,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -188,9 +222,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -256,7 +292,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -267,6 +304,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; @@ -282,34 +320,207 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } } + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +static dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + + +/** + * Update the surface state for a VS constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(surf == 0); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit, GLboolean cached) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; + uint32_t draw_offset; } key; memset(&key, 0, sizeof(key)); @@ -318,14 +529,29 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ + key.width = region->width; key.height = region->height; + key.pitch = region->pitch; key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -333,6 +559,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.width = 1; key.height = 1; key.cpp = 4; + key.draw_offset = 0; } memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); @@ -354,13 +581,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; + surf.ss1.base_addr = key.draw_offset; if (region_bo != NULL) - surf.ss1.base_addr = region_bo->offset; /* reloc */ + surf.ss1.base_addr += region_bo->offset; /* reloc */ surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -371,7 +599,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -380,12 +608,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * them both. We might be able to figure out from other state * a more restrictive relocation to emit. */ - dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + offsetof(struct brw_surface_state, ss1), + region_bo, + key.draw_offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } } } @@ -400,6 +628,8 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, @@ -446,54 +676,159 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_draw_regions > 1) { - for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, - GL_FALSE); + /* _NEW_BUFFERS */ + /* Update surfaces for drawing buffers */ + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i, + GL_FALSE); } - }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + } else { + brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + /* Update surface / buffer for fragment shader constant buffer */ + { + const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + fp->const_buffer = + brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, + fp->program.Base.Parameters); + + brw->wm.nr_surfaces = surf + 1; + } + + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ - if(texUnit->_ReallyEnabled) { + if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + /* render to texture */ + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { + /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } - } dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->vs.nr_surfaces; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->vs.surf_bo[i]); + } + } + + free(data); + } + + return bind_bo; +} + + +/** + * Vertex shader surfaces. Just constant buffer for now. Could add vertex + * shader textures in the future. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* Update surface / buffer for vertex shader constant buffer */ + { + const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + vp->const_buffer = + brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, + vp->program.Base.Parameters); + + brw->vs.nr_surfaces = 1; + } + + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + + if (1) + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; +} + + +static void +prepare_surfaces(struct brw_context *brw) +{ + prepare_wm_surfaces(brw); + prepare_vs_surfaces(brw); } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_wm_surfaces, + .prepare = prepare_surfaces, }; diff --git a/i965/intel_state.c b/i965/intel_state.c deleted file mode 100644 index 0c9c670..0000000 --- a/i965/intel_state.c +++ /dev/null @@ -1,233 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/colormac.h" -#include "main/dd.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_regions.h" -#include "swrast/swrast.h" - -int -intel_translate_shadow_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_ALWAYS; - case GL_LESS: - return COMPAREFUNC_LEQUAL; - case GL_LEQUAL: - return COMPAREFUNC_LESS; - case GL_GREATER: - return COMPAREFUNC_GEQUAL; - case GL_GEQUAL: - return COMPAREFUNC_GREATER; - case GL_NOTEQUAL: - return COMPAREFUNC_EQUAL; - case GL_EQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_ALWAYS: - return COMPAREFUNC_NEVER; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; -} - -int -intel_translate_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_NEVER; - case GL_LESS: - return COMPAREFUNC_LESS; - case GL_LEQUAL: - return COMPAREFUNC_LEQUAL; - case GL_GREATER: - return COMPAREFUNC_GREATER; - case GL_GEQUAL: - return COMPAREFUNC_GEQUAL; - case GL_NOTEQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_EQUAL: - return COMPAREFUNC_EQUAL; - case GL_ALWAYS: - return COMPAREFUNC_ALWAYS; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_ALWAYS; -} - -int -intel_translate_stencil_op( GLenum op ) -{ - switch(op) { - case GL_KEEP: - return STENCILOP_KEEP; - case GL_ZERO: - return STENCILOP_ZERO; - case GL_REPLACE: - return STENCILOP_REPLACE; - case GL_INCR: - return STENCILOP_INCRSAT; - case GL_DECR: - return STENCILOP_DECRSAT; - case GL_INCR_WRAP: - return STENCILOP_INCR; - case GL_DECR_WRAP: - return STENCILOP_DECR; - case GL_INVERT: - return STENCILOP_INVERT; - default: - return STENCILOP_ZERO; - } -} - -int -intel_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BLENDFACT_ZERO; - case GL_SRC_ALPHA: - return BLENDFACT_SRC_ALPHA; - case GL_ONE: - return BLENDFACT_ONE; - case GL_SRC_COLOR: - return BLENDFACT_SRC_COLR; - case GL_ONE_MINUS_SRC_COLOR: - return BLENDFACT_INV_SRC_COLR; - case GL_DST_COLOR: - return BLENDFACT_DST_COLR; - case GL_ONE_MINUS_DST_COLOR: - return BLENDFACT_INV_DST_COLR; - case GL_ONE_MINUS_SRC_ALPHA: - return BLENDFACT_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BLENDFACT_DST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return BLENDFACT_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BLENDFACT_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BLENDFACT_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BLENDFACT_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BLENDFACT_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BLENDFACT_INV_CONST_ALPHA; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); - return BLENDFACT_ZERO; -} - -int -intel_translate_logic_op( GLenum opcode ) -{ - switch(opcode) { - case GL_CLEAR: - return LOGICOP_CLEAR; - case GL_AND: - return LOGICOP_AND; - case GL_AND_REVERSE: - return LOGICOP_AND_RVRSE; - case GL_COPY: - return LOGICOP_COPY; - case GL_COPY_INVERTED: - return LOGICOP_COPY_INV; - case GL_AND_INVERTED: - return LOGICOP_AND_INV; - case GL_NOOP: - return LOGICOP_NOOP; - case GL_XOR: - return LOGICOP_XOR; - case GL_OR: - return LOGICOP_OR; - case GL_OR_INVERTED: - return LOGICOP_OR_INV; - case GL_NOR: - return LOGICOP_NOR; - case GL_EQUIV: - return LOGICOP_EQUIV; - case GL_INVERT: - return LOGICOP_INV; - case GL_OR_REVERSE: - return LOGICOP_OR_RVRSE; - case GL_NAND: - return LOGICOP_NAND; - case GL_SET: - return LOGICOP_SET; - default: - return LOGICOP_SET; - } -} - - -static void -intelClearColor(GLcontext *ctx, const GLfloat color[4]) -{ - struct intel_context *intel = intel_context(ctx); - GLubyte clear[4]; - - CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - - /* compute both 32 and 16-bit clear values */ - intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1], - clear[2], clear[3]); - intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]); -} - - -/* Fallback to swrast for select and feedback. - */ -static void -intelRenderMode( GLcontext *ctx, GLenum mode ) -{ - struct intel_context *intel = intel_context(ctx); - FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) ); -} - - -void -intelInitStateFuncs( struct dd_function_table *functions ) -{ - functions->RenderMode = intelRenderMode; - functions->ClearColor = intelClearColor; -} |