diff options
author | Roland Scheidegger <sroland@vmware.com> | 2010-12-02 04:32:06 +0100 |
---|---|---|
committer | Roland Scheidegger <sroland@vmware.com> | 2010-12-02 04:32:06 +0100 |
commit | a45bd509014743d21a532194d7b658a1aeb00cb7 (patch) | |
tree | d5fc155ee50d2f41fa9e7d4a253a8f9c51bc9e51 | |
parent | 1aeca287a827f29206078fa1204715a477072c08 (diff) | |
parent | 32e1e591467d9a28c2ac4d2e17af7be2dc429d43 (diff) |
Merge remote branch 'origin/master' into gallium-array-textures
Conflicts:
src/gallium/drivers/i915/i915_resource_texture.c
src/gallium/drivers/i915/i915_state_emit.c
src/gallium/drivers/i915/i915_surface.c
56 files changed, 1249 insertions, 351 deletions
diff --git a/src/gallium/drivers/i915/TODO b/src/gallium/drivers/i915/TODO new file mode 100644 index 0000000000..94c428bebf --- /dev/null +++ b/src/gallium/drivers/i915/TODO @@ -0,0 +1,25 @@ +Random list of problems with i915g: + +- Dies with BadDrawable on GLXFBconfig changes/destruction. Makes piglit totally + unusable :( Upgrading xserver helped here, it doesn't crash anymore. Still + broken, it doesn't update the viewport/get new buffers. + +- Tends to hang the chip after a few minutes of openarena. Looks tiling related, + at the last frame rendered has tiling corruption over the complete frame. + +- Kills the chip in 3D_PRIMITIVE LINELIST with mesa-demos/fbotexture in + wireframe mode. + +- Tiling is funny: If unlucky, it renders/samples all black. No clue yet what's + going on. Seems to depend on tiny details like whethever the sampler + relocation is fenced/unfenced (broken _with_ fenced reloc using tiling bits!). + +- Y-tiling is even more fun. i915c doesn't use it, maybe there's a reason? + Texture sampling from Y-tiled buffers seems to work, though (save above + problems). + +- Need to validate buffers before usage. Currently do_exec on the batchbuffer + can fail with -ENOSPC. 
+ +Other bugs can be found here: +https://bugs.freedesktop.org/buglist.cgi?bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&component=Drivers/Gallium/i915g diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index c411b84ccd..6e93da7620 100644 --- a/src/gallium/drivers/i915/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h @@ -38,7 +38,10 @@ i915_winsys_batchbuffer_dword(i915->batch, dword) #define OUT_RELOC(buf, usage, offset) \ - i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset) + i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, false) + +#define OUT_RELOC_FENCED(buf, usage, offset) \ + i915_winsys_batchbuffer_reloc(i915->batch, buf, usage, offset, true) #define FLUSH_BATCH(fence) \ i915_flush(i915, fence) diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index c1cd314e7b..d92b2ccb31 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -29,42 +29,47 @@ #define I915_BATCHBUFFER_H #include "i915_winsys.h" +#include "util/u_debug.h" struct i915_context; +static INLINE size_t +i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) +{ + return batch->size - (batch->ptr - batch->map); +} + static INLINE boolean i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch, size_t dwords, size_t relocs) { - return dwords * 4 <= batch->size - (batch->ptr - batch->map) && + return dwords * 4 <= i915_winsys_batchbuffer_space(batch) && relocs <= (batch->max_relocs - batch->relocs); } -static INLINE size_t -i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch) +static INLINE void +i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch, + unsigned dword) { - return batch->size - (batch->ptr - batch->map); + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; } static INLINE void i915_winsys_batchbuffer_dword(struct 
i915_winsys_batchbuffer *batch, unsigned dword) { - if (i915_winsys_batchbuffer_space(batch) < 4) - return; - - *(unsigned *)batch->ptr = dword; - batch->ptr += 4; + assert (i915_winsys_batchbuffer_space(batch) >= 4); + i915_winsys_batchbuffer_dword_unchecked(batch, dword); } static INLINE void i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch, - void *data, - size_t size) + void *data, + size_t size) { - if (i915_winsys_batchbuffer_space(batch) < size) - return; + assert (i915_winsys_batchbuffer_space(batch) >= size); memcpy(data, batch->ptr, size); batch->ptr += size; @@ -74,9 +79,9 @@ static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - size_t offset) + size_t offset, bool fenced) { - return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset); + return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced); } #endif diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index cdf20c0055..97c2566515 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -74,7 +74,7 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(BR13); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); + OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH(color); } @@ -138,8 +138,8 @@ i915_copy_blit(struct i915_context *i915, OUT_BATCH(BR13); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); + OUT_RELOC_FENCED(dst_buffer, I915_USAGE_2D_TARGET, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); - OUT_RELOC(src_buffer, I915_USAGE_2D_SOURCE, src_offset); + OUT_RELOC_FENCED(src_buffer, I915_USAGE_2D_SOURCE, src_offset); } diff --git 
a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 3ae61d0ea7..7103a1b8c1 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -193,8 +193,7 @@ struct i915_velems_state { }; -struct i915_context -{ +struct i915_context { struct pipe_context base; struct i915_winsys *iws; diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 57d3390dea..d7150c99c4 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -46,10 +46,12 @@ static const struct debug_named_value debug_options[] = { }; unsigned i915_debug = 0; +boolean i915_tiling = TRUE; void i915_debug_init(struct i915_screen *screen) { i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); + i915_tiling = !debug_get_bool_option("I915_NO_TILING", FALSE); } diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index fa60799d0c..11af7662f0 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -46,6 +46,7 @@ struct i915_winsys_batchbuffer; #define DBG_CONSTANTS 0x20 extern unsigned i915_debug; +extern boolean i915_tiling; #ifdef DEBUG static INLINE boolean diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index bd046bd905..baebbc7bae 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -172,6 +172,7 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) * * Side effects: * Updates hw_offset, sw_offset, index and allocates a new buffer. + * Will set i915->vbo to null on buffer allocation. 
*/ static void i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) @@ -179,8 +180,16 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; - if (i915_render->vbo) + if (i915_render->vbo) { iws->buffer_destroy(iws, i915_render->vbo); + /* + * XXX If buffers where referenced then this should be done in + * update_vbo_state but since they arn't and malloc likes to reuse + * memory we need to set it to null + */ + i915->vbo = NULL; + i915_render->vbo = NULL; + } i915->vbo_flushed = 0; @@ -198,7 +207,7 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) #endif i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, - 64, I915_NEW_VERTEX); + I915_NEW_VERTEX); } /** @@ -726,7 +735,7 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->pool_fifo = u_fifo_create(6); for (i = 0; i < 6; i++) u_fifo_add(i915_render->pool_fifo, - iws->buffer_create(iws, i915_render->pool_buffer_size, 64, + iws->buffer_create(iws, i915_render->pool_buffer_size, I915_NEW_VERTEX)); #else (void)i; diff --git a/src/gallium/drivers/i915/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index cc28891e4a..5e4e80ddf6 100644 --- a/src/gallium/drivers/i915/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h @@ -753,7 +753,7 @@ #define MT_COMPRESS_DXT1_RGB (4<<3) #define MS3_USE_FENCE_REGS (1<<2) #define MS3_TILED_SURFACE (1<<1) -#define MS3_TILE_WALK (1<<0) +#define MS3_TILE_WALK_Y (1<<0) #define MS4_PITCH_SHIFT 21 #define MS4_CUBE_FACE_ENA_NEGX (1<<20) @@ -851,6 +851,7 @@ #define MI_FLUSH ((0<<29)|(4<<23)) #define FLUSH_MAP_CACHE (1<<0) #define INHIBIT_FLUSH_RENDER_CACHE (1<<2) +#define MI_NOOP 0 #define CMD_3D (0x3<<29) diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h index 753bd266b1..86620e6a12 100644 --- a/src/gallium/drivers/i915/i915_resource.h +++ 
b/src/gallium/drivers/i915/i915_resource.h @@ -49,6 +49,10 @@ struct i915_buffer { #define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */ +struct offset_pair { + unsigned short nblocksx; + unsigned short nblocksy; +}; struct i915_texture { struct u_resource b; @@ -63,14 +67,18 @@ struct i915_texture { /* Explicitly store the offset of each image for each cube face or * depth value. + * + * Array [depth] off offsets. */ - unsigned *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /**< array [depth] of offsets */ + struct offset_pair *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /* The data is held here: */ struct i915_winsys_buffer *buffer; }; +unsigned i915_texture_offset(struct i915_texture *tex, + unsigned level, unsigned layer); void i915_init_screen_resource_functions(struct i915_screen *is); void i915_init_resource_functions(struct i915_context *i915); diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 67356abe20..f19106f341 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -106,6 +106,23 @@ get_pot_stride(enum pipe_format format, unsigned width) return util_next_power_of_two(util_format_get_stride(format, width)); } +static INLINE const char* +get_tiling_string(enum i915_winsys_buffer_tile tile) +{ + switch(tile) { + case I915_TILE_NONE: + return "none"; + case I915_TILE_X: + return "x"; + case I915_TILE_Y: + return "y"; + default: + assert(FALSE); + return "?"; + } +} + + /* * More advanced helper funcs */ @@ -120,28 +137,56 @@ i915_texture_set_level_info(struct i915_texture *tex, assert(!tex->image_offset[level]); tex->nr_images[level] = nr_images; - tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); - tex->image_offset[level][0] = 0; + tex->image_offset[level] = MALLOC(nr_images * sizeof(struct offset_pair)); + tex->image_offset[level][0].nblocksx = 0; + tex->image_offset[level][0].nblocksy = 0; +} + 
+INLINE unsigned i915_texture_offset(struct i915_texture *tex, + unsigned level, unsigned layer) +{ + unsigned x, y; + x = tex->image_offset[level][layer].nblocksx + * util_format_get_blocksize(tex->b.b.format); + y = tex->image_offset[level][layer].nblocksy; + + return y * tex->stride + x; } static void i915_texture_set_image_offset(struct i915_texture *tex, unsigned level, unsigned img, - unsigned x, unsigned y) + unsigned nblocksx, unsigned nblocksy) { /* for the first image and level make sure offset is zero */ - assert(!(img == 0 && level == 0) || (x == 0 && y == 0)); + assert(!(img == 0 && level == 0) || (nblocksx == 0 && nblocksy == 0)); assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->b.b.format); + tex->image_offset[level][img].nblocksx = nblocksx; + tex->image_offset[level][img].nblocksy = nblocksy; #if DEBUG_TEXTURES - debug_printf("%s: %p level %u, img %u (%u, %u) %p\n", __FUNCTION__, - tex, level, img, x, y, - (void*)(uintptr_t)tex->image_offset[level][img]); + debug_printf("%s: %p level %u, img %u (%u, %u)\n", __FUNCTION__, + tex, level, img, x, y); #endif } +static enum i915_winsys_buffer_tile +i915_texture_tiling(struct pipe_resource *pt) +{ + if (!i915_tiling) + return I915_TILE_NONE; + + if (pt->target == PIPE_TEXTURE_1D) + return I915_TILE_NONE; + + if (util_format_is_s3tc(pt->format)) + /* XXX X-tiling might make sense */ + return I915_TILE_NONE; + + return I915_TILE_X; +} + /* * Shared layout functions @@ -163,9 +208,10 @@ i9x5_scanout_layout(struct i915_texture *tex) i915_texture_set_image_offset(tex, 0, 0, 0, 0); if (pt->width0 >= 240) { - tex->stride = get_pot_stride(pt->format, pt->width0); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); tex->tiling = I915_TILE_X; + /* special case for cursors */ } else if (pt->width0 == 64 && pt->height0 == 64) { tex->stride = 
get_pot_stride(pt->format, pt->width0); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); @@ -200,7 +246,7 @@ i9x5_display_target_layout(struct i915_texture *tex) i915_texture_set_level_info(tex, 0, 1); i915_texture_set_image_offset(tex, 0, 0, 0, 0); - tex->stride = get_pot_stride(pt->format, pt->width0); + tex->stride = align(util_format_get_stride(pt->format, pt->width0), 64); tex->total_nblocksy = align_nblocksy(pt->format, pt->height0, 8); tex->tiling = I915_TILE_X; @@ -357,6 +403,8 @@ i915_texture_layout(struct i915_texture * tex) { struct pipe_resource *pt = &tex->b.b; + tex->tiling = i915_texture_tiling(pt); + switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: @@ -603,6 +651,8 @@ i945_texture_layout(struct i915_texture * tex) { struct pipe_resource *pt = &tex->b.b; + tex->tiling = i915_texture_tiling(pt); + switch (pt->target) { case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: @@ -687,7 +737,6 @@ i915_texture_get_transfer(struct pipe_context *context, return transfer; } - static void * i915_texture_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) @@ -703,7 +752,7 @@ i915_texture_transfer_map(struct pipe_context *pipe, if (resource->target != PIPE_TEXTURE_3D && resource->target != PIPE_TEXTURE_CUBE) assert(box->z == 0); - offset = tex->image_offset[transfer->level][box->z]; + offset = i915_texture_offset(tex, transfer->level, box->z); map = iws->buffer_map(iws, tex->buffer, (transfer->usage & PIPE_TRANSFER_WRITE) ? 
TRUE : FALSE); @@ -749,7 +798,6 @@ i915_texture_create(struct pipe_screen *screen, struct i915_screen *is = i915_screen(screen); struct i915_winsys *iws = is->iws; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - size_t tex_size; unsigned buf_usage = 0; if (!tex) @@ -768,8 +816,6 @@ i915_texture_create(struct pipe_screen *screen, goto fail; } - tex_size = tex->stride * tex->total_nblocksy; - /* for scanouts and cursors, cursors arn't scanouts */ /* XXX: use a custom flag for cursors, don't rely on magically @@ -780,27 +826,15 @@ i915_texture_create(struct pipe_screen *screen, else buf_usage = I915_NEW_TEXTURE; - tex->buffer = iws->buffer_create(iws, tex_size, 64, buf_usage); + tex->buffer = iws->buffer_create_tiled(iws, &tex->stride, tex->total_nblocksy, + &tex->tiling, buf_usage); if (!tex->buffer) goto fail; - /* setup any hw fences */ - if (tex->tiling) { - iws->buffer_set_fence_reg(iws, tex->buffer, tex->stride, tex->tiling); - } - - -#if 0 - void *ptr = ws->buffer_map(ws, tex->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE); - memset(ptr, 0x80, tex_size); - ws->buffer_unmap(ws, tex->buffer); -#endif - - I915_DBG(DBG_TEXTURE, "%s: %p size %u, stride %u, blocks (%u, %u)\n", __func__, - tex, (unsigned int)tex_size, tex->stride, + I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__, + tex, tex->stride, tex->stride / util_format_get_blocksize(tex->b.b.format), - tex->total_nblocksy); + tex->total_nblocksy, get_tiling_string(tex->tiling)); return &tex->b.b; @@ -819,10 +853,11 @@ i915_texture_from_handle(struct pipe_screen * screen, struct i915_winsys *iws = is->iws; struct i915_winsys_buffer *buffer; unsigned stride; + enum i915_winsys_buffer_tile tiling; assert(screen); - buffer = iws->buffer_from_handle(iws, whandle, &stride); + buffer = iws->buffer_from_handle(iws, whandle, &tiling, &stride); /* Only supports one type */ if ((template->target != PIPE_TEXTURE_2D && @@ -842,6 +877,7 @@ i915_texture_from_handle(struct pipe_screen * 
screen, tex->b.b.screen = screen; tex->stride = stride; + tex->tiling = tiling; tex->total_nblocksy = align_nblocksy(tex->b.b.format, tex->b.b.height0, 8); i915_texture_set_level_info(tex, 0, 1); @@ -849,10 +885,10 @@ i915_texture_from_handle(struct pipe_screen * screen, tex->buffer = buffer; - I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%ux%u)\n", __func__, + I915_DBG(DBG_TEXTURE, "%s: %p stride %u, blocks (%u, %u) tiling %s\n", __func__, tex, tex->stride, tex->stride / util_format_get_blocksize(tex->b.b.format), - tex->total_nblocksy); + tex->total_nblocksy, get_tiling_string(tex->tiling)); return &tex->b.b; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 9292fa00b4..c48d53ffbb 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -86,6 +86,22 @@ framebuffer_size(const struct pipe_framebuffer_state *fb, } } +static inline uint32_t +buf_3d_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= BUF_3D_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= BUF_3D_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} /* Push the state into the sarea and/or texture memory. */ @@ -220,44 +236,39 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned ctile = BUF_3D_USE_FENCE; struct i915_texture *tex = i915_texture(cbuf_surface->texture); - unsigned offset; assert(tex); - offset = tex->image_offset[cbuf_surface->u.tex.level][cbuf_surface->u.tex.first_layer]; - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ctile); + buf_3d_tiling_bits(tex->tiling)); OUT_RELOC(tex->buffer, I915_USAGE_RENDER, - offset); + 0); } /* What happens if no zbuf?? 
*/ if (depth_surface) { - unsigned ztile = BUF_3D_USE_FENCE; struct i915_texture *tex = i915_texture(depth_surface->texture); - unsigned offset; + unsigned offset = i915_texture_offset(tex, depth_surface->u.tex.level, + depth_surface->u.tex.first_layer); assert(tex); - - offset = tex->image_offset[depth_surface->u.tex.level][depth_surface->u.tex.first_layer]; + assert(offset == 0); OUT_BATCH(_3DSTATE_BUF_INFO_CMD); assert(tex); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ztile); + buf_3d_tiling_bits(tex->tiling)); OUT_RELOC(tex->buffer, I915_USAGE_RENDER, - offset); + 0); } { @@ -299,12 +310,11 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (enabled & (1 << unit)) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); struct i915_winsys_buffer *buf = texture->buffer; - uint offset = 0; assert(buf); count++; - OUT_RELOC(buf, I915_USAGE_SAMPLER, offset); + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ } @@ -397,18 +407,33 @@ i915_emit_hardware_state(struct i915_context *i915 ) #if 01 /* drawing surface size */ /* 6 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) { uint w, h; - boolean k = framebuffer_size(&i915->framebuffer, &w, &h); - (void)k; - assert(k); + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct i915_texture *tex = i915_texture(cbuf_surface->texture); + unsigned x, y; + int layer; + uint32_t draw_offset; + boolean ret; + ret = framebuffer_size(&i915->framebuffer, &w, &h); + assert(ret); + + layer = cbuf_surface->u.tex.first_layer; + + x = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksx; + y = tex->image_offset[cbuf_surface->u.tex.level][layer].nblocksy; + + draw_offset = x | (y << 16); + + /* XXX flush only required when the draw_offset changes! 
*/ + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); - OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(draw_offset); + OUT_BATCH((w - 1 + x) | ((h - 1 + y) << 16)); + OUT_BATCH(draw_offset); } #endif diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index 9771274ca1..916cb76753 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -243,6 +243,23 @@ static uint translate_texture_format(enum pipe_format pipeFormat) } } +static inline uint32_t +ms3_tiling_bits(enum i915_winsys_buffer_tile tiling) +{ + uint32_t tiling_bits = 0; + + switch (tiling) { + case I915_TILE_Y: + tiling_bits |= MS3_TILE_WALK_Y; + case I915_TILE_X: + tiling_bits |= MS3_TILED_SURFACE; + case I915_TILE_NONE: + break; + } + + return tiling_bits; +} + static void update_map(struct i915_context *i915, uint unit, const struct i915_texture *tex, @@ -254,7 +271,6 @@ static void update_map(struct i915_context *i915, const uint width = pt->width0, height = pt->height0, depth = pt->depth0; const uint num_levels = pt->last_level; unsigned max_lod = num_levels * 4; - unsigned tiled = MS3_USE_FENCE_REGS; assert(tex); assert(width); @@ -272,7 +288,7 @@ static void update_map(struct i915_context *i915, (((height - 1) << MS3_HEIGHT_SHIFT) | ((width - 1) << MS3_WIDTH_SHIFT) | format - | tiled); + | ms3_tiling_bits(tex->tiling)); /* * XXX When min_filter != mag_filter and there's just one mipmap level, diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index 4ac1f90ef9..becc6e93c2 100644 --- a/src/gallium/drivers/i915/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c @@ -59,13 +59,12 @@ i915_surface_copy(struct pipe_context *pipe, if (dst->target != PIPE_TEXTURE_CUBE && dst->target != 
PIPE_TEXTURE_3D) assert(dstz == 0); - dst_offset = dst_tex->image_offset[dst_level][dstz]; + dst_offset = i915_texture_offset(dst_tex, dst_level, dstz); if (src->target != PIPE_TEXTURE_CUBE && src->target != PIPE_TEXTURE_3D) assert(src_box->z == 0); - src_offset = src_tex->image_offset[src_level][src_box->z]; - + src_offset = i915_texture_offset(src_tex, src_level, src_box->z); assert( dst != src ); assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) ); @@ -93,7 +92,7 @@ i915_clear_render_target(struct pipe_context *pipe, struct i915_texture *tex = i915_texture(dst->texture); struct pipe_resource *pt = &tex->b.b; union util_color uc; - unsigned offset = tex->image_offset[dst->u.tex.level][dst->u.tex.first_layer]; + unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer); assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); @@ -122,7 +121,7 @@ i915_clear_depth_stencil(struct pipe_context *pipe, struct pipe_resource *pt = &tex->b.b; unsigned packedds; unsigned mask = 0; - unsigned offset = tex->image_offset[dst->u.tex.level][dst->u.tex.first_layer]; + unsigned offset = i915_texture_offset(tex, dst->u.tex.level, dst->u.tex.first_layer); assert(util_format_get_blockwidth(pt->format) == 1); assert(util_format_get_blockheight(pt->format) == 1); diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 5385e403d2..24ea416f01 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -53,6 +53,7 @@ enum i915_winsys_buffer_type I915_NEW_VERTEX }; +/* These need to be in sync with the definitions of libdrm-intel! 
*/ enum i915_winsys_buffer_tile { I915_TILE_NONE, @@ -106,7 +107,7 @@ struct i915_winsys { int (*batchbuffer_reloc)(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *reloc, enum i915_winsys_buffer_usage usage, - unsigned offset); + unsigned offset, bool fenced); /** * Flush a bufferbatch. @@ -130,10 +131,24 @@ struct i915_winsys { */ struct i915_winsys_buffer * (*buffer_create)(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type); /** + * Create a tiled buffer. + * + * *stride, height are in bytes. The winsys tries to allocate the buffer with + * the tiling mode provide in *tiling. If tiling is no possible, *tiling will + * be set to I915_TILE_NONE. The calculated stride (incorporateing hw/kernel + * requirements) is always returned in *stride. + */ + struct i915_winsys_buffer * + (*buffer_create_tiled)(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type); + + /** * Creates a buffer from a handle. * Used to implement pipe_screen::resource_from_handle. * Also provides the stride information needed for the @@ -142,6 +157,7 @@ struct i915_winsys { struct i915_winsys_buffer * (*buffer_from_handle)(struct i915_winsys *iws, struct winsys_handle *whandle, + enum i915_winsys_buffer_tile *tiling, unsigned *stride); /** @@ -154,15 +170,6 @@ struct i915_winsys { unsigned stride); /** - * Fence a buffer with a fence reg. - * Not to be confused with pipe_fence_handle. - */ - int (*buffer_set_fence_reg)(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile); - - /** * Map a buffer. 
*/ void *(*buffer_map)(struct i915_winsys *iws, diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 48c2409211..583e981a4d 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -424,4 +424,5 @@ void r300_parse_chipset(struct r300_capabilities* caps) } caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; + caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index e7ca642b4f..7ea4175dbe 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -79,6 +79,8 @@ struct r300_capabilities { boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; + /* DXTC texture swizzling. */ + boolean dxtc_swizzle; }; /* Enumerations for legibility and telling which card we're running on. */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 6bea783f69..788c513be7 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1520,11 +1520,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_TX_TRI_PERF_3_8 (3<<15) # define R300_ANISO_THRESHOLD_MASK (7<<17) +# define R400_DXTC_SWIZZLE_ENABLE (1<<21) # define R500_MACRO_SWITCH (1<<22) # define R500_TX_MAX_ANISO(x) ((x) << 23) # define R500_TX_MAX_ANISO_MASK (63 << 23) # define R500_TX_ANISO_HIGH_QUALITY (1 << 30) - # define R500_BORDER_FIX (1<<31) #define R300_TX_FORMAT0_0 0x4480 diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 85de60df0f..09981cb26b 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -116,8 +116,9 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_TEXTURE_SWIZZLE: return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; /* Unsupported features (boolean caps). */ case PIPE_CAP_TIMER_QUERY: diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 509e3106a4..0f563703c0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1347,6 +1347,7 @@ r300_create_sampler_view(struct pipe_context *pipe, struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view); struct r300_texture *tex = r300_texture(texture); boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500; + boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; if (view) { view->base = *templ; @@ -1363,7 +1364,8 @@ r300_create_sampler_view(struct pipe_context *pipe, view->format = tex->tx_format; view->format.format1 |= r300_translate_texformat(templ->format, view->swizzle, - is_r500); + is_r500, + dxtc_swizzle); if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 
5722c91ece..a6d0776051 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -764,13 +764,18 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) { texstate->format.format1 |= r300_get_swizzle_combined(depth_swizzle, - view->swizzle); + view->swizzle, FALSE); } else { texstate->format.format1 |= - r300_get_swizzle_combined(depth_swizzle, 0); + r300_get_swizzle_combined(depth_swizzle, 0, FALSE); } } + if (r300->screen->caps.dxtc_swizzle && + util_format_is_compressed(tex->desc.b.b.format)) { + texstate->filter1 |= R400_DXTC_SWIZZLE_ENABLE; + } + /* to emulate 1D textures through 2D ones correctly */ if (tex->desc.b.b.target == PIPE_TEXTURE_1D) { texstate->filter0 &= ~R300_TX_WRAP_T_MASK; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fe8859ab8f..4b7b3e0356 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -40,7 +40,8 @@ #include "pipe/p_screen.h" unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view, + boolean dxtc_swizzle) { unsigned i; unsigned char swizzle[4]; @@ -51,10 +52,10 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, R300_TX_FORMAT_B_SHIFT, R300_TX_FORMAT_A_SHIFT }; - const uint32_t swizzle_bit[4] = { - R300_TX_FORMAT_X, + uint32_t swizzle_bit[4] = { + dxtc_swizzle ? R300_TX_FORMAT_Z : R300_TX_FORMAT_X, R300_TX_FORMAT_Y, - R300_TX_FORMAT_Z, + dxtc_swizzle ? R300_TX_FORMAT_X : R300_TX_FORMAT_Z, R300_TX_FORMAT_W }; @@ -107,7 +108,8 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, * makes available X, Y, Z, W, ZERO, and ONE for swizzling. 
*/ uint32_t r300_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, - boolean is_r500) + boolean is_r500, + boolean dxtc_swizzle) { uint32_t result = 0; const struct util_format_description *desc; @@ -169,7 +171,8 @@ uint32_t r300_translate_texformat(enum pipe_format format, } } - result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view); + result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view, + util_format_is_compressed(format) && dxtc_swizzle); /* S3TC formats. */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { @@ -571,7 +574,7 @@ boolean r300_is_zs_format_supported(enum pipe_format format) boolean r300_is_sampler_format_supported(enum pipe_format format) { - return r300_translate_texformat(format, 0, TRUE) != ~0; + return r300_translate_texformat(format, 0, TRUE, FALSE) != ~0; } void r300_texture_setup_format_state(struct r300_screen *screen, diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index be2740efc3..0ab22f747e 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -36,11 +36,13 @@ struct r300_texture; struct r300_screen; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view); + const unsigned char *swizzle_view, + boolean dxtc_swizzle); uint32_t r300_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, - boolean is_r500); + boolean is_r500, + boolean dxtc_swizzle); uint32_t r500_tx_format_msb_bit(enum pipe_format format); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 33448bf0de..15a323989b 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; - /* case 
TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c index c6daa52a37..ebe86dcf19 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c @@ -14,9 +14,6 @@ #define INTEL_BATCH_CLIPRECTS 0x2 #undef INTEL_RUN_SYNC -#undef INTEL_MAP_BATCHBUFFER -#undef INTEL_MAP_GTT -#define INTEL_ALWAYS_FLUSH struct i915_drm_batchbuffer { @@ -72,11 +69,7 @@ i915_drm_batchbuffer_create(struct i915_winsys *iws) batch->actual_size = idws->max_batch_size; -#ifdef INTEL_MAP_BATCHBUFFER - batch->base.map = NULL; -#else batch->base.map = MALLOC(batch->actual_size); -#endif batch->base.ptr = NULL; batch->base.size = 0; @@ -94,7 +87,7 @@ static int i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add) + unsigned pre_add, bool fenced) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); unsigned write_domain = 0; @@ -104,37 +97,44 @@ i915_drm_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, assert(batch->base.relocs < batch->base.max_relocs); - if (usage == I915_USAGE_SAMPLER) { + switch (usage) { + case I915_USAGE_SAMPLER: write_domain = 0; read_domain = I915_GEM_DOMAIN_SAMPLER; - - } else if (usage == I915_USAGE_RENDER) { + break; + case I915_USAGE_RENDER: write_domain = I915_GEM_DOMAIN_RENDER; read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_2D_TARGET) { + break; + case I915_USAGE_2D_TARGET: write_domain = I915_GEM_DOMAIN_RENDER; read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_2D_SOURCE) { + break; + case I915_USAGE_2D_SOURCE: write_domain = 0; 
read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == I915_USAGE_VERTEX) { + break; + case I915_USAGE_VERTEX: write_domain = 0; read_domain = I915_GEM_DOMAIN_VERTEX; - - } else { + break; + default: assert(0); return -1; } offset = (unsigned)(batch->base.ptr - batch->base.map); - ret = drm_intel_bo_emit_reloc(batch->bo, offset, - intel_bo(buffer), pre_add, - read_domain, - write_domain); + if (fenced) + ret = drm_intel_bo_emit_reloc_fence(batch->bo, offset, + intel_bo(buffer), pre_add, + read_domain, + write_domain); + else + ret = drm_intel_bo_emit_reloc(batch->bo, offset, + intel_bo(buffer), pre_add, + read_domain, + write_domain); ((uint32_t*)batch->base.ptr)[0] = intel_bo(buffer)->offset + pre_add; batch->base.ptr += 4; @@ -150,70 +150,32 @@ i915_drm_batchbuffer_flush(struct i915_winsys_batchbuffer *ibatch, struct pipe_fence_handle **fence) { struct i915_drm_batchbuffer *batch = i915_drm_batchbuffer(ibatch); - unsigned used = 0; - int ret = 0; + unsigned used; + int ret; - assert(i915_winsys_batchbuffer_space(ibatch) >= 0); + /* MI_BATCH_BUFFER_END */ + i915_winsys_batchbuffer_dword_unchecked(ibatch, (0xA<<23)); used = batch->base.ptr - batch->base.map; - assert((used & 3) == 0); - - -#ifdef INTEL_ALWAYS_FLUSH - /* MI_FLUSH | FLUSH_MAP_CACHE */ - i915_winsys_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0)); - used += 4; -#endif - - if ((used & 4) == 0) { + if (used & 4) { /* MI_NOOP */ - i915_winsys_batchbuffer_dword(ibatch, 0); + i915_winsys_batchbuffer_dword_unchecked(ibatch, 0); + used += 4; } - /* MI_BATCH_BUFFER_END */ - i915_winsys_batchbuffer_dword(ibatch, (0xA<<23)); - - used = batch->base.ptr - batch->base.map; - assert((used & 4) == 0); - -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_unmap_gtt(batch->bo); -#else - drm_intel_bo_unmap(batch->bo); -#endif -#else - drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); -#endif /* Do the sending to HW */ - if (i915_drm_winsys(ibatch->iws)->send_cmd) + ret = 
drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); + if (ret == 0 && i915_drm_winsys(ibatch->iws)->send_cmd) ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); - else - ret = 0; if (ret != 0 || i915_drm_winsys(ibatch->iws)->dump_cmd) { -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_map_gtt(batch->bo); -#else - drm_intel_bo_map(batch->bo, 0); -#endif -#endif i915_dump_batchbuffer(ibatch); assert(ret == 0); -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_unmap_gtt(batch->bo); -#else - drm_intel_bo_unmap(batch->bo); -#endif -#endif - } else { + } + #ifdef INTEL_RUN_SYNC - drm_intel_bo_map(batch->bo, FALSE); - drm_intel_bo_unmap(batch->bo); + drm_intel_bo_wait_rendering(batch->bo); #endif - } if (fence) { ibatch->iws->fence_reference(ibatch->iws, fence, NULL); @@ -237,9 +199,7 @@ i915_drm_batchbuffer_destroy(struct i915_winsys_batchbuffer *ibatch) if (batch->bo) drm_intel_bo_unreference(batch->bo); -#ifndef INTEL_MAP_BATCHBUFFER FREE(batch->base.map); -#endif FREE(batch); } diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 15ec448745..01dd4bf062 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -5,14 +5,31 @@ #include "i915_drm.h" +static char *i915_drm_type_to_name(enum i915_winsys_buffer_type type) +{ + char *name; + + if (type == I915_NEW_TEXTURE) { + name = "gallium3d_texture"; + } else if (type == I915_NEW_VERTEX) { + name = "gallium3d_vertex"; + } else if (type == I915_NEW_SCANOUT) { + name = "gallium3d_scanout"; + } else { + assert(0); + name = "gallium3d_unknown"; + } + + return name; +} + static struct i915_winsys_buffer * i915_drm_buffer_create(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type) { struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); struct i915_drm_winsys *idws = i915_drm_winsys(iws); - char 
*name; if (!buf) return NULL; @@ -21,22 +38,48 @@ i915_drm_buffer_create(struct i915_winsys *iws, buf->flinked = FALSE; buf->flink = 0; - if (type == I915_NEW_TEXTURE) { - name = "gallium3d_texture"; - } else if (type == I915_NEW_VERTEX) { - name = "gallium3d_vertex"; - } else if (type == I915_NEW_SCANOUT) { - name = "gallium3d_scanout"; - } else { - assert(0); - name = "gallium3d_unknown"; - } + buf->bo = drm_intel_bo_alloc(idws->gem_manager, + i915_drm_type_to_name(type), size, 0); - buf->bo = drm_intel_bo_alloc(idws->gem_manager, name, size, alignment); + if (!buf->bo) + goto err; + + return (struct i915_winsys_buffer *)buf; + +err: + assert(0); + FREE(buf); + return NULL; +} + +static struct i915_winsys_buffer * +i915_drm_buffer_create_tiled(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type) +{ + struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); + struct i915_drm_winsys *idws = i915_drm_winsys(iws); + unsigned long pitch = 0; + uint32_t tiling_mode = *tiling; + + if (!buf) + return NULL; + + buf->magic = 0xDEAD1337; + buf->flinked = FALSE; + buf->flink = 0; + + buf->bo = drm_intel_bo_alloc_tiled(idws->gem_manager, + i915_drm_type_to_name(type), + *stride, height, 1, + &tiling_mode, &pitch, 0); if (!buf->bo) goto err; + *stride = pitch; + *tiling = tiling_mode; return (struct i915_winsys_buffer *)buf; err: @@ -47,8 +90,9 @@ err: static struct i915_winsys_buffer * i915_drm_buffer_from_handle(struct i915_winsys *iws, - struct winsys_handle *whandle, - unsigned *stride) + struct winsys_handle *whandle, + enum i915_winsys_buffer_tile *tiling, + unsigned *stride) { struct i915_drm_winsys *idws = i915_drm_winsys(iws); struct i915_drm_buffer *buf = CALLOC_STRUCT(i915_drm_buffer); @@ -68,6 +112,7 @@ i915_drm_buffer_from_handle(struct i915_winsys *iws, drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle); *stride = whandle->stride; + *tiling = tile; return (struct 
i915_winsys_buffer *)buf; @@ -103,24 +148,6 @@ i915_drm_buffer_get_handle(struct i915_winsys *iws, return TRUE; } -static int -i915_drm_buffer_set_fence_reg(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile) -{ - struct i915_drm_buffer *buf = i915_drm_buffer(buffer); - assert(I915_TILING_NONE == I915_TILE_NONE); - assert(I915_TILING_X == I915_TILE_X); - assert(I915_TILING_Y == I915_TILE_Y); - - if (tile != I915_TILE_NONE) { - assert(buf->map_count == 0); - } - - return drm_intel_bo_set_tiling(buf->bo, &tile, stride); -} - static void * i915_drm_buffer_map(struct i915_winsys *iws, struct i915_winsys_buffer *buffer, @@ -190,9 +217,9 @@ void i915_drm_winsys_init_buffer_functions(struct i915_drm_winsys *idws) { idws->base.buffer_create = i915_drm_buffer_create; + idws->base.buffer_create_tiled = i915_drm_buffer_create_tiled; idws->base.buffer_from_handle = i915_drm_buffer_from_handle; idws->base.buffer_get_handle = i915_drm_buffer_get_handle; - idws->base.buffer_set_fence_reg = i915_drm_buffer_set_fence_reg; idws->base.buffer_map = i915_drm_buffer_map; idws->base.buffer_unmap = i915_drm_buffer_unmap; idws->base.buffer_write = i915_drm_buffer_write; diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.c b/src/gallium/winsys/i915/drm/i915_drm_winsys.c index cc0b6a9957..2288b48b2b 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_winsys.c +++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.c @@ -69,6 +69,7 @@ i915_drm_winsys_create(int drmFD) idws->gem_manager = drm_intel_bufmgr_gem_init(idws->fd, idws->max_batch_size); drm_intel_bufmgr_gem_enable_reuse(idws->gem_manager); + drm_intel_bufmgr_gem_enable_fenced_relocs(idws->gem_manager); idws->dump_cmd = debug_get_bool_option("I915_DUMP_CMD", FALSE); idws->send_cmd = !debug_get_bool_option("I915_NO_HW", FALSE); diff --git a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c index a480cfed57..44773ae30e 
100644 --- a/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_batchbuffer.c @@ -61,7 +61,7 @@ static int i915_sw_batchbuffer_reloc(struct i915_winsys_batchbuffer *ibatch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - unsigned pre_add) + unsigned pre_add, bool fenced) { struct i915_sw_batchbuffer *batch = i915_sw_batchbuffer(ibatch); int ret = 0; diff --git a/src/gallium/winsys/i915/sw/i915_sw_buffer.c b/src/gallium/winsys/i915/sw/i915_sw_buffer.c index df17568886..834805e621 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_buffer.c +++ b/src/gallium/winsys/i915/sw/i915_sw_buffer.c @@ -4,28 +4,15 @@ static struct i915_winsys_buffer * i915_sw_buffer_create(struct i915_winsys *iws, - unsigned size, unsigned alignment, + unsigned size, enum i915_winsys_buffer_type type) { struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer); - char *name; if (!buf) return NULL; - if (type == I915_NEW_TEXTURE) { - name = "gallium3d_texture"; - } else if (type == I915_NEW_VERTEX) { - name = "gallium3d_vertex"; - } else if (type == I915_NEW_SCANOUT) { - name = "gallium3d_scanout"; - } else { - assert(0); - name = "gallium3d_unknown"; - } - buf->magic = 0xDEAD1337; - buf->name = name; buf->type = type; buf->ptr = CALLOC(size, 1); @@ -40,21 +27,32 @@ err: return NULL; } -static int -i915_sw_buffer_set_fence_reg(struct i915_winsys *iws, - struct i915_winsys_buffer *buffer, - unsigned stride, - enum i915_winsys_buffer_tile tile) +static struct i915_winsys_buffer * +i915_sw_buffer_create_tiled(struct i915_winsys *iws, + unsigned *stride, unsigned height, + enum i915_winsys_buffer_tile *tiling, + enum i915_winsys_buffer_type type) { - struct i915_sw_buffer *buf = i915_sw_buffer(buffer); + struct i915_sw_buffer *buf = CALLOC_STRUCT(i915_sw_buffer); + + if (!buf) + return NULL; + + buf->magic = 0xDEAD1337; + buf->type = type; + buf->ptr = CALLOC(*stride * height, 1); + buf->tiling = *tiling; + buf->stride = *stride; - if 
(tile != I915_TILE_NONE) { - assert(buf->map_count == 0); - } + if (!buf->ptr) + goto err; - buf->tile = tile; + return (struct i915_winsys_buffer *)buf; - return 0; +err: + assert(0); + FREE(buf); + return NULL; } static void * @@ -108,7 +106,7 @@ void i915_sw_winsys_init_buffer_functions(struct i915_sw_winsys *isws) { isws->base.buffer_create = i915_sw_buffer_create; - isws->base.buffer_set_fence_reg = i915_sw_buffer_set_fence_reg; + isws->base.buffer_create_tiled = i915_sw_buffer_create_tiled; isws->base.buffer_map = i915_sw_buffer_map; isws->base.buffer_unmap = i915_sw_buffer_unmap; isws->base.buffer_write = i915_sw_buffer_write; diff --git a/src/gallium/winsys/i915/sw/i915_sw_winsys.h b/src/gallium/winsys/i915/sw/i915_sw_winsys.h index b7b43669f3..3af2548419 100644 --- a/src/gallium/winsys/i915/sw/i915_sw_winsys.h +++ b/src/gallium/winsys/i915/sw/i915_sw_winsys.h @@ -43,8 +43,8 @@ struct i915_sw_buffer { void *ptr; unsigned map_count; enum i915_winsys_buffer_type type; - enum i915_winsys_buffer_tile tile; - const char *name; + enum i915_winsys_buffer_tile tiling; + unsigned stride; }; static INLINE struct i915_sw_buffer * diff --git a/src/glsl/Makefile b/src/glsl/Makefile index f5aadc347b..2674c6ec48 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -52,6 +52,7 @@ CXX_SOURCES = \ loop_analysis.cpp \ loop_controls.cpp \ loop_unroll.cpp \ + lower_discard.cpp \ lower_if_to_cond_assign.cpp \ lower_instructions.cpp \ lower_jumps.cpp \ @@ -70,6 +71,7 @@ CXX_SOURCES = \ opt_dead_code.cpp \ opt_dead_code_local.cpp \ opt_dead_functions.cpp \ + opt_discard_simplification.cpp \ opt_function_inlining.cpp \ opt_if_simplification.cpp \ opt_noop_swizzle.cpp \ diff --git a/src/glsl/SConscript b/src/glsl/SConscript index fd22f66863..b5b1728bee 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -49,6 +49,7 @@ sources = [ 'loop_analysis.cpp', 'loop_controls.cpp', 'loop_unroll.cpp', + 'lower_discard.cpp', 'lower_if_to_cond_assign.cpp', 'lower_instructions.cpp', 
'lower_jumps.cpp', @@ -66,6 +67,7 @@ sources = [ 'opt_dead_code.cpp', 'opt_dead_code_local.cpp', 'opt_dead_functions.cpp', + 'opt_discard_simplification.cpp', 'opt_function_inlining.cpp', 'opt_if_simplification.cpp', 'opt_noop_swizzle.cpp', diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 04b221e9b8..f5b1120f78 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -745,6 +745,16 @@ ast_node::hir(exec_list *instructions, return NULL; } +static void +mark_whole_array_access(ir_rvalue *access) +{ + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref) { + deref->var->max_array_access = deref->type->length - 1; + } +} + static ir_rvalue * do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) { @@ -780,6 +790,10 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) last = result; } } + + mark_whole_array_access(op0); + mark_whole_array_access(op1); + return last; } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 302cfbc566..8dbe66927d 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -716,6 +716,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration } progress = do_structure_splitting(ir) || progress; progress = do_if_simplification(ir) || progress; + progress = do_discard_simplification(ir) || progress; progress = do_copy_propagation(ir) || progress; if (linked) progress = do_dead_code(ir) || progress; diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index fa497a4555..f264265f4b 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -32,8 +32,9 @@ #define SUB_TO_ADD_NEG 0x01 #define DIV_TO_MUL_RCP 0x02 #define EXP_TO_EXP2 0x04 -#define LOG_TO_LOG2 0x08 -#define MOD_TO_FRACT 0x10 +#define POW_TO_EXP2 0x08 +#define LOG_TO_LOG2 0x10 +#define MOD_TO_FRACT 0x20 bool do_common_optimization(exec_list *ir, bool linked, 
unsigned max_unroll_iterations); @@ -51,6 +52,7 @@ bool do_function_inlining(exec_list *instructions); bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); bool do_lower_texture_projection(exec_list *instructions); bool do_if_simplification(exec_list *instructions); +bool do_discard_simplification(exec_list *instructions); bool do_if_to_cond_assign(exec_list *instructions); bool do_mat_op_to_vec(exec_list *instructions); bool do_mod_to_fract(exec_list *instructions); @@ -61,6 +63,7 @@ bool do_swizzle_swizzle(exec_list *instructions); bool do_tree_grafting(exec_list *instructions); bool do_vec_index_to_cond_assign(exec_list *instructions); bool do_vec_index_to_swizzle(exec_list *instructions); +bool lower_discard(exec_list *instructions); bool lower_instructions(exec_list *instructions, unsigned what_to_lower); bool lower_noise(exec_list *instructions); bool lower_variable_index_to_cond_assign(exec_list *instructions, diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp new file mode 100644 index 0000000000..b95313df8c --- /dev/null +++ b/src/glsl/lower_discard.cpp @@ -0,0 +1,198 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_discard.cpp + * + * This pass moves discards out of if-statements. + * + * Case 1: The "then" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * } + * discard temp; + * + * Case 2: The "else" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * } else { + * s2; + * discard cond2; + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * } else { + * s2; + * temp = cond2; + * s3; + * } + * discard temp; + * + * Case 3: Both branches contain a conditional discard: + * ---------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * discard cond3; + * s4; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * temp = cond3; + * s4; + * } + * discard temp; + * + * If there are multiple conditional discards, we need only deal with one of + * them. Repeatedly applying this pass will take care of the others. + * + * Unconditional discards are treated as having a condition of "true". 
+ */ + +#include "glsl_types.h" +#include "ir.h" + +class lower_discard_visitor : public ir_hierarchical_visitor { +public: + lower_discard_visitor() + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + + +bool +lower_discard(exec_list *instructions) +{ + lower_discard_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} + + +static ir_discard * +find_discard(exec_list &instructions) +{ + foreach_list(n, &instructions) { + ir_discard *ir = ((ir_instruction *) n)->as_discard(); + if (ir != NULL) + return ir; + } + return NULL; +} + + +static void +replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) +{ + ir_rvalue *condition = ir->condition; + + /* For unconditional discards, use "true" as the condition. */ + if (condition == NULL) + condition = new(mem_ctx) ir_constant(true); + + ir_assignment *assignment = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), + condition, NULL); + + ir->replace_with(assignment); +} + + +ir_visitor_status +lower_discard_visitor::visit_leave(ir_if *ir) +{ + ir_discard *then_discard = find_discard(ir->then_instructions); + ir_discard *else_discard = find_discard(ir->else_instructions); + + if (then_discard == NULL && else_discard == NULL) + return visit_continue; + + void *mem_ctx = talloc_parent(ir); + + ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discard_cond_temp", + ir_var_temporary); + ir_assignment *temp_initializer = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), + new(mem_ctx) ir_constant(false), NULL); + + ir->insert_before(temp); + ir->insert_before(temp_initializer); + + if (then_discard != NULL) + replace_discard(mem_ctx, temp, then_discard); + + if (else_discard != NULL) + replace_discard(mem_ctx, temp, else_discard); + + ir_discard *discard = then_discard != NULL ? 
then_discard : else_discard; + discard->condition = new(mem_ctx) ir_dereference_variable(temp); + ir->insert_after(discard); + + this->progress = true; + + return visit_continue; +} diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index d460ba1a97..a5f61f213d 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -33,6 +33,7 @@ * - SUB_TO_ADD_NEG * - DIV_TO_MUL_RCP * - EXP_TO_EXP2 + * - POW_TO_EXP2 * - LOG_TO_LOG2 * - MOD_TO_FRACT * @@ -61,6 +62,11 @@ * do have base 2 versions, so this pass converts exp and log to exp2 * and log2 operations. * + * POW_TO_EXP2: + * ----------- + * Many older GPUs don't have an x**y instruction. For these GPUs, convert + * x**y to 2**(y * log2(x)). + * * MOD_TO_FRACT: * ------------- * Breaks an ir_unop_mod expression down to (op1 * fract(op0 / op1)) @@ -70,7 +76,7 @@ * opportunity to do things like constant fold the (1.0 / op1) easily. */ -#include "main/core.h" /* for M_E */ +#include "main/core.h" /* for M_LOG2E */ #include "glsl_types.h" #include "ir.h" #include "ir_optimization.h" @@ -91,6 +97,7 @@ private: void div_to_mul_rcp(ir_expression *); void mod_to_fract(ir_expression *); void exp_to_exp2(ir_expression *); + void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); }; @@ -172,7 +179,7 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) void lower_instructions_visitor::exp_to_exp2(ir_expression *ir) { - ir_constant *log2_e = new(ir) ir_constant(log2f(M_E)); + ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E)); ir->operation = ir_unop_exp2; ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type, @@ -181,12 +188,26 @@ lower_instructions_visitor::exp_to_exp2(ir_expression *ir) } void +lower_instructions_visitor::pow_to_exp2(ir_expression *ir) +{ + ir_expression *const log2_x = + new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, + ir->operands[0]); + + ir->operation = ir_unop_exp2; + ir->operands[0] = 
new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, + ir->operands[1], log2_x); + ir->operands[1] = NULL; + this->progress = true; +} + +void lower_instructions_visitor::log_to_log2(ir_expression *ir) { ir->operation = ir_binop_mul; ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, ir->operands[0], NULL); - ir->operands[1] = new(ir) ir_constant(1.0f / log2f(M_E)); + ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E)); this->progress = true; } @@ -254,6 +275,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) mod_to_fract(ir); break; + case ir_binop_pow: + if (lowering(POW_TO_EXP2)) + pow_to_exp2(ir); + break; + default: return visit_continue; } diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp index e1e7a5b007..9cd15ef736 100644 --- a/src/glsl/lower_jumps.cpp +++ b/src/glsl/lower_jumps.cpp @@ -23,6 +23,37 @@ /** * \file lower_jumps.cpp + * + * This pass lowers jumps (break, continue, and return) to if/else structures. + * + * It can be asked to: + * 1. Pull jumps out of ifs where possible + * 2. Remove all "continue"s, replacing them with an "execute flag" + * 3. Replace all "break" with a single conditional one at the end of the loop + * 4. Replace all "return"s with a single return at the end of the function, + * for the main function and/or other functions + * + * Applying this pass gives several benefits: + * 1. All functions can be inlined. + * 2. nv40 and other pre-DX10 chips without "continue" can be supported + * 3. nv30 and other pre-DX10 chips with no control flow at all are better + * supported + * + * Continues are lowered by adding a per-loop "execute flag", initialized to + * true, that when cleared inhibits all execution until the end of the loop. + * + * Breaks are lowered to continues, plus setting a "break flag" that is checked + * at the end of the loop, and triggers the unique "break". 
+ * + * Returns are lowered to breaks/continues, plus adding a "return flag" that + * causes loops to break again out of their enclosing loops until all the + * loops are exited: then the "execute flag" logic will ignore everything + * until the end of the function. + * + * Note that "continue" and "return" can also be implemented by adding + * a dummy loop and using break. + * However, this is bad for hardware with limited nesting depth, and + * prevents further optimization, and thus is not currently performed. */ #include "glsl_types.h" @@ -36,7 +67,6 @@ enum jump_strength strength_continue, strength_break, strength_return, - strength_discard }; struct block_record @@ -202,8 +232,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { virtual void visit(class ir_discard * ir) { - truncate_after_instruction(ir); - this->block.min_strength = strength_discard; } enum jump_strength get_jump_strength(ir_instruction* ir) @@ -217,8 +245,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { return strength_continue; } else if(ir->ir_type == ir_type_return) return strength_return; - else if(ir->ir_type == ir_type_discard) - return strength_discard; else return strength_none; } @@ -253,9 +279,6 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor { else lower = lower_sub_return; break; - case strength_discard: - lower = false; /* probably nothing needs this lowered */ - break; } return lower; } @@ -313,9 +336,8 @@ retry: /* we get here if we put code after the if inside a branch */ /* FINISHME: unify returns with identical expressions */ else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void()) ir->insert_after(new(ir) ir_return(NULL)); - /* FINISHME: unify discards */ - else - unify = false; + else + unify = false; if(unify) { jumps[0]->remove(); diff --git a/src/glsl/opt_discard_simplification.cpp b/src/glsl/opt_discard_simplification.cpp new file mode 100644 index 0000000000..0e577c478a 
--- /dev/null +++ b/src/glsl/opt_discard_simplification.cpp @@ -0,0 +1,180 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_discard_simplification.cpp + * + * This pass simplifies if-statements and loops containing unconditional + * discards. 
+ * + * Case 1: Both branches contain unconditional discards: + * ----------------------------------------------------- + * + * if (cond) { + * s1; + * discard; + * s2; + * } else { + * s3; + * discard; + * s4; + * } + * + * becomes: + * + * discard + * + * Case 2: The "then" clause contains an unconditional discard: + * ------------------------------------------------------------ + * + * if (cond) { + * s1; + * discard; + * s2; + * } else { + * s3; + * } + * + * becomes: + * + * if (cond) { + * discard; + * } else { + * s3; + * } + * + * Case 3: The "else" clause contains an unconditional discard: + * ------------------------------------------------------------ + * + * if (cond) { + * s1; + * } else { + * s2; + * discard; + * s3; + * } + * + * becomes: + * + * if (cond) { + * s1; + * } else { + * discard; + * } + */ + +#include "glsl_types.h" +#include "ir.h" + +class discard_simplifier : public ir_hierarchical_visitor { +public: + discard_simplifier() + { + this->progress = false; + } + + ir_visitor_status visit_enter(ir_if *); + ir_visitor_status visit_enter(ir_loop *); + + bool progress; +}; + +static ir_discard * +find_unconditional_discard(exec_list &instructions) +{ + foreach_list(n, &instructions) { + ir_discard *ir = ((ir_instruction *) n)->as_discard(); + if (ir != NULL && ir->condition == NULL) + return ir; + } + return NULL; +} + +static bool +is_only_instruction(ir_discard *discard) +{ + return (discard->prev->is_head_sentinel() && + discard->next->is_tail_sentinel()); +} + +ir_visitor_status +discard_simplifier::visit_enter(ir_if *ir) +{ + ir_discard *then_discard = find_unconditional_discard(ir->then_instructions); + ir_discard *else_discard = find_unconditional_discard(ir->else_instructions); + + if (then_discard == NULL && else_discard == NULL) + return visit_continue; + + /* If both branches result in discard, replace whole if with discard. 
*/ + if (then_discard != NULL && else_discard != NULL) { + this->progress = true; + ir->replace_with(then_discard); + return visit_continue_with_parent; + } + + /* Otherwise, one branch has a discard. */ + if (then_discard != NULL && !is_only_instruction(then_discard)) { + this->progress = true; + ir->then_instructions.make_empty(); + ir->then_instructions.push_tail(then_discard); + } else if (else_discard != NULL && !is_only_instruction(else_discard)) { + this->progress = true; + ir->else_instructions.make_empty(); + ir->else_instructions.push_tail(else_discard); + } + + visit_list_elements(this, &ir->then_instructions); + return visit_continue_with_parent; +} + +ir_visitor_status +discard_simplifier::visit_enter(ir_loop *ir) +{ + ir_discard *discard = find_unconditional_discard(ir->body_instructions); + + if (discard) { + ir->replace_with(discard); + return visit_continue_with_parent; + } + + return visit_continue; +} + +bool +do_discard_simplification(exec_list *instructions) +{ + /* Look for a top-level unconditional discard */ + ir_discard *discard = find_unconditional_discard(*instructions); + if (discard != NULL) { + instructions->make_empty(); + instructions->push_tail(discard); + return true; + } + + discard_simplifier v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f943f81dd0..f32f3cf602 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -176,6 +176,7 @@ i915CreateContext(int api, ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE; + ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE; ctx->Const.MaxDrawBuffers = 1; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c 
b/src/mesa/drivers/dri/i915/i915_fragprog.c index c00ee415b6..7a9fb7f088 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p) if (inst->DstReg.CondMask == COND_TR) { tmp = i915_get_utemp(p); + /* The KIL instruction discards the fragment if any component of + * the source is < 0. Emit an immediate operand of {-1}.xyzw. + */ i915_emit_texld(p, get_live_regs(p, inst), tmp, A0_DEST_CHANNEL_ALL, 0, /* use a dummy dest reg */ - swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + negate(swizzle(tmp, ONE, ONE, ONE, ONE), + 1, 1, 1, 1), T0_TEXKILL); } else { p->error = 1; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index cb0a8b96c9..28549f2574 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - - if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 962c04128b..6b61f7af15 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= dest (file, inst); } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF)) { + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format (file, " %d", inst->bits1.branch_gen6.jump_count); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index b4538e6e8a..a4904b7098 100644 ---
a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -954,6 +954,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ @@ -1009,6 +1011,7 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); +void brw_set_uip_jip(struct brw_compile *p); /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacf..945f50d110 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1029,16 +1029,44 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + if (intel->gen >= 6) { + brw_set_dest(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + int br = 2; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, 
retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); + + insn->bits3.break_cont.uip = br * (do_insn - insn); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1058,10 +1086,26 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. 
*/ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); @@ -1094,34 +1138,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + 
insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1989,6 +2041,80 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + if (ip + insn->bits1.branch_gen6.jump_count / br < start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. 
+ */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); + break; + case BRW_OPCODE_CONTINUE: + /* UIP is set at CONTINUE emit time, since that's when we + * know where the start of the loop is. Only JIP is computed here. + */ + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7a8e981225..ee9ae160bd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -933,6 +933,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -3375,10 +3379,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - this->fail = true; - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3388,7 +3388,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still.
*/ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3402,16 +3406,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3488,6 +3494,26 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. 
+ */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ce9af9c4f..8f97bd136f 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1539,6 +1539,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. 
+ */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b13e0c2a2c..407358f498 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -2032,35 +2032,42 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p, if_depth_in_loop[loop_depth]); + if (intel->gen >= 6) { + brw_CONT_gen6(p, loop_inst[loop_depth - 1]); + } else { + brw_CONT(p, if_depth_in_loop[loop_depth]); + } brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: - { - clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && + + case OPCODE_ENDLOOP: { + clear_current_const(c); + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (intel->gen == 5) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + 
} + } break; + case OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -2151,6 +2158,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) } brw_resolve_cals(p); + brw_set_uip_jip(p); brw_optimize(p); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 471067e8f0..06ac5d49a0 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -99,6 +99,48 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; + if (ctx->Polygon.OffsetLine) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; + + if (ctx->Polygon.OffsetPoint) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; + + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + dw2 |= GEN6_SF_FRONT_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_FRONT_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_FRONT_POINT; + break; + + default: + assert(0); + break; + } + + switch (ctx->Polygon.BackMode) { + case GL_FILL: + dw2 |= GEN6_SF_BACK_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_BACK_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_BACK_POINT; + break; + + default: + assert(0); + break; + } + /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afa..113b27632a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h 
b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c870..7e66610127 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 106e03495d..01c2e74e7b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -217,6 +217,22 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + int tempreg = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -554,6 +570,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -782,6 +799,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); 
return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2bf24096a0..1fa559cec1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3334,7 +3334,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGE; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + } pAsm->D.dst.op3 = 1; tmp = (-1); @@ -3416,8 +3423,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) checkop1(pAsm); tmp = gethelpr(pAsm); - - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -3457,7 +3470,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4742,7 +4762,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4782,7 +4809,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) 
{ return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -5010,7 +5044,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) GLuint tmp = gethelpr(pAsm); /* tmp = (src > 0 ? 1 : src) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -5033,7 +5074,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) } /* dst = (-tmp > 0 ? -1 : tmp) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; if( GL_FALSE == assemble_dst(pAsm) ) diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 800eb83900..5557a3b5cb 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -358,6 +358,10 @@ static INLINE GLuint CPU_TO_LE32(GLuint x) #define M_E (2.7182818284590452354) #endif +#ifndef M_LOG2E +#define M_LOG2E (1.4426950408889634074) +#endif + #ifndef ONE_DIV_LN2 #define ONE_DIV_LN2 (1.442695040888963456) #endif diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 80c20e09d9..82495714f2 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2197,6 +2197,7 @@ struct gl_shader_compiler_options GLboolean EmitNoCont; /**< Emit CONT opcode? */ GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ + GLboolean EmitNoPow; /**< Emit POW opcodes? */ /** * \name Forms of indirect addressing the driver cannot do. 
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8f75c82c3e..b274a961b2 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2166,9 +2166,14 @@ ir_to_mesa_visitor::visit(ir_discard *ir) { struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - assert(ir->condition == NULL); /* FINISHME */ + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + ir_to_mesa_emit_op1(ir, OPCODE_KIL, ir_to_mesa_undef_dst, this->result); + } else { + ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); + } - ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); fp->UsesKill = GL_TRUE; } @@ -2844,8 +2849,9 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); - lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; @@ -2853,8 +2859,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = lower_quadop_vector(ir, true) || progress; - if (options->EmitNoIfs) + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; progress = do_if_to_cond_assign(ir) || progress; + } if (options->EmitNoNoise) progress = lower_noise(ir) || progress; |