summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuc Verhaegen <libv@skynet.be>2010-03-14 05:54:10 +0100
committerLuc Verhaegen <libv@skynet.be>2010-03-14 05:54:10 +0100
commit4cfe98db595fd3876e252bd23ed48bf82836f795 (patch)
tree4a05e56b9c3ebba1f42460ffe2e6cf6e7e08eafe
parentda7796a7d40ca2cd4a03e99ddf38e3a7256a401f (diff)
Import i915 and i965 dri drivers from mesa 7.6.0.7.6.0
-rw-r--r--configure.ac7
-rw-r--r--i915/Makefile.am4
-rw-r--r--i915/i830_context.h8
-rw-r--r--i915/i830_reg.h16
-rw-r--r--i915/i830_state.c32
-rw-r--r--i915/i830_texstate.c10
-rw-r--r--i915/i830_vtbl.c66
-rw-r--r--i915/i915_context.c10
-rw-r--r--i915/i915_context.h12
-rw-r--r--i915/i915_reg.h19
-rw-r--r--i915/i915_state.c186
-rw-r--r--i915/i915_tex_layout.c63
-rw-r--r--i915/i915_texstate.c9
-rw-r--r--i915/i915_vtbl.c76
-rw-r--r--i915/intel_tris.c7
-rw-r--r--i965/Makefile.am5
-rw-r--r--i965/brw_clip.c25
-rw-r--r--i965/brw_clip.h6
-rw-r--r--i965/brw_clip_line.c15
-rw-r--r--i965/brw_clip_point.c2
-rw-r--r--i965/brw_clip_state.c9
-rw-r--r--i965/brw_clip_tri.c17
-rw-r--r--i965/brw_clip_unfilled.c1
-rw-r--r--i965/brw_clip_util.c51
-rw-r--r--i965/brw_context.c30
-rw-r--r--i965/brw_context.h37
-rw-r--r--i965/brw_curbe.c83
-rw-r--r--i965/brw_defines.h33
-rw-r--r--i965/brw_disasm.c903
-rw-r--r--i965/brw_draw.c66
-rw-r--r--i965/brw_draw.h1
-rw-r--r--i965/brw_draw_upload.c180
-rw-r--r--i965/brw_eu.c2
-rw-r--r--i965/brw_eu.h24
-rw-r--r--i965/brw_eu_emit.c322
-rw-r--r--i965/brw_fallback.c31
-rw-r--r--i965/brw_gs.c9
-rw-r--r--i965/brw_gs.h1
-rw-r--r--i965/brw_gs_emit.c30
-rw-r--r--i965/brw_gs_state.c3
-rw-r--r--i965/brw_misc_state.c45
-rw-r--r--i965/brw_queryobj.c7
-rw-r--r--i965/brw_sf.c5
-rw-r--r--i965/brw_sf.h3
-rw-r--r--i965/brw_sf_emit.c41
-rw-r--r--i965/brw_sf_state.c48
-rw-r--r--i965/brw_state.h31
-rw-r--r--i965/brw_state_batch.c15
-rw-r--r--i965/brw_state_cache.c159
-rw-r--r--i965/brw_state_dump.c2
-rw-r--r--i965/brw_state_upload.c69
-rw-r--r--i965/brw_structs.h143
-rw-r--r--i965/brw_tex_layout.c159
-rw-r--r--i965/brw_urb.c27
-rw-r--r--i965/brw_vs.c2
-rw-r--r--i965/brw_vs.h1
-rw-r--r--i965/brw_vs_emit.c248
-rw-r--r--i965/brw_vs_state.c18
-rw-r--r--i965/brw_vs_surface_state.c226
-rw-r--r--i965/brw_vtbl.c11
-rw-r--r--i965/brw_wm.c12
-rw-r--r--i965/brw_wm.h10
-rw-r--r--i965/brw_wm_emit.c121
-rw-r--r--i965/brw_wm_fp.c159
-rw-r--r--i965/brw_wm_glsl.c519
-rw-r--r--i965/brw_wm_iz.c7
-rw-r--r--i965/brw_wm_pass0.c33
-rw-r--r--i965/brw_wm_pass1.c1
-rw-r--r--i965/brw_wm_sampler_state.c44
-rw-r--r--i965/brw_wm_state.c16
-rw-r--r--i965/brw_wm_surface_state.c342
-rw-r--r--shared/intel_batchbuffer.c19
-rw-r--r--shared/intel_blit.c152
-rw-r--r--shared/intel_blit.h20
-rw-r--r--shared/intel_buffer_objects.c311
-rw-r--r--shared/intel_buffer_objects.h13
-rw-r--r--shared/intel_buffers.c19
-rw-r--r--shared/intel_chipset.h18
-rw-r--r--shared/intel_clear.c273
-rw-r--r--shared/intel_context.c173
-rw-r--r--shared/intel_context.h55
-rw-r--r--shared/intel_extensions.c19
-rw-r--r--shared/intel_fbo.c87
-rw-r--r--shared/intel_generatemipmap.c304
-rw-r--r--shared/intel_mipmap_tree.c98
-rw-r--r--shared/intel_mipmap_tree.h16
-rw-r--r--shared/intel_pixel.c177
-rw-r--r--shared/intel_pixel.h12
-rw-r--r--shared/intel_pixel_bitmap.c103
-rw-r--r--shared/intel_pixel_copy.c197
-rw-r--r--shared/intel_pixel_draw.c220
-rw-r--r--shared/intel_pixel_read.c (renamed from i915/intel_pixel_read.c)54
-rw-r--r--shared/intel_reg.h13
-rw-r--r--shared/intel_regions.c234
-rw-r--r--shared/intel_regions.h27
-rw-r--r--shared/intel_screen.c27
-rw-r--r--shared/intel_span.c4
-rw-r--r--shared/intel_syncobj.c132
-rw-r--r--shared/intel_tex.c72
-rw-r--r--shared/intel_tex.h111
-rw-r--r--shared/intel_tex_copy.c81
-rw-r--r--shared/intel_tex_image.c117
-rw-r--r--shared/intel_tex_layout.c33
-rw-r--r--shared/intel_tex_layout.h6
-rw-r--r--shared/intel_tex_subimage.c101
-rw-r--r--shared/intel_tex_validate.c5
106 files changed, 5429 insertions, 2809 deletions
diff --git a/configure.ac b/configure.ac
index b73fe88..af47813 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.5.0, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.6.0, [], mesa-dri-i9xx)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -16,9 +16,8 @@ AC_PROG_CC
AC_HEADER_STDC
PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3])
-# changes in struct gl_fragment_program.
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.5.0 libmesadri < 7.6.0
- libmesadricommon >= 7.5.0 libmesadricommon < 7.6.0])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0
+ libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0])
AC_OUTPUT([
Makefile
diff --git a/i915/Makefile.am b/i915/Makefile.am
index 2dc608c..451e9fd 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -20,6 +20,7 @@ i915_dri_la_SOURCES = \
../shared/intel_batchbuffer.c \
../shared/intel_clear.c \
../shared/intel_extensions.c \
+ ../shared/intel_generatemipmap.c \
../shared/intel_mipmap_tree.c \
../shared/intel_tex_layout.c \
../shared/intel_tex_image.c \
@@ -32,7 +33,7 @@ i915_dri_la_SOURCES = \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
../shared/intel_pixel_draw.c \
- intel_pixel_read.c \
+ ../shared/intel_pixel_read.c \
../shared/intel_buffers.c \
../shared/intel_blit.c \
../shared/intel_swapbuffers.c \
@@ -51,5 +52,6 @@ i915_dri_la_SOURCES = \
../shared/intel_screen.c \
../shared/intel_span.c \
../shared/intel_state.c \
+ ../shared/intel_syncobj.c \
intel_tris.c \
../shared/intel_fbo.c
diff --git a/i915/i830_context.h b/i915/i830_context.h
index 1bdb320..f73cbbf 100644
--- a/i915/i830_context.h
+++ b/i915/i830_context.h
@@ -40,6 +40,7 @@
#define I830_UPLOAD_BUFFERS 0x2
#define I830_UPLOAD_STIPPLE 0x4
#define I830_UPLOAD_INVARIENT 0x8
+#define I830_UPLOAD_RASTER_RULES 0x10
#define I830_UPLOAD_TEX(i) (0x10<<(i))
#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
#define I830_UPLOAD_TEX_ALL (0x0f0)
@@ -99,6 +100,11 @@
#define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */
+enum {
+ I830_RASTER_RULES,
+ I830_RASTER_RULES_SIZE
+};
+
struct i830_texture_object
{
struct intel_texture_object intel;
@@ -112,6 +118,7 @@ struct i830_hw_state
GLuint Ctx[I830_CTX_SETUP_SIZE];
GLuint Buffer[I830_DEST_SETUP_SIZE];
GLuint Stipple[I830_STP_SETUP_SIZE];
+ GLuint RasterRules[I830_RASTER_RULES_SIZE];
GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];
GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];
GLuint TexBlendWordsUsed[I830_TEX_UNITS];
@@ -197,6 +204,7 @@ extern void i830InitStateFuncs(struct dd_function_table *functions);
extern void i830EmitState(struct i830_context *i830);
extern void i830InitState(struct i830_context *i830);
+extern void i830_update_provoking_vertex(GLcontext *ctx);
/* i830_metaops.c
*/
diff --git a/i915/i830_reg.h b/i915/i830_reg.h
index d210c2d..ae13170 100644
--- a/i915/i830_reg.h
+++ b/i915/i830_reg.h
@@ -48,19 +48,6 @@
#define AA_LINE_ENABLE ((1<<1) | 1)
#define AA_LINE_DISABLE (1<<1)
-#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
-/* Dword 1 */
-#define BUF_3D_ID_COLOR_BACK (0x3<<24)
-#define BUF_3D_ID_DEPTH (0x7<<24)
-#define BUF_3D_USE_FENCE (1<<23)
-#define BUF_3D_TILED_SURFACE (1<<22)
-#define BUF_3D_TILE_WALK_X 0
-#define BUF_3D_TILE_WALK_Y (1<<21)
-#define BUF_3D_PITCH(x) (((x)/4)<<2)
-/* Dword 2 */
-#define BUF_3D_ADDR(x) ((x) & ~0x3)
-
-
#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16))
#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \
@@ -433,8 +420,11 @@
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2)
+#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0)
#define TRI_STRIP_PROVOKE_VRTX(x) (x)
/* _3DSTATE_SCISSOR_ENABLE, p200 */
diff --git a/i915/i830_state.c b/i915/i830_state.c
index 8ef6c91..645ebe3 100644
--- a/i915/i830_state.c
+++ b/i915/i830_state.c
@@ -1047,6 +1047,16 @@ i830_init_packets(struct i830_context *i830)
TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+ i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ ENABLE_TRI_STRIP_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ TRI_STRIP_PROVOKE_VRTX(2));
+
i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
@@ -1058,6 +1068,27 @@ i830_init_packets(struct i830_context *i830)
i830->state.Buffer[I830_DESTREG_SR2] = 0;
}
+void
+i830_update_provoking_vertex(GLcontext * ctx)
+{
+ struct i830_context *i830 = i830_context(ctx);
+
+ I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES);
+ i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
+ TRI_FAN_PROVOKE_VRTX_MASK |
+ TRI_STRIP_PROVOKE_VRTX_MASK);
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ TRI_STRIP_PROVOKE_VRTX(2));
+ } else {
+ i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
+ TRI_FAN_PROVOKE_VRTX(1) |
+ TRI_STRIP_PROVOKE_VRTX(0));
+ }
+}
void
i830InitStateFuncs(struct dd_function_table *functions)
@@ -1101,6 +1132,7 @@ i830InitState(struct i830_context *i830)
i830->current = &i830->state;
i830->state.emitted = 0;
i830->state.active = (I830_UPLOAD_INVARIENT |
+ I830_UPLOAD_RASTER_RULES |
I830_UPLOAD_TEXBLEND(0) |
I830_UPLOAD_STIPPLE |
I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS);
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
index 753c25b..6f998fa 100644
--- a/i915/i830_texstate.c
+++ b/i915/i830_texstate.c
@@ -174,14 +174,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
(LOAD_TEXTURE_MAP0 << unit) | 4);
-/* state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */
-/* t->intel.TextureOffset); */
-
-
state[I830_TEXREG_TM0S1] =
(((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);
+ if (intelObj->mt->region->tiling != I915_TILING_NONE) {
+ state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
+ if (intelObj->mt->region->tiling == I915_TILING_Y)
+ state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
+ }
+
state[I830_TEXREG_TM0S2] =
((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
index 3bf02de..983f672 100644
--- a/i915/i830_vtbl.c
+++ b/i915/i830_vtbl.c
@@ -299,7 +299,7 @@ i830_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(30, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(29, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
@@ -351,15 +351,6 @@ i830_emit_invarient_state(struct intel_context *intel)
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- ENABLE_TRI_STRIP_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2));
-
OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
@@ -394,6 +385,9 @@ get_state_size(struct i830_hw_state *state)
if (dirty & I830_UPLOAD_INVARIENT)
sz += 40 * sizeof(int);
+ if (dirty & I830_UPLOAD_RASTER_RULES)
+ sz += sizeof(state->RasterRules);
+
if (dirty & I830_UPLOAD_CTX)
sz += sizeof(state->Ctx);
@@ -486,6 +480,11 @@ i830_emit_state(struct intel_context *intel)
i830_emit_invarient_state(intel);
}
+ if (dirty & I830_UPLOAD_RASTER_RULES) {
+ DBG("I830_UPLOAD_RASTER_RULES:\n");
+ emit(intel, state->RasterRules, sizeof(state->RasterRules));
+ }
+
if (dirty & I830_UPLOAD_CTX) {
DBG("I830_UPLOAD_CTX:\n");
emit(intel, state->Ctx, sizeof(state->Ctx));
@@ -552,7 +551,7 @@ i830_emit_state(struct intel_context *intel)
if (state->tex_buffer[i]) {
OUT_RELOC(state->tex_buffer[i],
I915_GEM_DOMAIN_SAMPLER, 0,
- state->tex_offset[i] | TM0S0_USE_FENCE);
+ state->tex_offset[i]);
}
else if (state == &i830->meta) {
assert(i == 0);
@@ -634,21 +633,11 @@ i830_state_draw_region(struct intel_context *intel,
/*
* Set stride/cpp values
*/
- if (color_region) {
- state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
- state->Buffer[I830_DESTREG_CBUFADDR1] =
- (BUF_3D_ID_COLOR_BACK |
- BUF_3D_PITCH(color_region->pitch * color_region->cpp) |
- BUF_3D_USE_FENCE);
- }
+ i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0],
+ color_region, BUF_3D_ID_COLOR_BACK);
- if (depth_region) {
- state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
- state->Buffer[I830_DESTREG_DBUFADDR1] =
- (BUF_3D_ID_DEPTH |
- BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) |
- BUF_3D_USE_FENCE);
- }
+ i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0],
+ depth_region, BUF_3D_ID_DEPTH);
/*
* Compute/set I830_DESTREG_DV1 value
@@ -718,26 +707,6 @@ i830_set_draw_region(struct intel_context *intel,
i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region);
}
-#if 0
-static void
-i830_update_color_z_regions(intelContextPtr intel,
- const intelRegion * colorRegion,
- const intelRegion * depthRegion)
-{
- i830ContextPtr i830 = I830_CONTEXT(intel);
-
- i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
- (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) |
- BUF_3D_USE_FENCE);
- i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset;
-
- i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
- (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE);
- i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset;
-}
-#endif
-
-
/* This isn't really handled at the moment.
*/
static void
@@ -768,9 +737,10 @@ i830_assert_not_dirty( struct intel_context *intel )
}
static void
-i830_note_unlock( struct intel_context *intel )
+i830_invalidate_state(struct intel_context *intel, GLuint new_state)
{
- /* nothing */
+ if (new_state & _NEW_LIGHT)
+ i830_update_provoking_vertex(&intel->ctx);
}
void
@@ -787,6 +757,6 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.render_start = i830_render_start;
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
- i830->intel.vtbl.note_unlock = i830_note_unlock;
i830->intel.vtbl.finish_batch = intel_finish_vb;
+ i830->intel.vtbl.invalidate_state = i830_invalidate_state;
}
diff --git a/i915/i915_context.c b/i915/i915_context.c
index 1f9f363..3ab7d68 100644
--- a/i915/i915_context.c
+++ b/i915/i915_context.c
@@ -27,6 +27,7 @@
#include "i915_context.h"
#include "main/imports.h"
+#include "main/macros.h"
#include "intel_tex.h"
#include "intel_tris.h"
#include "tnl/t_context.h"
@@ -73,8 +74,12 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
p->params_uptodate = 0;
}
- if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM))
+ if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
i915_update_fog(ctx);
+ if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON))
+ i915_update_stencil(ctx);
+ if (new_state & (_NEW_LIGHT))
+ i915_update_provoking_vertex(ctx);
}
@@ -162,6 +167,9 @@ i915CreateContext(const __GLcontextModes * mesaVis,
ctx->Const.FragmentProgram.MaxNativeTexIndirections =
I915_MAX_TEX_INDIRECT;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
+ ctx->Const.FragmentProgram.MaxEnvParams =
+ MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+ ctx->Const.FragmentProgram.MaxEnvParams);
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
diff --git a/i915/i915_context.h b/i915/i915_context.h
index 87bbf5f..8de4a9d 100644
--- a/i915/i915_context.h
+++ b/i915/i915_context.h
@@ -48,6 +48,7 @@
#define I915_UPLOAD_FOG 0x20
#define I915_UPLOAD_INVARIENT 0x40
#define I915_UPLOAD_DEFAULTS 0x80
+#define I915_UPLOAD_RASTER_RULES 0x100
#define I915_UPLOAD_TEX(i) (0x00010000<<(i))
#define I915_UPLOAD_TEX_ALL (0x00ff0000)
#define I915_UPLOAD_TEX_0_SHIFT 16
@@ -82,7 +83,9 @@
#define I915_CTXREG_IAB 6
#define I915_CTXREG_BLENDCOLOR0 7
#define I915_CTXREG_BLENDCOLOR1 8
-#define I915_CTX_SETUP_SIZE 9
+#define I915_CTXREG_BF_STENCIL_OPS 9
+#define I915_CTXREG_BF_STENCIL_MASKS 10
+#define I915_CTX_SETUP_SIZE 11
#define I915_FOGREG_COLOR 0
#define I915_FOGREG_MODE0 1
@@ -110,6 +113,10 @@
#define I915_DEFREG_Z1 5
#define I915_DEF_SETUP_SIZE 6
+enum {
+ I915_RASTER_RULES,
+ I915_RASTER_RULES_SETUP_SIZE,
+};
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
@@ -206,6 +213,7 @@ struct i915_hw_state
GLuint Stipple[I915_STP_SETUP_SIZE];
GLuint Fog[I915_FOG_SETUP_SIZE];
GLuint Defaults[I915_DEF_SETUP_SIZE];
+ GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE];
GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];
GLuint Constant[I915_CONSTANT_SIZE];
GLuint ConstantSize;
@@ -321,6 +329,8 @@ extern void i915_print_ureg(const char *msg, GLuint ureg);
extern void i915InitStateFunctions(struct dd_function_table *functions);
extern void i915InitState(struct i915_context *i915);
extern void i915_update_fog(GLcontext * ctx);
+extern void i915_update_stencil(GLcontext * ctx);
+extern void i915_update_provoking_vertex(GLcontext *ctx);
/*======================================================================
diff --git a/i915/i915_reg.h b/i915/i915_reg.h
index 8891e11..b5fa7fd 100644
--- a/i915/i915_reg.h
+++ b/i915/i915_reg.h
@@ -86,27 +86,15 @@
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff)
/* 3DSTATE_BIN_CONTROL p141 */
-/* p143 */
-#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
-/* Dword 1 */
-#define BUF_3D_ID_COLOR_BACK (0x3<<24)
-#define BUF_3D_ID_DEPTH (0x7<<24)
-#define BUF_3D_USE_FENCE (1<<23)
-#define BUF_3D_TILED_SURFACE (1<<22)
-#define BUF_3D_TILE_WALK_X 0
-#define BUF_3D_TILE_WALK_Y (1<<21)
-#define BUF_3D_PITCH(x) (((x)/4)<<2)
-/* Dword 2 */
-#define BUF_3D_ADDR(x) ((x) & ~0x3)
-
-
/* 3DSTATE_CHROMA_KEY */
/* 3DSTATE_CLEAR_PARAMETERS, p150 */
@@ -155,6 +143,7 @@
/* p161 */
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
+#define CLASSIC_EARLY_DEPTH (1<<31)
#define TEX_DEFAULT_COLOR_OGL (0<<30)
#define TEX_DEFAULT_COLOR_D3D (1<<30)
#define ZR_EARLY_DEPTH (1<<29)
@@ -308,7 +297,9 @@
#define TEXKILL_4D (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
/* _3DSTATE_SCISSOR_ENABLE, p256 */
diff --git a/i915/i915_state.c b/i915/i915_state.c
index 814fb59..b60efea 100644
--- a/i915/i915_state.c
+++ b/i915/i915_state.c
@@ -48,73 +48,119 @@
#define FILE_DEBUG_FLAG DEBUG_STATE
-static void
-i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
- GLuint mask)
+void
+i915_update_stencil(GLcontext * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
- int test = intel_translate_compare_func(func);
+ GLuint front_ref, front_writemask, front_mask;
+ GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass;
+ GLuint back_ref, back_writemask, back_mask;
+ GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass;
- mask = mask & 0xff;
-
- DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(func), ref, mask);
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ /* The 915 considers CW to be "front" for two-sided stencil, so choose
+ * appropriately.
+ */
+ /* _NEW_POLYGON | _NEW_STENCIL */
+ if (ctx->Polygon.FrontFace == GL_CW) {
+ front_ref = ctx->Stencil.Ref[0];
+ front_mask = ctx->Stencil.ValueMask[0];
+ front_writemask = ctx->Stencil.WriteMask[0];
+ front_func = ctx->Stencil.Function[0];
+ front_fail = ctx->Stencil.FailFunc[0];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ } else {
+ front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ back_ref = ctx->Stencil.Ref[0];
+ back_mask = ctx->Stencil.ValueMask[0];
+ back_writemask = ctx->Stencil.WriteMask[0];
+ back_func = ctx->Stencil.Function[0];
+ back_fail = ctx->Stencil.FailFunc[0];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ }
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+ /* Set front state. */
+ i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK |
+ MODE4_ENABLE_STENCIL_WRITE_MASK);
i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
- STENCIL_TEST_MASK(mask));
+ ENABLE_STENCIL_WRITE_MASK |
+ STENCIL_TEST_MASK(front_mask) |
+ STENCIL_WRITE_MASK(front_writemask));
i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
- S5_STENCIL_TEST_FUNC_MASK);
+ S5_STENCIL_TEST_FUNC_MASK |
+ S5_STENCIL_FAIL_MASK |
+ S5_STENCIL_PASS_Z_FAIL_MASK |
+ S5_STENCIL_PASS_Z_PASS_MASK);
+
+ i915->state.Ctx[I915_CTXREG_LIS5] |=
+ (front_ref << S5_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) |
+ (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_fail) <<
+ S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_pass) <<
+ S5_STENCIL_PASS_Z_PASS_SHIFT);
+
+ /* Set back state if different from front. */
+ if (ctx->Stencil._TestTwoSide) {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &=
+ ~(BFO_STENCIL_REF_MASK |
+ BFO_STENCIL_TEST_MASK |
+ BFO_STENCIL_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_PASS_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE |
+ (back_ref << BFO_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) |
+ (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_fail) <<
+ BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_pass) <<
+ BFO_STENCIL_PASS_Z_PASS_SHIFT);
+
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &=
+ ~(BFM_STENCIL_TEST_MASK_MASK |
+ BFM_STENCIL_WRITE_MASK_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |=
+ BFM_STENCIL_TEST_MASK(back_mask) |
+ BFM_STENCIL_WRITE_MASK(back_writemask);
+ } else {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE;
+ }
+}
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) |
- (test <<
- S5_STENCIL_TEST_FUNC_SHIFT));
+static void
+i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
+ GLuint mask)
+{
}
static void
i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
-
- DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
-
- mask = mask & 0xff;
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
- i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
- STENCIL_WRITE_MASK(mask));
}
-
static void
i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
GLenum zpass)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
- int fop = intel_translate_stencil_op(fail);
- int dfop = intel_translate_stencil_op(zfail);
- int dpop = intel_translate_stencil_op(zpass);
-
-
- DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(fail),
- _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass));
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
-
- i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
- S5_STENCIL_PASS_Z_FAIL_MASK |
- S5_STENCIL_PASS_Z_PASS_MASK);
-
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) |
- (dfop <<
- S5_STENCIL_PASS_Z_FAIL_SHIFT) |
- (dpop <<
- S5_STENCIL_PASS_Z_PASS_SHIFT));
}
static void
@@ -945,6 +991,17 @@ i915_init_packets(struct i915_context *i915)
_3DSTATE_CONST_BLEND_COLOR_CMD;
i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] =
+ _3DSTATE_BACKFACE_STENCIL_MASKS |
+ BFM_ENABLE_STENCIL_TEST_MASK |
+ BFM_ENABLE_STENCIL_WRITE_MASK |
+ (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) |
+ (0xff << BFM_STENCIL_TEST_MASK_SHIFT);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] =
+ _3DSTATE_BACKFACE_STENCIL_OPS |
+ BFO_ENABLE_STENCIL_REF |
+ BFO_ENABLE_STENCIL_FUNCS |
+ BFO_ENABLE_STENCIL_TWO_SIDE;
}
{
@@ -976,6 +1033,13 @@ i915_init_packets(struct i915_context *i915)
i915->state.Buffer[I915_DESTREG_SR2] = 0;
}
+ i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D;
#if 0
{
@@ -996,7 +1060,33 @@ i915_init_packets(struct i915_context *i915)
i915->state.active = (I915_UPLOAD_PROGRAM |
I915_UPLOAD_STIPPLE |
I915_UPLOAD_CTX |
- I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT);
+ I915_UPLOAD_BUFFERS |
+ I915_UPLOAD_INVARIENT |
+ I915_UPLOAD_RASTER_RULES);
+}
+
+void
+i915_update_provoking_vertex(GLcontext * ctx)
+{
+ struct i915_context *i915 = I915_CONTEXT(ctx);
+
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK);
+
+ I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES);
+ i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
+ TRI_FAN_PROVOKE_VRTX_MASK);
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2));
+ i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT);
+ } else {
+ i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
+ TRI_FAN_PROVOKE_VRTX(1));
+ i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT);
+ }
}
void
diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c
index 7cc1c09..d9588e5 100644
--- a/i915/i915_tex_layout.c
+++ b/i915/i915_tex_layout.c
@@ -55,6 +55,17 @@ static GLint step_offsets[6][2] = {
[FACE_NEG_Z] = {-1, 1},
};
+
+static GLint bottom_offsets[6] = {
+ [FACE_POS_X] = 16 + 0 * 8,
+ [FACE_POS_Y] = 16 + 1 * 8,
+ [FACE_POS_Z] = 16 + 2 * 8,
+ [FACE_NEG_X] = 16 + 3 * 8,
+ [FACE_NEG_Y] = 16 + 4 * 8,
+ [FACE_NEG_Z] = 16 + 5 * 8,
+};
+
+
/**
* Cube texture map layout for i830M-GM915.
*
@@ -101,7 +112,8 @@ static GLint step_offsets[6][2] = {
*/
static void
i915_miptree_layout_cube(struct intel_context *intel,
- struct intel_mipmap_tree * mt)
+ struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
const GLuint dim = mt->width0;
GLuint face;
@@ -111,7 +123,7 @@ i915_miptree_layout_cube(struct intel_context *intel,
assert(lvlWidth == lvlHeight); /* cubemap images are square */
/* double pitch for cube layouts */
- mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2);
mt->total_height = dim * 4;
for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -145,7 +157,8 @@ i915_miptree_layout_cube(struct intel_context *intel,
static void
i915_miptree_layout_3d(struct intel_context *intel,
- struct intel_mipmap_tree * mt)
+ struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
GLuint width = mt->width0;
GLuint height = mt->height0;
@@ -154,7 +167,7 @@ i915_miptree_layout_3d(struct intel_context *intel,
GLint level;
/* Calculate the size of a single slice. */
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
/* XXX: hardware expects/requires 9 levels at minimum. */
for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) {
@@ -189,14 +202,15 @@ i915_miptree_layout_3d(struct intel_context *intel,
static void
i915_miptree_layout_2d(struct intel_context *intel,
- struct intel_mipmap_tree * mt)
+ struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
GLuint width = mt->width0;
GLuint height = mt->height0;
GLuint img_height;
GLint level;
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
mt->total_height = 0;
for (level = mt->first_level; level <= mt->last_level; level++) {
@@ -217,19 +231,20 @@ i915_miptree_layout_2d(struct intel_context *intel,
}
GLboolean
-i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
- i915_miptree_layout_cube(intel, mt);
+ i915_miptree_layout_cube(intel, mt, tiling);
break;
case GL_TEXTURE_3D:
- i915_miptree_layout_3d(intel, mt);
+ i915_miptree_layout_3d(intel, mt, tiling);
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE_ARB:
- i915_miptree_layout_2d(intel, mt);
+ i915_miptree_layout_2d(intel, mt, tiling);
break;
default:
_mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()");
@@ -297,7 +312,7 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
* +---+ +---+ +---+ +---+ +---+ +---+
*
* The bottom row continues with the remaining 2x2 then the 1x1 mip contents
- * in order, with each of them aligned to a 4x4 block boundary. Thus, for
+ * in order, with each of them aligned to a 8x8 block boundary. Thus, for
* 32x32 cube maps and smaller, the bottom row layout is going to dictate the
* pitch of the tree. For a tree with 4x4 images, the pitch is at least
* 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1
@@ -306,7 +321,8 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
static void
i945_miptree_layout_cube(struct intel_context *intel,
- struct intel_mipmap_tree * mt)
+ struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
const GLuint dim = mt->width0;
GLuint face;
@@ -320,9 +336,9 @@ i945_miptree_layout_cube(struct intel_context *intel,
* or the final row of 4x4, 2x2 and 1x1 faces below this.
*/
if (dim > 32)
- mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2);
else
- mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, 14 * 8);
if (dim >= 4)
mt->total_height = dim * 4 + 4;
@@ -375,10 +391,11 @@ i945_miptree_layout_cube(struct intel_context *intel,
x = (face - 4) * 8;
break;
}
+ break;
case 2:
y = mt->total_height - 4;
- x = 16 + face * 8;
+ x = bottom_offsets[face];
break;
case 1:
@@ -396,7 +413,8 @@ i945_miptree_layout_cube(struct intel_context *intel,
static void
i945_miptree_layout_3d(struct intel_context *intel,
- struct intel_mipmap_tree * mt)
+ struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
GLuint width = mt->width0;
GLuint height = mt->height0;
@@ -405,7 +423,7 @@ i945_miptree_layout_3d(struct intel_context *intel,
GLuint pack_y_pitch;
GLuint level;
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
mt->total_height = 0;
pack_y_pitch = MAX2(mt->height0, 2);
@@ -450,22 +468,23 @@ i945_miptree_layout_3d(struct intel_context *intel,
}
GLboolean
-i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+ uint32_t tiling)
{
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
if (mt->compressed)
- i945_miptree_layout_cube(intel, mt);
+ i945_miptree_layout_cube(intel, mt, tiling);
else
- i915_miptree_layout_cube(intel, mt);
+ i915_miptree_layout_cube(intel, mt, tiling);
break;
case GL_TEXTURE_3D:
- i945_miptree_layout_3d(intel, mt);
+ i945_miptree_layout_3d(intel, mt, tiling);
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE_ARB:
- i945_miptree_layout_2d(intel, mt);
+ i945_miptree_layout_2d(intel, mt, tiling);
break;
default:
_mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
index a37dd7f..32d4b30 100644
--- a/i915/i915_texstate.c
+++ b/i915/i915_texstate.c
@@ -185,8 +185,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
state[I915_TEXREG_MS3] =
(((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
- ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format |
- MS3_USE_FENCE_REGS);
+ ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format);
+
+ if (intelObj->mt->region->tiling != I915_TILING_NONE) {
+ state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
+ if (intelObj->mt->region->tiling == I915_TILING_Y)
+ state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
+ }
state[I915_TEXREG_MS4] =
((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK |
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
index 1150046..9a723d3 100644
--- a/i915/i915_vtbl.c
+++ b/i915/i915_vtbl.c
@@ -42,6 +42,7 @@
#include "intel_regions.h"
#include "intel_tris.h"
#include "intel_fbo.h"
+#include "intel_chipset.h"
#include "i915_reg.h"
#include "i915_context.h"
@@ -175,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(20, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(17, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -199,14 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel)
CSB_TCB(3, 3) |
CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
-
/* Need to initialize this to zero.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
@@ -224,11 +217,6 @@ i915_emit_invarient_state(struct intel_context *intel)
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
-
- /* Don't support twosided stencil yet */
- OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
- OUT_BATCH(0);
-
ADVANCE_BATCH();
}
@@ -262,6 +250,9 @@ get_state_size(struct i915_hw_state *state)
if (dirty & I915_UPLOAD_INVARIENT)
sz += 30 * 4;
+ if (dirty & I915_UPLOAD_RASTER_RULES)
+ sz += sizeof(state->RasterRules);
+
if (dirty & I915_UPLOAD_CTX)
sz += sizeof(state->Ctx);
@@ -370,6 +361,12 @@ i915_emit_state(struct intel_context *intel)
i915_emit_invarient_state(intel);
}
+ if (dirty & I915_UPLOAD_RASTER_RULES) {
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
+ emit(intel, state->RasterRules, sizeof(state->RasterRules));
+ }
+
if (dirty & I915_UPLOAD_CTX) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_CTX:\n");
@@ -529,6 +526,23 @@ i915_destroy_context(struct intel_context *intel)
_tnl_free_vertices(&intel->ctx);
}
+void
+i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
+ uint32_t buffer_id)
+{
+ state[0] = _3DSTATE_BUF_INFO_CMD;
+ state[1] = buffer_id;
+
+ if (region != NULL) {
+ state[1] |= BUF_3D_PITCH(region->pitch * region->cpp);
+
+ if (region->tiling != I915_TILING_NONE) {
+ state[1] |= BUF_3D_TILED_SURFACE;
+ if (region->tiling == I915_TILING_Y)
+ state[1] |= BUF_3D_TILE_WALK_Y;
+ }
+ }
+}
/**
* Set the drawing regions for the color and depth/stencil buffers.
@@ -562,21 +576,11 @@ i915_state_draw_region(struct intel_context *intel,
/*
* Set stride/cpp values
*/
- if (color_region) {
- state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
- state->Buffer[I915_DESTREG_CBUFADDR1] =
- (BUF_3D_ID_COLOR_BACK |
- BUF_3D_PITCH(color_region->pitch * color_region->cpp) |
- BUF_3D_USE_FENCE);
- }
+ i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0],
+ color_region, BUF_3D_ID_COLOR_BACK);
- if (depth_region) {
- state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
- state->Buffer[I915_DESTREG_DBUFADDR1] =
- (BUF_3D_ID_DEPTH |
- BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) |
- BUF_3D_USE_FENCE);
- }
+ i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0],
+ depth_region, BUF_3D_ID_DEPTH);
/*
* Compute/set I915_DESTREG_DV1 value
@@ -604,6 +608,14 @@ i915_state_draw_region(struct intel_context *intel,
}
}
+ /* This isn't quite safe, thus being hidden behind an option. When changing
+ * the value of this bit, the pipeline needs to be MI_FLUSHed. And it
+ * can only be set when a depth buffer is already defined.
+ */
+ if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z &&
+ depth_region->tiling != I915_TILING_NONE)
+ value |= CLASSIC_EARLY_DEPTH;
+
if (depth_region && depth_region->cpp == 4) {
value |= DEPTH_FRMT_24_FIXED_8_OTHER;
}
@@ -676,13 +688,6 @@ i915_assert_not_dirty( struct intel_context *intel )
assert(!dirty);
}
-static void
-i915_note_unlock( struct intel_context *intel )
-{
- /* nothing */
-}
-
-
void
i915InitVtbl(struct i915_context *i915)
{
@@ -697,6 +702,5 @@ i915InitVtbl(struct i915_context *i915)
i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
i915->intel.vtbl.flush_cmd = i915_flush_cmd;
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
- i915->intel.vtbl.note_unlock = i915_note_unlock;
i915->intel.vtbl.finish_batch = intel_finish_vb;
}
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index 1d39278..a905455 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -1255,11 +1255,9 @@ intel_meta_draw_poly(struct intel_context *intel,
{
union fi *vb;
GLint i;
- GLboolean was_locked = intel->locked;
unsigned int saved_vertex_size = intel->vertex_size;
- if (!was_locked)
- LOCK_HARDWARE(intel);
+ LOCK_HARDWARE(intel);
intel->vertex_size = 6;
@@ -1283,8 +1281,7 @@ intel_meta_draw_poly(struct intel_context *intel,
intel->vertex_size = saved_vertex_size;
- if (!was_locked)
- UNLOCK_HARDWARE(intel);
+ UNLOCK_HARDWARE(intel);
}
static void
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 5a118af..8ada8b5 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -17,6 +17,7 @@ i965_dri_la_SOURCES = \
../shared/intel_decode.c \
../shared/intel_extensions.c \
../shared/intel_fbo.c \
+ ../shared/intel_generatemipmap.c \
../shared/intel_mipmap_tree.c \
../shared/intel_regions.c \
../shared/intel_screen.c \
@@ -25,8 +26,10 @@ i965_dri_la_SOURCES = \
../shared/intel_pixel_bitmap.c \
../shared/intel_pixel_copy.c \
../shared/intel_pixel_draw.c \
+ ../shared/intel_pixel_read.c \
../shared/intel_state.c \
../shared/intel_swapbuffers.c \
+ ../shared/intel_syncobj.c \
../shared/intel_tex.c \
../shared/intel_tex_copy.c \
../shared/intel_tex_format.c \
@@ -44,6 +47,7 @@ i965_dri_la_SOURCES = \
brw_clip_util.c \
brw_context.c \
brw_curbe.c \
+ brw_disasm.c \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
@@ -72,6 +76,7 @@ i965_dri_la_SOURCES = \
brw_vs_constval.c \
brw_vs_emit.c \
brw_vs_state.c \
+ brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_debug.c \
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index 5cffceb..20a927c 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -65,21 +65,31 @@ static void compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
c.key = *key;
-
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.header_position_offset = ATTR_SIZE;
- for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++)
+ if (BRW_IS_IGDNG(brw))
+ delta = 3 * REG_SIZE;
+ else
+ delta = REG_SIZE;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
if (c.key.attrs & (1<<i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
}
c.nr_attrs = brw_count_bits(c.key.attrs);
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
c.nr_bytes = c.nr_regs * REG_SIZE;
c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
@@ -148,11 +158,16 @@ static void upload_clip_prog(struct brw_context *brw)
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
- key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+ if (BRW_IS_IGDNG(brw))
+ key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+ else
+ key.clip_mode = BRW_CLIPMODE_NORMAL;
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
index e067478..957df44 100644
--- a/i965/brw_clip.h
+++ b/i965/brw_clip.h
@@ -100,6 +100,8 @@ struct brw_clip_compile {
struct brw_reg fixed_planes;
struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
} reg;
/* 3 different ways of expressing vertex size:
@@ -117,6 +119,7 @@ struct brw_clip_compile {
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
+ GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
@@ -171,5 +174,6 @@ struct brw_reg get_tmp( struct brw_clip_compile *c );
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
-
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
#endif
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index d830e49..048ca62 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
c->first_tmp = i;
c->last_tmp = i;
@@ -130,7 +134,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
struct brw_instruction *plane_loop;
struct brw_instruction *plane_active;
struct brw_instruction *is_negative;
- struct brw_instruction *is_neg2;
+ struct brw_instruction *is_neg2 = NULL;
struct brw_instruction *not_culled;
struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
@@ -148,7 +152,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -185,7 +189,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
* Both can be negative on GM965/G965 due to RHW workaround
* if so, this object should be rejected.
*/
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
{
@@ -210,7 +214,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
/* If both are positive, do nothing */
/* Only on GM965/G965 */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
is_neg2 = brw_IF(p, BRW_EXECUTE_1);
}
@@ -225,7 +229,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_ENDIF(p, is_neg2);
}
}
@@ -263,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
void brw_emit_line_clip( struct brw_clip_compile *c )
{
brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
if (c->key.do_flat_shading)
brw_clip_copy_colors(c, 0, 1);
diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c
index d17b199..8458f61 100644
--- a/i965/brw_clip_point.c
+++ b/i965/brw_clip_point.c
@@ -50,5 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c )
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_init_ff_sync(c);
+
brw_clip_kill_thread(c);
}
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 9b0d7ea..5762c95 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -95,7 +95,14 @@ clip_unit_create_from_key(struct brw_context *brw,
* even number.
*/
assert(key->nr_urb_entries % 2 == 0);
- clip.thread4.max_threads = 2 - 1;
+
+ /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+ * only 2 threads can output VUEs at a time.
+ */
+ if (BRW_IS_IGDNG(brw))
+ clip.thread4.max_threads = 16 - 1;
+ else
+ clip.thread4.max_threads = 2 - 1;
} else {
assert(key->nr_urb_entries >= 5);
clip.thread4.max_threads = 1 - 1;
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index 7fd37bd..0efd772 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -77,6 +77,10 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
if (c->nr_attrs & 1) {
for (j = 0; j < 3; j++) {
GLuint delta = c->nr_attrs*16 + 32;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ delta = c->nr_attrs * 16 + 32 * 3;
+
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
}
@@ -115,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
c->first_tmp = i;
c->last_tmp = i;
@@ -559,10 +568,11 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
/* if -ve rhw workaround bit is set,
do cliptest */
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
@@ -579,11 +589,12 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
- if (c->key.clip_mode == BRW_CLIPMODE_NORMAL)
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
do_clip_tri(c);
else
maybe_do_clip_tri(c);
-
+
brw_clip_tri_emit_polygon(c);
/* Send an empty message to kill the thread:
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
index d7ca517..ad1bfa4 100644
--- a/i965/brw_clip_unfilled.c
+++ b/i965/brw_clip_unfilled.c
@@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
assert(c->offset[VERT_RESULT_EDGE]);
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
index 9d3b0be..5a73abd 100644
--- a/i965/brw_clip_util.c
+++ b/i965/brw_clip_util.c
@@ -140,6 +140,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
/* Just copy the vertex header:
*/
+ /*
+ * After CLIP stage, only first 256 bits of the VUE are read
+ * back on IGDNG, so needn't change it
+ */
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
/* Iterate over each attribute (could be done in pairs?)
@@ -147,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
for (i = 0; i < c->nr_attrs; i++) {
GLuint delta = i*16 + 32;
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
if (delta == c->offset[VERT_RESULT_EDGE]) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
@@ -177,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
if (i & 1) {
GLuint delta = i*16 + 32;
+
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
@@ -202,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_compile *p = &c->func;
GLuint start = c->last_mrf;
+ brw_clip_ff_sync(c);
+
assert(!(allocate && eot));
/* Cycle through mrf regs - probably futile as we have to wait for
@@ -252,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
+ brw_clip_ff_sync(c);
/* Send an empty message to kill the thread and release any
* allocated urb entry:
*/
@@ -343,3 +357,40 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
}
}
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *need_ff_sync;
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+ need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+ }
+ brw_ENDIF(p, need_ff_sync);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+ }
+}
diff --git a/i965/brw_context.c b/i965/brw_context.c
index 4dbe551..c300c33 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -125,8 +125,34 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
/* We want the GLSL compiler to emit code that uses condition codes */
ctx->Shader.EmitCondCodes = GL_TRUE;
-
-/* ctx->Const.MaxNativeVertexProgramTemps = 32; */
+ ctx->Shader.EmitNVTempInitialization = GL_TRUE;
+
+ ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
+ ctx->Const.VertexProgram.MaxAluInstructions = 0;
+ ctx->Const.VertexProgram.MaxTexInstructions = 0;
+ ctx->Const.VertexProgram.MaxTexIndirections = 0;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
+ ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
+ ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16;
+ ctx->Const.VertexProgram.MaxNativeTemps = 256;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+ ctx->Const.VertexProgram.MaxNativeParameters = 1024;
+ ctx->Const.VertexProgram.MaxEnvParams =
+ MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
+ ctx->Const.VertexProgram.MaxEnvParams);
+
+ ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
+ ctx->Const.FragmentProgram.MaxNativeTemps = 256;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
+ ctx->Const.FragmentProgram.MaxEnvParams =
+ MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+ ctx->Const.FragmentProgram.MaxEnvParams);
brw_init_state( brw );
diff --git a/i965/brw_context.h b/i965/brw_context.h
index 577497b..a5209ac 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -129,8 +129,8 @@ struct brw_context;
#define BRW_NEW_PRIMITIVE 0x40
#define BRW_NEW_CONTEXT 0x80
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
-#define BRW_NEW_INPUT_VARYING 0x200
#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
#define BRW_NEW_FENCE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
@@ -143,6 +143,7 @@ struct brw_context;
#define BRW_NEW_DEPTH_BUFFER 0x20000
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -173,6 +174,9 @@ struct brw_fragment_program {
dri_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
+
+ /** for debugging, which texture units are referenced */
+ GLbitfield tex_units_used;
};
@@ -386,6 +390,8 @@ struct brw_cached_batch_item {
struct brw_vertex_element {
const struct gl_client_array *glarray;
+ /** The corresponding Mesa vertex attribute */
+ gl_vert_attrib attrib;
/** Size of a complete element */
GLuint element_size;
/** Number of uploaded elements for this input. */
@@ -401,7 +407,6 @@ struct brw_vertex_element {
struct brw_vertex_info {
- GLuint varying; /* varying:1[VERT_ATTRIB_MAX] */
GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
};
@@ -439,9 +444,13 @@ struct brw_query_object {
unsigned int count;
};
+
+/**
+ * brw_context is derived from intel_context.
+ */
struct brw_context
{
- struct intel_context intel;
+ struct intel_context intel; /**< base class, must be first field */
GLuint primitive;
GLboolean emit_state_always;
@@ -450,8 +459,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
- struct brw_tracked_state **atoms;
- GLuint nr_atoms;
GLuint nr_color_regions;
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
@@ -471,12 +478,16 @@ struct brw_context
int validated_bo_count;
} state;
- struct brw_cache cache;
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
struct brw_cached_batch_item *cached_batch_items;
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+ struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+ GLuint nr_enabled;
+
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
@@ -500,8 +511,15 @@ struct brw_context
*/
const struct _mesa_index_buffer *ib;
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
dri_bo *bo;
unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
} ib;
/* Active vertex program:
@@ -555,11 +573,6 @@ struct brw_context
GLuint vs_size;
GLuint total_size;
- /* Dynamic tracker which changes to reflect the state referenced
- * by active fp and vp program parameters:
- */
- struct brw_tracked_state tracked_state;
-
dri_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;
@@ -713,6 +726,8 @@ void brw_upload_urb_fence(struct brw_context *brw);
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
+/* brw_disasm.c */
+int brw_disasm (FILE *file, struct brw_instruction *inst);
/*======================================================================
* Inline conversion functions. These are better-typed than the
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index 9197fed..4be6c77 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -36,6 +36,7 @@
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
@@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLfloat *buf;
GLuint i;
- /* Update our own dependency flags. This works because this
- * function will also be called whenever fp or vp changes.
- */
- brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
- brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
- brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
-
if (sz == 0) {
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
@@ -254,6 +248,12 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
+ if (brw->vertex_program->IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
+
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
/* XXX just use a memcpy here */
@@ -335,78 +335,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
*/
}
-
-/**
- * Copy Mesa program parameters into given constant buffer.
- */
-static void
-update_constant_buffer(struct brw_context *brw,
- const struct gl_program_parameter_list *params,
- dri_bo *const_buffer)
-{
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- /* copy Mesa program constants into the buffer */
- if (const_buffer && size > 0) {
-
- assert(const_buffer);
- assert(const_buffer->size >= size);
-
- if (intel->intelScreen->kernel_exec_fencing) {
- drm_intel_gem_bo_map_gtt(const_buffer);
- memcpy(const_buffer->virtual, params->ParameterValues, size);
- drm_intel_gem_bo_unmap_gtt(const_buffer);
- }
- else {
- dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
- }
-
- if (0) {
- int i;
- for (i = 0; i < params->NumParameters; i++) {
- float *p = params->ParameterValues[i];
- printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
- }
- }
- }
-}
-
-
-/** Copy current vertex program's parameters into the constant buffer */
-static void
-update_vertex_constant_buffer(struct brw_context *brw)
-{
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *) brw->vertex_program;
- if (0) {
- printf("update VS constants in buffer %p\n", vp->const_buffer);
- printf("program %u\n", vp->program.Base.Id);
- }
- if (vp->use_const_buffer)
- update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer);
-}
-
-
-/** Copy current fragment program's parameters into the constant buffer */
-static void
-update_fragment_constant_buffer(struct brw_context *brw)
-{
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- if (fp->use_const_buffer)
- update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
-}
-
-
static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- update_vertex_constant_buffer(brw);
- update_fragment_constant_buffer(brw);
-
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
@@ -428,7 +361,7 @@ static void emit_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
- .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
+ .mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index 98fc909..78d457a 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -139,6 +139,7 @@
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
#define BRW_CLIPMODE_REJECT_ALL 3
#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
#define BRW_CLIP_NDCSPACE 0
#define BRW_CLIP_SCREENSPACE 1
@@ -470,8 +471,9 @@
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
-#define BRW_CONDITIONAL_C 7
+#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
@@ -511,6 +513,7 @@
#define BRW_OPCODE_RSL 11
#define BRW_OPCODE_ASR 12
#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
#define BRW_OPCODE_JMPI 32
#define BRW_OPCODE_IF 34
#define BRW_OPCODE_IFF 35
@@ -670,6 +673,25 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
@@ -819,8 +841,11 @@
#include "intel_chipset.h"
#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */
+#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
+#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
#endif
diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c
new file mode 100644
index 0000000..9fef230
--- /dev/null
+++ b/i965/brw_disasm.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+struct {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+char *dest_condmod[16] = {
+};
+
+char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+char *compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+};
+
+char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+char *imm_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [5] = "VF",
+ [5] = "V",
+ [7] = "F"
+};
+
+char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+char *target_function[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+ [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_TAN] = "tan",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+};
+
+char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+char *urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+char *urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+char *urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+char *sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+static int column;
+
+static int string (FILE *file, char *string)
+{
+ fputs (string, file);
+ column += strlen (string);
+ return 0;
+}
+
+static int format (FILE *f, char *format, ...)
+{
+ char buf[1024];
+ va_list args;
+ va_start (args, format);
+
+ vsnprintf (buf, sizeof (buf) - 1, format, args);
+ string (f, buf);
+ return 0;
+}
+
+static int newline (FILE *f)
+{
+ putc ('\n', f);
+ column = 0;
+ return 0;
+}
+
+static int pad (FILE *f, int c)
+{
+ do
+ string (f, " ");
+ while (column < c);
+ return 0;
+}
+
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+ if (!ctrl[id]) {
+ fprintf (file, "*** invalid %s value %d ",
+ name, id);
+ return 1;
+ }
+ if (ctrl[id][0])
+ {
+ if (space && *space)
+ string (file, " ");
+ string (file, ctrl[id]);
+ if (space)
+ *space = 1;
+ }
+ return 0;
+}
+
+static int print_opcode (FILE *file, int id)
+{
+ if (!opcode[id].name) {
+ format (file, "*** invalid opcode value %d ", id);
+ return 1;
+ }
+ string (file, opcode[id].name);
+ return 0;
+}
+
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+ int err = 0;
+ if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (_reg_nr & 0xf0) {
+ case BRW_ARF_NULL:
+ string (file, "null");
+ return -1;
+ case BRW_ARF_ADDRESS:
+ format (file, "a%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ format (file, "acc%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK:
+ format (file, "mask%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK_STACK:
+ format (file, "msd%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_STATE:
+ format (file, "sr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_CONTROL:
+ format (file, "cr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_NOTIFICATION_COUNT:
+ format (file, "n%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_IP:
+ string (file, "ip");
+ return -1;
+ break;
+ default:
+ format (file, "ARF%d", _reg_nr);
+ break;
+ }
+ } else {
+ err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+ format (file, "%d", _reg_nr);
+ }
+ return err;
+}
+
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+ }
+ else
+ {
+ string (file, "g[a0");
+ if (inst->bits1.ia1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ if (inst->bits1.ia1.dest_indirect_offset)
+ format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+ string (file, "]");
+ format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+ }
+ }
+ else
+ {
+ if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da16.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+ }
+ else
+ {
+ err = 1;
+ string (file, "Indirect align16 address mode not supported");
+ }
+ }
+
+ return 0;
+}
+
+static int src_align1_region (FILE *file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+ int err = 0;
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",");
+ err |= control (file, "width", width, _width, NULL);
+ string (file, ",");
+ err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+ string (file, ">");
+ return err;
+}
+
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+ GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, reg_num);
+ if (err == -1)
+ return 0;
+ if (sub_reg_num)
+ format (file, ".%d", sub_reg_num);
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_ia1 (FILE *file,
+ GLuint type,
+ GLuint _reg_file,
+ GLint _addr_imm,
+ GLuint _addr_subreg_nr,
+ GLuint _negate,
+ GLuint __abs,
+ GLuint _addr_mode,
+ GLuint _horiz_stride,
+ GLuint _width,
+ GLuint _vert_stride)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ string (file, "g[a0");
+ if (_addr_subreg_nr)
+ format (file, ".%d", _addr_subreg_nr);
+ if (_addr_imm)
+ format (file, " %d", _addr_imm);
+ string (file, "]");
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_da16 (FILE *file,
+ GLuint _reg_type,
+ GLuint _reg_file,
+ GLuint _vert_stride,
+ GLuint _reg_nr,
+ GLuint _subreg_nr,
+ GLuint __abs,
+ GLuint _negate,
+ GLuint swz_x,
+ GLuint swz_y,
+ GLuint swz_z,
+ GLuint swz_w)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, _reg_nr);
+ if (err == -1)
+ return 0;
+ if (_subreg_nr)
+ format (file, ".%d", _subreg_nr);
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+ switch (type) {
+ case BRW_REGISTER_TYPE_UD:
+ format (file, "0x%08xUD", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ format (file, "%dD", inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UW:
+ format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_W:
+ format (file, "%dW", (int16_t) inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UB:
+ format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_VF:
+ format (file, "Vector Float");
+ break;
+ case BRW_REGISTER_TYPE_V:
+ format (file, "0x%08xV", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_F:
+ format (file, "%-gF", inst->bits3.f);
+ }
+ return 0;
+}
+
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src0_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src0_reg_type,
+ inst->bits1.da1.src0_reg_file,
+ inst->bits2.da1.src0_vert_stride,
+ inst->bits2.da1.src0_width,
+ inst->bits2.da1.src0_horiz_stride,
+ inst->bits2.da1.src0_reg_nr,
+ inst->bits2.da1.src0_subreg_nr,
+ inst->bits2.da1.src0_abs,
+ inst->bits2.da1.src0_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src0_reg_type,
+ inst->bits1.ia1.src0_reg_file,
+ inst->bits2.ia1.src0_indirect_offset,
+ inst->bits2.ia1.src0_subreg_nr,
+ inst->bits2.ia1.src0_negate,
+ inst->bits2.ia1.src0_abs,
+ inst->bits2.ia1.src0_address_mode,
+ inst->bits2.ia1.src0_horiz_stride,
+ inst->bits2.ia1.src0_width,
+ inst->bits2.ia1.src0_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src0_reg_type,
+ inst->bits1.da16.src0_reg_file,
+ inst->bits2.da16.src0_vert_stride,
+ inst->bits2.da16.src0_reg_nr,
+ inst->bits2.da16.src0_subreg_nr,
+ inst->bits2.da16.src0_abs,
+ inst->bits2.da16.src0_negate,
+ inst->bits2.da16.src0_swz_x,
+ inst->bits2.da16.src0_swz_y,
+ inst->bits2.da16.src0_swz_z,
+ inst->bits2.da16.src0_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src1_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src1_reg_type,
+ inst->bits1.da1.src1_reg_file,
+ inst->bits3.da1.src1_vert_stride,
+ inst->bits3.da1.src1_width,
+ inst->bits3.da1.src1_horiz_stride,
+ inst->bits3.da1.src1_reg_nr,
+ inst->bits3.da1.src1_subreg_nr,
+ inst->bits3.da1.src1_abs,
+ inst->bits3.da1.src1_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src1_reg_type,
+ inst->bits1.ia1.src1_reg_file,
+ inst->bits3.ia1.src1_indirect_offset,
+ inst->bits3.ia1.src1_subreg_nr,
+ inst->bits3.ia1.src1_negate,
+ inst->bits3.ia1.src1_abs,
+ inst->bits3.ia1.src1_address_mode,
+ inst->bits3.ia1.src1_horiz_stride,
+ inst->bits3.ia1.src1_width,
+ inst->bits3.ia1.src1_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src1_reg_type,
+ inst->bits1.da16.src1_reg_file,
+ inst->bits3.da16.src1_vert_stride,
+ inst->bits3.da16.src1_reg_nr,
+ inst->bits3.da16.src1_subreg_nr,
+ inst->bits3.da16.src1_abs,
+ inst->bits3.da16.src1_negate,
+ inst->bits3.da16.src1_swz_x,
+ inst->bits3.da16.src1_swz_y,
+ inst->bits3.da16.src1_swz_z,
+ inst->bits3.da16.src1_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0;
+
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) {
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER:
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index 5342622..c53bd47 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -141,6 +141,8 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.verts_per_instance = trim(prim->mode, prim->count);
prim_packet.start_vert_location = prim->start;
+ if (prim->indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = 0;
@@ -185,21 +187,16 @@ static void brw_merge_inputs( struct brw_context *brw,
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->vb.inputs[i].glarray = arrays[i];
+ brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
if (arrays[i]->StrideB != 0)
- brw->vb.info.varying |= 1 << i;
-
brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
((i%16) * 2);
}
- /* Raise statechanges if input sizes and varying have changed:
- */
+ /* Raise statechanges if input sizes have changed. */
if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
-
- if (brw->vb.info.varying != old.varying)
- brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING;
}
/* XXX: could split the primitive list to fallback only on the
@@ -416,6 +413,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
out:
UNLOCK_HARDWARE(intel);
+ brw_state_cache_check_size(brw);
+
if (warn)
fprintf(stderr, "i965: Single primitive emit potentially exceeded "
"available aperture space\n");
@@ -426,54 +425,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
return retval;
}
-static GLboolean brw_need_rebase( GLcontext *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_index_buffer *ib,
- GLuint min_index )
-{
- if (min_index == 0)
- return GL_FALSE;
-
- if (ib) {
- if (!vbo_all_varyings_in_vbos(arrays))
- return GL_TRUE;
- else
- return GL_FALSE;
- }
- else {
- /* Hmm. This isn't quite what I wanted. BRW can actually
- * handle the mixed case well enough that we shouldn't need to
- * rebase. However, it's probably not very common, nor hugely
- * expensive to do it this way:
- */
- if (!vbo_all_varyings_in_vbos(arrays))
- return GL_TRUE;
- else
- return GL_FALSE;
- }
-}
-
-
void brw_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index )
{
GLboolean retval;
- /* Decide if we want to rebase. If so we end up recursing once
- * only into this function.
- */
- if (brw_need_rebase( ctx, arrays, ib, min_index )) {
- vbo_rebase_prims( ctx, arrays,
- prim, nr_prims,
- ib, min_index, max_index,
- brw_draw_prims );
-
- return;
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+ /* Decide if we want to rebase. If so we end up recursing once
+ * only into this function.
+ */
+ if (min_index != 0) {
+ vbo_rebase_prims(ctx, arrays,
+ prim, nr_prims,
+ ib, min_index, max_index,
+ brw_draw_prims );
+ return;
+ }
}
/* Make a first attempt at drawing:
diff --git a/i965/brw_draw.h b/i965/brw_draw.h
index 9aebbdb..2a14db2 100644
--- a/i965/brw_draw.h
+++ b/i965/brw_draw.h
@@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index );
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index b91b20b..4aa17fa 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -343,16 +343,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
- GLuint nr_enabled = 0;
-
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
@@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
- while (tmp) {
- GLuint i = _mesa_ffsll(tmp)-1;
+ brw->vb.nr_enabled = 0;
+ while (vs_inputs) {
+ GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
- tmp &= ~(1<<i);
- enabled[nr_enabled++] = input;
+ vs_inputs &= ~(1 << i);
+ brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
/* XXX: In the rare cases where this happens we fallback all
@@ -376,16 +374,15 @@ static void brw_prepare_vertices(struct brw_context *brw)
* cases with > 17 vertex attributes enabled, so it probably
* isn't an issue at this point.
*/
- if (nr_enabled >= BRW_VEP_MAX) {
+ if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
intel->Fallback = 1;
return;
}
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
if (input->glarray->BufferObj->Name != 0) {
struct intel_buffer_object *intel_buffer =
@@ -398,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw)
dri_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
+ input->count = input->glarray->_MaxElement;
+
+ /* This is a common place to reach if the user mistakenly supplies
+ * a pointer in place of a VBO offset. If we just let it go through,
+ * we may end up dereferencing a pointer beyond the bounds of the
+ * GTT. We would hope that the VBO's max_index would save us, but
+ * Mesa appears to hand us min/max values not clipped to the
+ * array object's _MaxElement, and _MaxElement frequently appears
+ * to be wrong anyway.
+ *
+ * The VBO spec allows application termination in this case, and it's
+ * probably a service to the poor programmer to do so rather than
+ * trying to just not render.
+ */
+ assert(input->offset < input->bo->size);
} else {
+ input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
if (input->bo != NULL) {
/* Already-uploaded vertex data is present from a previous
* prepare_vertices, but we had to re-validate state due to
@@ -410,7 +423,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
- if (i == 0) {
+ if (input->attrib == VERT_ATTRIB_POS) {
/* Position array not properly enabled:
*/
if (input->glarray->StrideB == 0) {
@@ -466,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw)
brw_prepare_query_begin(brw);
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
brw_add_validated_bo(brw, input->bo);
}
@@ -477,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLuint tmp = brw->vs.prog_data->inputs_read;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint i;
- GLuint nr_enabled = 0;
- /* Accumulate the list of enabled arrays. */
- while (tmp) {
- i = _mesa_ffsll(tmp)-1;
- struct brw_vertex_element *input = &brw->vb.inputs[i];
+ brw_emit_query_begin(brw);
- tmp &= ~(1<<i);
- enabled[nr_enabled++] = input;
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_enabled == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return;
}
- brw_emit_query_begin(brw);
-
/* Now emit VB and VEP state packets.
*
* This still defines a hardware VB for each input, even if they
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
- BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
- ((1 + nr_enabled * 4) - 2));
+ ((1 + brw->vb.nr_enabled * 4) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
@@ -512,15 +535,27 @@ static void brw_emit_vertices(struct brw_context *brw)
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
- OUT_BATCH(brw->vb.max_index);
+ if (BRW_IS_IGDNG(brw)) {
+ if (input->stride) {
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->stride * input->count);
+ } else {
+ assert(input->count == 1);
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->element_size);
+ }
+ } else
+ OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
- BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
input->glarray->Format,
@@ -542,11 +577,18 @@ static void brw_emit_vertices(struct brw_context *brw)
BRW_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
- OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
- (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
- (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
- (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
- ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+
+ if (BRW_IS_IGDNG(brw))
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+ ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
ADVANCE_BATCH();
}
@@ -570,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw)
dri_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
+ GLuint ib_type_size;
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count;
+ ib_type_size = get_size(index_buffer->type);
+ ib_size = ib_type_size * index_buffer->count;
bufferobj = index_buffer->obj;;
/* Turn into a proper VBO:
*/
if (!bufferobj->Name) {
-
+ brw->ib.start_vertex_offset = 0;
+
/* Get new bufferobj, offset:
*/
get_space(brw, ib_size, &bo, &offset);
@@ -596,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw)
}
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
+ brw->ib.start_vertex_offset = 0;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
@@ -616,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw)
bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
INTEL_READ);
dri_bo_reference(bo);
+
+ /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+ * the index buffer state when we're just moving the start index
+ * of our drawing.
+ */
+ brw->ib.start_vertex_offset = offset / ib_type_size;
+ offset = 0;
+ ib_size = bo->size;
}
}
- dri_bo_unreference(brw->ib.bo);
- brw->ib.bo = bo;
- brw->ib.offset = offset;
+ if (brw->ib.bo != bo ||
+ brw->ib.offset != offset ||
+ brw->ib.size != ib_size)
+ {
+ drm_intel_bo_unreference(brw->ib.bo);
+ brw->ib.bo = bo;
+ brw->ib.offset = offset;
+ brw->ib.size = ib_size;
+
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ } else {
+ drm_intel_bo_unreference(bo);
+ }
brw_add_validated_bo(brw, brw->ib.bo);
}
-static void brw_emit_indices(struct brw_context *brw)
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_INDICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+static void brw_emit_index_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
- GLuint ib_size;
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count;
-
/* Emit the indexbuffer packet:
*/
{
struct brw_indexbuffer ib;
memset(&ib, 0, sizeof(ib));
-
+
ib.header.bits.opcode = CMD_INDEX_BUFFER;
ib.header.bits.length = sizeof(ib)/4 - 2;
ib.header.bits.index_format = get_index_type(index_buffer->type);
ib.header.bits.cut_index_enable = 0;
-
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH( ib.header.dword );
@@ -657,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw)
brw->ib.offset);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
- brw->ib.offset + ib_size);
+ brw->ib.offset + brw->ib.size);
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
}
-const struct brw_tracked_state brw_indices = {
+const struct brw_tracked_state brw_index_buffer = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH | BRW_NEW_INDICES,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
.cache = 0,
},
- .prepare = brw_prepare_indices,
- .emit = brw_emit_indices,
+ .emit = brw_emit_index_buffer,
};
diff --git a/i965/brw_eu.c b/i965/brw_eu.c
index c53efba..1df5613 100644
--- a/i965/brw_eu.c
+++ b/i965/brw_eu.c
@@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
- p->current->header.destreg__conditonalmod = conditional;
+ p->current->header.destreg__conditionalmod = conditional;
}
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
index 003332f..30603bd 100644
--- a/i965/brw_eu.h
+++ b/i965/brw_eu.h
@@ -97,7 +97,7 @@ struct brw_glsl_call;
#define BRW_EU_MAX_INSN_STACK 5
-#define BRW_EU_MAX_INSN 4000
+#define BRW_EU_MAX_INSN 10000
struct brw_compile {
struct brw_instruction store[BRW_EU_MAX_INSN];
@@ -171,9 +171,9 @@ static INLINE struct brw_reg brw_reg( GLuint file,
{
struct brw_reg reg;
if (type == BRW_GENERAL_REGISTER_FILE)
- assert(nr < 128);
+ assert(nr < BRW_MAX_GRF);
else if (type == BRW_MESSAGE_REGISTER_FILE)
- assert(nr < 9);
+ assert(nr < BRW_MAX_MRF);
else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
@@ -538,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
+ assert(nr < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
@@ -815,6 +816,19 @@ void brw_urb_WRITE(struct brw_compile *p,
GLuint offset,
GLuint swizzle);
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -834,7 +848,9 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot);
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
void brw_math_16( struct brw_compile *p,
struct brw_reg dest,
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 2a147fb..241cdc3 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -241,7 +241,8 @@ void brw_set_src1( struct brw_instruction *insn,
-static void brw_set_math_message( struct brw_instruction *insn,
+static void brw_set_math_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint msg_length,
GLuint response_length,
GLuint function,
@@ -252,18 +253,35 @@ static void brw_set_math_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.math.function = function;
- insn->bits3.math.int_type = integer_type;
- insn->bits3.math.precision = low_precision;
- insn->bits3.math.saturate = saturate;
- insn->bits3.math.data_type = dataType;
- insn->bits3.math.response_length = response_length;
- insn->bits3.math.msg_length = msg_length;
- insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
- insn->bits3.math.end_of_thread = 0;
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.math_igdng.function = function;
+ insn->bits3.math_igdng.int_type = integer_type;
+ insn->bits3.math_igdng.precision = low_precision;
+ insn->bits3.math_igdng.saturate = saturate;
+ insn->bits3.math_igdng.data_type = dataType;
+ insn->bits3.math_igdng.snapshot = 0;
+ insn->bits3.math_igdng.header_present = 0;
+ insn->bits3.math_igdng.response_length = response_length;
+ insn->bits3.math_igdng.msg_length = msg_length;
+ insn->bits3.math_igdng.end_of_thread = 0;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_igdng.end_of_thread = 0;
+ } else {
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+ }
}
-static void brw_set_urb_message( struct brw_instruction *insn,
+
+static void brw_set_ff_sync_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLboolean allocate,
GLboolean used,
GLuint msg_length,
@@ -273,21 +291,64 @@ static void brw_set_urb_message( struct brw_instruction *insn,
GLuint offset,
GLuint swizzle_control )
{
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb_igdng.opcode = 1;
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used;
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
- insn->bits3.urb.opcode = 0; /* ? */
- insn->bits3.urb.offset = offset;
- insn->bits3.urb.swizzle_control = swizzle_control;
- insn->bits3.urb.allocate = allocate;
- insn->bits3.urb.used = used; /* ? */
- insn->bits3.urb.complete = complete;
- insn->bits3.urb.response_length = response_length;
- insn->bits3.urb.msg_length = msg_length;
- insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
- insn->bits3.urb.end_of_thread = end_of_thread;
+static void brw_set_urb_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.urb_igdng.opcode = 0; /* ? */
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used; /* ? */
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+ }
}
-static void brw_set_dp_write_message( struct brw_instruction *insn,
+static void brw_set_dp_write_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
@@ -298,18 +359,33 @@ static void brw_set_dp_write_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.dp_write.binding_table_index = binding_table_index;
- insn->bits3.dp_write.msg_control = msg_control;
- insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_write.msg_type = msg_type;
- insn->bits3.dp_write.send_commit_msg = 0;
- insn->bits3.dp_write.response_length = response_length;
- insn->bits3.dp_write.msg_length = msg_length;
- insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits3.urb.end_of_thread = end_of_thread;
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_igdng.msg_control = msg_control;
+ insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_igdng.msg_type = msg_type;
+ insn->bits3.dp_write_igdng.send_commit_msg = 0;
+ insn->bits3.dp_write_igdng.header_present = 1;
+ insn->bits3.dp_write_igdng.response_length = response_length;
+ insn->bits3.dp_write_igdng.msg_length = msg_length;
+ insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.dp_write.end_of_thread = end_of_thread;
+ }
}
-static void brw_set_dp_read_message( struct brw_instruction *insn,
+static void brw_set_dp_read_message( struct brw_context *brw,
+ struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
@@ -320,15 +396,29 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
- insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
- insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
- insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
- insn->bits3.dp_read.response_length = response_length; /*16:19*/
- insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
- insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
- insn->bits3.dp_read.pad1 = 0; /*28:30*/
- insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_igdng.msg_control = msg_control;
+ insn->bits3.dp_read_igdng.msg_type = msg_type;
+ insn->bits3.dp_read_igdng.target_cache = target_cache;
+ insn->bits3.dp_read_igdng.header_present = 1;
+ insn->bits3.dp_read_igdng.response_length = response_length;
+ insn->bits3.dp_read_igdng.msg_length = msg_length;
+ insn->bits3.dp_read_igdng.pad1 = 0;
+ insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+ insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+ insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read.pad1 = 0; /*28:30*/
+ insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ }
}
static void brw_set_sampler_message(struct brw_context *brw,
@@ -338,11 +428,25 @@ static void brw_set_sampler_message(struct brw_context *brw,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot)
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
{
+ assert(eot == 0);
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_G4X(brw)) {
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+ insn->bits3.sampler_igdng.sampler = sampler;
+ insn->bits3.sampler_igdng.msg_type = msg_type;
+ insn->bits3.sampler_igdng.simd_mode = simd_mode;
+ insn->bits3.sampler_igdng.header_present = header_present;
+ insn->bits3.sampler_igdng.response_length = response_length;
+ insn->bits3.sampler_igdng.msg_length = msg_length;
+ insn->bits3.sampler_igdng.end_of_thread = eot;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_igdng.end_of_thread = eot;
+ } else if (BRW_IS_G4X(brw)) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -377,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
/* Reset this one-shot flag:
*/
- if (p->current->header.destreg__conditonalmod) {
- p->current->header.destreg__conditonalmod = 0;
+ if (p->current->header.destreg__conditionalmod) {
+ p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
@@ -484,6 +588,10 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+ insn->header.execution_size = 1;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
@@ -540,6 +648,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *if_insn)
{
struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -566,8 +678,8 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
} else {
assert(if_insn->header.opcode == BRW_OPCODE_IF);
- if_insn->bits3.if_else.jump_count = insn - if_insn;
- if_insn->bits3.if_else.pop_count = 1;
+ if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
+ if_insn->bits3.if_else.pop_count = 0;
if_insn->bits3.if_else.pad0 = 0;
}
@@ -577,6 +689,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *patch_insn)
{
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
if (p->single_program_flow) {
/* In single program flow mode, there's no need to execute an ENDIF,
* since we don't need to do any stack operations, and if we're executing
@@ -608,11 +725,11 @@ void brw_ENDIF(struct brw_compile *p,
/* Automagically turn it into an IFF:
*/
patch_insn->header.opcode = BRW_OPCODE_IFF;
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
patch_insn->bits3.if_else.pop_count = 0;
patch_insn->bits3.if_else.pad0 = 0;
} else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
patch_insn->bits3.if_else.pop_count = 1;
patch_insn->bits3.if_else.pad0 = 0;
} else {
@@ -686,6 +803,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
if (p->single_program_flow)
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -706,7 +827,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
insn->header.execution_size = do_insn->header.execution_size;
assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = do_insn - insn + 1;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
insn->bits3.if_else.pop_count = 0;
insn->bits3.if_else.pad0 = 0;
}
@@ -725,11 +846,15 @@ void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn)
{
struct brw_instruction *landing = &p->store[p->nr_insn];
+ GLuint jmpi = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
- jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+ jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
@@ -746,7 +871,7 @@ void brw_CMP(struct brw_compile *p,
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
- insn->header.destreg__conditonalmod = conditional;
+ insn->header.destreg__conditionalmod = conditional;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -790,11 +915,12 @@ void brw_math( struct brw_compile *p,
* instructions.
*/
insn->header.predicate_control = 0;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -826,11 +952,12 @@ void brw_math_16( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
insn = next_insn(p, BRW_OPCODE_SEND);
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -842,11 +969,12 @@ void brw_math_16( struct brw_compile *p,
*/
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
- insn->header.destreg__conditonalmod = msg_reg_nr+1;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
brw_set_dest(insn, offset(dest,1));
brw_set_src0(insn, src);
- brw_set_math_message(insn,
+ brw_set_math_message(p->brw,
+ insn,
msg_length, response_length,
function,
BRW_MATH_INTEGER_UNSIGNED,
@@ -888,12 +1016,13 @@ void brw_dp_WRITE_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
- brw_set_dp_write_message(insn,
+ brw_set_dp_write_message(p->brw,
+ insn,
255, /* binding table index (255=stateless) */
BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
@@ -933,12 +1062,13 @@ void brw_dp_READ_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest); /* UW? */
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
255, /* binding table index (255=stateless) */
3, /* msg_control (3 means 4 Owords) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -986,7 +1116,7 @@ void brw_dp_READ_4( struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/* cast dest to a uword[8] vector */
@@ -995,7 +1125,8 @@ void brw_dp_READ_4( struct brw_compile *p,
brw_set_dest(insn, dest);
brw_set_src0(insn, brw_null_reg());
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
bind_table_index,
0, /* msg_control (0 means 1 Oword) */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -1059,14 +1190,15 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/*insn->header.access_mode = BRW_ALIGN_16;*/
brw_set_dest(insn, dest);
brw_set_src0(insn, brw_null_reg());
- brw_set_dp_read_message(insn,
+ brw_set_dp_read_message(p->brw,
+ insn,
bind_table_index,
oword, /* 0 = lower Oword, 1 = upper Oword */
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
@@ -1092,11 +1224,12 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
- brw_set_dp_write_message(insn,
+ brw_set_dp_write_message(p->brw,
+ insn,
binding_table_index,
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
@@ -1122,7 +1255,9 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
- GLboolean eot)
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
{
GLboolean need_stall = 0;
@@ -1187,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1197,7 +1332,9 @@ void brw_SAMPLE(struct brw_compile *p,
msg_type,
response_length,
msg_length,
- eot);
+ eot,
+ header_present,
+ simd_mode);
}
if (need_stall) {
@@ -1232,15 +1369,16 @@ void brw_urb_WRITE(struct brw_compile *p,
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < 16);
+ assert(msg_length < BRW_MAX_MRF);
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_urb_message(insn,
+ brw_set_urb_message(p->brw,
+ insn,
allocate,
used,
msg_length,
@@ -1251,3 +1389,37 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 2993574..d27c6c2 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -37,6 +37,9 @@
#include "tnl/tnl.h"
#include "brw_context.h"
#include "brw_fallback.h"
+#include "intel_chipset.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
#include "glapi/glapi.h"
@@ -44,6 +47,7 @@
static GLboolean do_check_fallback(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
@@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw)
return GL_TRUE;
}
+ /* _NEW_BUFFERS */
+ if (IS_965(intel->intelScreen->deviceID) &&
+ !IS_G4X(intel->intelScreen->deviceID)) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ /* The original gen4 hardware couldn't set up WM surfaces pointing
+ * at an offset within a tile, which can happen when rendering to
+ * anything but the base level of a texture or the +X face/0 depth.
+ * This was fixed with the 4 Series hardware.
+ *
+ * For these original chips, you would have to make the depth and
+ * color destination surfaces include information on the texture
+ * type, LOD, face, and various limits to use them as a destination.
+ * I would have done this, but there's also a nasty requirement that
+ * the depth and the color surfaces all be of the same LOD, which
+ * may be a worse requirement than this alignment. (Also, we may
+ * want to just demote the texture to untiled, instead).
+ */
+ if (irb->region && irb->region->tiling != I915_TILING_NONE &&
+ (irb->region->draw_offset & 4095)) {
+ DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
+ return GL_TRUE;
+ }
+ }
+ }
return GL_FALSE;
}
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index a8b74a0..48c2b9a 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -54,12 +54,17 @@ static void compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
-
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
/* Need to locate the two positions present in vertex + header.
* These are currently hardcoded:
*/
c.nr_attrs = brw_count_bits(c.key.attrs);
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
c.nr_bytes = c.nr_regs * REG_SIZE;
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
index 18a4537..bbb991e 100644
--- a/i965/brw_gs.h
+++ b/i965/brw_gs.h
@@ -62,6 +62,7 @@ struct brw_gs_compile {
GLuint nr_attrs;
GLuint nr_regs;
GLuint nr_bytes;
+ GLboolean need_ff_sync;
};
#define ATTR_SIZE (4*4)
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
index 22e0d25..a9b2aa2 100644
--- a/i965/brw_gs_emit.c
+++ b/i965/brw_gs_emit.c
@@ -101,6 +101,23 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
BRW_URB_SWIZZLE_NONE);
}
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+ struct brw_compile *p = &c->func;
+ brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
void brw_gs_quads( struct brw_gs_compile *c )
@@ -110,6 +127,8 @@ void brw_gs_quads( struct brw_gs_compile *c )
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
*/
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
@@ -120,6 +139,8 @@ void brw_gs_quad_strip( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 4);
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
@@ -129,6 +150,9 @@ void brw_gs_quad_strip( struct brw_gs_compile *c )
void brw_gs_tris( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 3);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
@@ -137,6 +161,9 @@ void brw_gs_tris( struct brw_gs_compile *c )
void brw_gs_lines( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 2);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
}
@@ -144,6 +171,9 @@ void brw_gs_lines( struct brw_gs_compile *c )
void brw_gs_points( struct brw_gs_compile *c )
{
brw_gs_alloc_regs(c, 1);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
}
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index 27023cf..a761c03 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -95,6 +95,9 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
gs.thread4.max_threads = 0; /* Hardware requirement */
+ if (BRW_IS_IGDNG(brw))
+ gs.thread4.rendering_enable = 1;
+
if (INTEL_DEBUG & DEBUG_STATS)
gs.thread4.stats_enable = 1;
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index 9bc5c35..ea71857 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ else
+ OUT_BATCH(0);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
@@ -169,6 +172,7 @@ static void prepare_psp_urb_cbs(struct brw_context *brw)
brw_add_validated_bo(brw, brw->vs.state_bo);
brw_add_validated_bo(brw, brw->gs.state_bo);
brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->sf.state_bo);
brw_add_validated_bo(brw, brw->wm.state_bo);
brw_add_validated_bo(brw, brw->cc.state_bo);
}
@@ -208,7 +212,7 @@ static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- unsigned int len = BRW_IS_G4X(brw) ? 6 : 5;
+ unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
if (region == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -219,7 +223,7 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -241,6 +245,8 @@ static void emit_depthbuffer(struct brw_context *brw)
return;
}
+ assert(region->tiling != I915_TILING_X);
+
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -256,7 +262,7 @@ static void emit_depthbuffer(struct brw_context *brw)
((region->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -369,7 +375,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
- if (!BRW_IS_G4X(brw))
+ if (BRW_IS_965(brw))
return;
/* use legacy aa line coverage computation */
@@ -506,14 +512,27 @@ static void upload_state_base_address( struct brw_context *brw )
/* Output the structure (brw_state_base_address) directly to the
* batchbuffer, so we can emit relocations inline.
*/
- BEGIN_BATCH(6, IGNORE_CLIPRECTS);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_BATCH(1); /* Surface state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* General state upper bound */
- OUT_BATCH(1); /* Indirect object upper bound */
- ADVANCE_BATCH();
+ if (BRW_IS_IGDNG(brw)) {
+ BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+ }
}
const struct brw_tracked_state brw_state_base_address = {
diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c
index cb9169e..a195bc3 100644
--- a/i965/brw_queryobj.c
+++ b/i965/brw_queryobj.c
@@ -146,17 +146,12 @@ static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
{
- /* XXX: Need to expose dri_bo_is_idle from bufmgr. */
-#if 0
struct brw_query_object *query = (struct brw_query_object *)q;
- if (dri_bo_is_idle(query->bo)) {
+ if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
brw_queryobj_get_results(query);
query->Base.Ready = GL_TRUE;
}
-#else
- brw_wait_query(ctx, q);
-#endif
}
/** Called to set up the query BO and account for its aperture space */
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index c3c8597..e1c2c77 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw)
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+ /* _NEW_HINT */
+ key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
@@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw)
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
- .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT),
+ .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
.brw = (BRW_NEW_REDUCED_PRIMITIVE),
.cache = CACHE_NEW_VS_PROG
},
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
index 1c0fb70..6426b6d 100644
--- a/i965/brw_sf.h
+++ b/i965/brw_sf.h
@@ -51,7 +51,8 @@ struct brw_sf_prog_key {
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
- GLuint pad:10;
+ GLuint linear_color:1; /**< linear interp vs. perspective interp */
+ GLuint pad:25;
GLenum SpriteOrigin;
};
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index 862835f..ca8f97f 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -151,6 +151,8 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint jmpi = 1;
+
if (!nr)
return;
@@ -159,18 +161,21 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
brw_push_insn_state(p);
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
copy_colors(c, c->vert[2], c->vert[0]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
copy_colors(c, c->vert[0], c->vert[1]);
copy_colors(c, c->vert[2], c->vert[1]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
copy_colors(c, c->vert[0], c->vert[2]);
copy_colors(c, c->vert[1], c->vert[2]);
@@ -184,7 +189,8 @@ static void do_flatshade_line( struct brw_sf_compile *c )
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
-
+ GLuint jmpi = 1;
+
if (!nr)
return;
@@ -193,13 +199,16 @@ static void do_flatshade_line( struct brw_sf_compile *c )
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
brw_push_insn_state(p);
- brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
- brw_JMPI(p, ip, ip, brw_imm_ud(nr));
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
copy_colors(c, c->vert[0], c->vert[1]);
brw_pop_insn_state(p);
@@ -218,7 +227,7 @@ static void alloc_regs( struct brw_sf_compile *c )
/* Values computed by fixed function unit:
*/
- c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
c->det = brw_vec1_grf(1, 2);
c->dx0 = brw_vec1_grf(1, 3);
c->dx2 = brw_vec1_grf(1, 4);
@@ -295,9 +304,6 @@ static void invert_det( struct brw_sf_compile *c)
}
-#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \
- FRAG_BIT_COL0 | \
- FRAG_BIT_COL1)
static GLboolean calculate_masks( struct brw_sf_compile *c,
GLuint reg,
@@ -306,9 +312,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
GLushort *pc_linear)
{
GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
- GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
+ GLuint persp_mask;
GLuint linear_mask;
+ if (c->key.do_flat_shading || c->key.linear_color)
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
+ FRAG_BIT_COL0 |
+ FRAG_BIT_COL1);
+ else
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
+
if (c->key.do_flat_shading)
linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
else
@@ -674,7 +687,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
@@ -695,7 +708,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
@@ -708,7 +721,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
saveflag = p->flag_value;
brw_push_insn_state(p);
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index c999187..bc0f076 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -113,7 +113,7 @@ struct brw_sf_unit_key {
unsigned int nr_urb_entries, urb_size, sfsize;
- GLenum front_face, cull_face;
+ GLenum front_face, cull_face, provoking_vertex;
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
@@ -153,6 +153,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
key->point_attenuated = ctx->Point._Attenuated;
+ /* _NEW_LIGHT */
+ key->provoking_vertex = ctx->Light.ProvokingVertex;
+
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
@@ -162,7 +165,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
{
struct brw_sf_unit_state sf;
dri_bo *bo;
-
+ int chipset_max_threads;
memset(&sf, 0, sizeof(sf));
sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
@@ -171,13 +174,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
sf.thread3.dispatch_grf_start_reg = 3;
- sf.thread3.urb_entry_read_offset = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ sf.thread3.urb_entry_read_offset = 3;
+ else
+ sf.thread3.urb_entry_read_offset = 1;
+
sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
sf.thread4.nr_urb_entries = key->nr_urb_entries;
sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
- /* Each SF thread produces 1 PUE, and there can be up to 24 threads */
- sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1;
+
+ /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+ * 48(IGDNG) threads
+ */
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 48;
+ else
+ chipset_max_threads = 24;
+
+ sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
sf.thread4.max_threads = 0;
@@ -233,7 +249,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else if (sf.sf6.line_width <= 0x2)
sf.sf6.line_width = 0;
- /* _NEW_POINT */
+ /* _NEW_BUFFERS */
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
if (!key->render_to_fbo) {
/* Rendering to an OpenGL window */
@@ -263,6 +279,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
}
/* XXX clamp max depends on AA vs. non-AA */
+ /* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
sf.sf7.use_point_size_state = !key->point_attenuated;
@@ -270,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
- sf.sf7.trifan_pv = 2;
- sf.sf7.linestrip_pv = 1;
- sf.sf7.tristrip_pv = 2;
+ if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ } else {
+ sf.sf7.trifan_pv = 1;
+ sf.sf7.linestrip_pv = 0;
+ sf.sf7.tristrip_pv = 0;
+ }
sf.sf7.line_last_pixel_enable = 0;
/* Set bias for OpenGL rasterization rules:
@@ -286,6 +309,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
&sf, sizeof(sf),
NULL, NULL);
+ /* STATE_PREFETCH command description describes this state as being
+ * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+ */
/* Emit SF program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -326,9 +352,11 @@ static void upload_sf_unit( struct brw_context *brw )
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
+ _NEW_LIGHT |
_NEW_LINE |
_NEW_POINT |
- _NEW_SCISSOR),
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
.brw = BRW_NEW_URB_FENCE,
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
diff --git a/i965/brw_state.h b/i965/brw_state.h
index 81b0a45..5335eac 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp;
const struct brw_tracked_state brw_state_base_address;
const struct brw_tracked_state brw_urb_fence;
const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
const struct brw_tracked_state brw_vs_prog;
const struct brw_tracked_state brw_vs_unit;
const struct brw_tracked_state brw_wm_input_sizes;
const struct brw_tracked_state brw_wm_prog;
const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
const struct brw_tracked_state brw_wm_surfaces;
const struct brw_tracked_state brw_wm_unit;
@@ -84,12 +86,24 @@ const struct brw_tracked_state brw_psp_urb_cbs;
const struct brw_tracked_state brw_pipe_control;
-const struct brw_tracked_state brw_clear_surface_cache;
-const struct brw_tracked_state brw_clear_batch_cache;
-
const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
+const struct brw_tracked_state brw_index_buffer;
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+ GLenum target, depthmode;
+ dri_bo *bo;
+ GLint format, internal_format;
+ GLint first_level, last_level;
+ GLint width, height, depth;
+ GLint pitch, cpp;
+ uint32_t tiling;
+ GLuint offset;
+};
/***********************************************************************
* brw_state.c
@@ -135,8 +149,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
-void brw_init_cache( struct brw_context *brw );
-void brw_destroy_cache( struct brw_context *brw );
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
@@ -148,6 +162,11 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
const void *data,
GLuint sz );
void brw_destroy_batch_cache( struct brw_context *brw );
-void brw_clear_batch_cache_flush( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key );
#endif
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index 811940e..7821898 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -79,7 +79,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
return GL_TRUE;
}
-static void clear_batch_cache( struct brw_context *brw )
+void brw_clear_batch_cache( struct brw_context *brw )
{
struct brw_cached_batch_item *item = brw->cached_batch_items;
@@ -93,18 +93,7 @@ static void clear_batch_cache( struct brw_context *brw )
brw->cached_batch_items = NULL;
}
-void brw_clear_batch_cache_flush( struct brw_context *brw )
-{
- clear_batch_cache(brw);
-
- brw->state.dirty.mesa |= ~0;
- brw->state.dirty.brw |= ~0;
- brw->state.dirty.cache |= ~0;
-}
-
-
-
void brw_destroy_batch_cache( struct brw_context *brw )
{
- clear_batch_cache(brw);
+ brw_clear_batch_cache(brw);
}
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index d5b5166..e40d7a0 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -56,9 +56,9 @@
* incorrect program is run for the other instance.
*/
+#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
-#include "main/imports.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
@@ -69,8 +69,10 @@
#include "brw_sf.h"
#include "brw_gs.h"
-static GLuint hash_key( const void *key, GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
GLuint *ikey = (GLuint *)key;
GLuint hash = 0, i;
@@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
return hash;
}
+
/**
* Marks a new buffer as being chosen for the given cache id.
*/
@@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
+
static struct brw_cache_item *
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
GLuint hash, const void *key, GLuint key_size,
@@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
}
-static void rehash( struct brw_cache *cache )
+static void
+rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
@@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
cache->size = size;
}
+
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
-dri_bo *brw_search_cache( struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
- void *aux_return )
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+ void *aux_return)
{
struct brw_cache_item *item;
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
return item->bo;
}
+
dri_bo *
brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
@@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
-/* This doesn't really work with aux data. Use search/upload instead
+
+/**
+ * This doesn't really work with aux data. Use search/upload instead
*/
dri_bo *
brw_cache_data_sz(struct brw_cache *cache,
@@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
+
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
*
@@ -319,21 +330,22 @@ enum pool_type {
DW_GENERAL_STATE
};
+
static void
-brw_init_cache_id( struct brw_context *brw,
- const char *name,
- enum brw_cache_id id,
- GLuint key_size,
- GLuint aux_size)
+brw_init_cache_id(struct brw_cache *cache,
+ const char *name,
+ enum brw_cache_id id,
+ GLuint key_size,
+ GLuint aux_size)
{
- struct brw_cache *cache = &brw->cache;
-
cache->name[id] = strdup(name);
cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
-void brw_init_cache( struct brw_context *brw )
+
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
@@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
- _mesa_calloc(cache->size *
- sizeof(struct brw_cache_item));
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
sizeof(struct brw_cc_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
sizeof(struct brw_cc_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
sizeof(struct brw_sampler_default_color),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
0, /* variable key/data size */
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
sizeof(struct brw_wm_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
sizeof(struct brw_sf_viewport),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
sizeof(struct brw_sf_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
sizeof(struct brw_vs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
sizeof(struct brw_clip_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
sizeof(struct brw_gs_unit_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->surface_cache;
+
+ cache->brw = brw;
- brw_init_cache_id(brw,
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
sizeof(struct brw_surface_state),
0);
- brw_init_cache_id(brw,
+ brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
0,
0);
}
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+ brw_init_non_surface_cache(brw);
+ brw_init_surface_cache(brw);
+}
+
+
static void
-brw_clear_cache( struct brw_context *brw )
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
@@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
- for (i = 0; i < brw->cache.size; i++) {
- for (c = brw->cache.items[i]; c; c = next) {
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
int j;
next = c->next;
@@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
free((void *)c->key);
free(c);
}
- brw->cache.items[i] = NULL;
+ cache->items[i] = NULL;
}
- brw->cache.n_items = 0;
+ cache->n_items = 0;
if (brw->curbe.last_buf) {
_mesa_free(brw->curbe.last_buf);
@@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw )
brw->state.dirty.cache |= ~0;
}
-void brw_state_cache_check_size( struct brw_context *brw )
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
{
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
*/
if (brw->cache.n_items > 1000)
- brw_clear_cache(brw);
+ brw_clear_cache(brw, &brw->cache);
+
+ if (brw->surface_cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->surface_cache);
}
-void brw_destroy_cache( struct brw_context *brw )
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
GLuint i;
- brw_clear_cache(brw);
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
- dri_bo_unreference(brw->cache.last_bo[i]);
- free(brw->cache.name[i]);
+ dri_bo_unreference(cache->last_bo[i]);
+ free(cache->name[i]);
}
- free(brw->cache.items);
- brw->cache.items = NULL;
- brw->cache.size = 0;
+ free(cache->items);
+ cache->items = NULL;
+ cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+ brw_destroy_cache(brw, &brw->cache);
+ brw_destroy_cache(brw, &brw->surface_cache);
}
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
index a713262..e94fa7d 100644
--- a/i965/brw_state_dump.c
+++ b/i965/brw_state_dump.c
@@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw)
surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
state_out(name, surf, surfoff, 4, "mip base %d\n",
surf->ss4.min_lod);
+ state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+ surf->ss5.x_offset, surf->ss5.y_offset);
dri_bo_unmap(surf_bo);
}
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index 5de1450..b817b74 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
-
&brw_cc_vp,
&brw_cc_unit,
- &brw_wm_surfaces, /* must do before samplers */
+ &brw_vs_surfaces, /* must do before unit */
+ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
&brw_wm_samplers,
&brw_wm_unit,
@@ -88,54 +89,27 @@ const struct brw_tracked_state *atoms[] =
&brw_line_stipple,
&brw_aa_line_parameters,
- /* Ordering of the commands below is documented as fixed.
- */
-#if 0
- &brw_pipelined_state_pointers,
- &brw_urb_fence,
- &brw_constant_buffer_state,
-#else
+
&brw_psp_urb_cbs,
-#endif
&brw_drawing_rect,
&brw_indices,
+ &brw_index_buffer,
&brw_vertices,
- NULL, /* brw_constant_buffer */
+ &brw_constant_buffer
};
void brw_init_state( struct brw_context *brw )
{
- GLuint i;
-
- brw_init_cache(brw);
-
- brw->state.atoms = _mesa_malloc(sizeof(atoms));
- brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
- _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
-
- /* Patch in a pointer to the dynamic state atom:
- */
- for (i = 0; i < brw->state.nr_atoms; i++)
- if (brw->state.atoms[i] == NULL)
- brw->state.atoms[i] = &brw->curbe.tracked_state;
-
- _mesa_memcpy(&brw->curbe.tracked_state,
- &brw_constant_buffer,
- sizeof(brw_constant_buffer));
+ brw_init_caches(brw);
}
void brw_destroy_state( struct brw_context *brw )
{
- if (brw->state.atoms) {
- _mesa_free(brw->state.atoms);
- brw->state.atoms = NULL;
- }
-
- brw_destroy_cache(brw);
+ brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
}
@@ -218,6 +192,7 @@ static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
+ DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
{0, 0, 0}
};
@@ -231,10 +206,10 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
- DEFINE_BIT(BRW_NEW_INPUT_VARYING),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
+ DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
@@ -312,6 +287,7 @@ void brw_validate_state( struct brw_context *brw )
if (brw->emit_state_always) {
state->mesa |= ~0;
state->brw |= ~0;
+ state->cache |= ~0;
}
if (brw->fragment_program != ctx->FragmentProgram._Current) {
@@ -330,13 +306,13 @@ void brw_validate_state( struct brw_context *brw )
return;
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
- brw_clear_batch_cache_flush(brw);
+ brw_clear_batch_cache(brw);
brw->intel.Fallback = 0;
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
@@ -347,6 +323,19 @@ void brw_validate_state( struct brw_context *brw )
}
}
}
+
+ /* Make sure that the textures which are referenced by the current
+ * brw fragment program are actually present/valid.
+ * If this fails, we can experience GPU lock-ups.
+ */
+ {
+ const struct brw_fragment_program *fp;
+ fp = brw_fragment_program_const(brw->fragment_program);
+ if (fp) {
+ assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
+ == fp->tex_units_used);
+ }
+ }
}
@@ -367,8 +356,8 @@ void brw_upload_state(struct brw_context *brw)
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
- for (i = 0; i < brw->state.nr_atoms; i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.mesa ||
@@ -397,7 +386,7 @@ void brw_upload_state(struct brw_context *brw)
}
else {
for (i = 0; i < Elements(atoms); i++) {
- const struct brw_tracked_state *atom = brw->state.atoms[i];
+ const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index 89e2981..66d4127 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -33,6 +33,14 @@
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
/* Command packets:
*/
struct header
@@ -434,8 +442,8 @@ struct brw_urb_fence
{
GLuint sf_fence:10;
GLuint vf_fence:10;
- GLuint cs_fence:10;
- GLuint pad:2;
+ GLuint cs_fence:11;
+ GLuint pad:1;
} bits1;
};
@@ -815,7 +823,9 @@ struct brw_gs_unit_state
struct
{
- GLuint pad0:10;
+ GLuint pad0:8;
+ GLuint rendering_enable:1; /* for IGDNG */
+ GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
@@ -923,6 +933,28 @@ struct brw_wm_unit_state
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
+
+ /* for IGDNG only */
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_1:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_2:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_3:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_3:26;
+ } wm10;
};
struct brw_sampler_default_color {
@@ -1075,7 +1107,7 @@ struct brw_surface_state
GLuint y_offset:4;
GLuint pad0:1;
GLuint x_offset:7;
- } ss5; /* NEW in Integrated Graphics Device */
+ } ss5; /* New in G4X */
};
@@ -1168,7 +1200,7 @@ struct brw_instruction
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
- GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
GLuint pad0:2;
GLuint debug_control:1;
GLuint saturate:1;
@@ -1196,7 +1228,9 @@ struct brw_instruction
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
- GLuint pad:6;
+ GLuint src1_reg_file:2; /* 0x00000c00 */
+ GLuint src1_reg_type:3; /* 0x00007000 */
+ GLuint pad:1;
GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
GLuint dest_horiz_stride:2;
@@ -1211,7 +1245,7 @@ struct brw_instruction
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
- GLuint pad0:1;
+ GLuint pad:1;
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
@@ -1298,6 +1332,14 @@ struct brw_instruction
GLuint pad1:6;
} ia16;
+ struct
+ {
+ GLuint pad:26;
+ GLuint end_of_thread:1;
+ GLuint pad1:1;
+ GLuint sfid:4;
+ } send_igdng; /* for IGDNG only */
+
} bits2;
union
@@ -1308,7 +1350,7 @@ struct brw_instruction
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad:1;
+ GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
@@ -1323,7 +1365,7 @@ struct brw_instruction
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad0:1;
+ GLuint src1_address_mode:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
@@ -1337,7 +1379,7 @@ struct brw_instruction
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad0:1;
+ GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
@@ -1385,6 +1427,21 @@ struct brw_instruction
} math;
struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint snapshot:1;
+ GLuint pad0:10;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } math_igdng;
+
+ struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint return_format:2;
@@ -1407,9 +1464,38 @@ struct brw_instruction
GLuint end_of_thread:1;
} sampler_g4x;
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint simd_mode:2;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } sampler_igdng;
+
struct brw_urb_immediate urb;
struct {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } urb_igdng;
+
+ struct {
GLuint binding_table_index:8;
GLuint msg_control:4;
GLuint msg_type:2;
@@ -1423,6 +1509,19 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_read_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint pixel_scoreboard_clear:1;
GLuint msg_type:3;
@@ -1435,6 +1534,20 @@ struct brw_instruction
} dp_write;
struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_write_igdng;
+
+ struct {
GLuint pad:16;
GLuint response_length:4;
GLuint msg_length:4;
@@ -1443,8 +1556,18 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ struct {
+ GLuint pad:19;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } generic_igdng;
+
GLint d;
GLuint ud;
+ float f;
} bits3;
};
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index 51a617f..5986cbf 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -28,7 +28,6 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
/* Code to layout images in a mipmap tree for i965.
*/
@@ -37,17 +36,93 @@
#include "intel_tex_layout.h"
#include "intel_context.h"
#include "main/macros.h"
+#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
+GLboolean brw_miptree_layout(struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling)
{
- /* XXX: these vary depending on image format:
- */
-/* GLint align_w = 4; */
+ /* XXX: these vary depending on image format: */
+ /* GLint align_w = 4; */
switch (mt->target) {
- case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP:
+ if (IS_IGDNG(intel->intelScreen->deviceID)) {
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = mt->width0;
+ GLuint height = mt->height0;
+ GLuint qpitch = 0;
+ GLuint y_pitch = 0;
+
+ mt->pitch = mt->width0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+ y_pitch = ALIGN(height, align_h);
+
+ if (mt->compressed) {
+ mt->pitch = ALIGN(mt->width0, align_w);
+ }
+
+ if (mt->first_level != mt->last_level) {
+ GLuint mip1_width;
+
+ if (mt->compressed) {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + ALIGN(minify(minify(mt->width0)), align_w);
+ } else {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + minify(minify(mt->width0));
+ }
+
+ if (mip1_width > mt->pitch) {
+ mt->pitch = mip1_width;
+ }
+ }
+
+ mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+ if (mt->compressed) {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+ } else {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+ }
+
+ for (level = mt->first_level; level <= mt->last_level; level++) {
+ GLuint img_height;
+ GLuint nr_images = 6;
+ GLuint q = 0;
+
+ intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
+ height, 1);
+
+ for (q = 0; q < nr_images; q++)
+ intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+ if (mt->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = ALIGN(height, align_h);
+
+ if (level == mt->first_level + 1) {
+ x += ALIGN(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ }
+
+ break;
+ }
+
case GL_TEXTURE_3D: {
GLuint width = mt->width0;
GLuint height = mt->height0;
@@ -59,25 +134,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
GLuint align_w = 4;
mt->total_height = 0;
-
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
if (mt->compressed) {
- align_w = intel_compressed_alignment(mt->internal_format);
mt->pitch = ALIGN(width, align_w);
pack_y_pitch = (height + 3) / 4;
} else {
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
- pack_y_pitch = ALIGN(mt->height0, align_h);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+ pack_y_pitch = ALIGN(mt->height0, align_h);
}
- pack_x_pitch = mt->pitch;
+ pack_x_pitch = width;
pack_x_nr = 1;
- for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+ for (level = mt->first_level ; level <= mt->last_level ; level++) {
GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
GLint x = 0;
GLint y = 0;
GLint q, j;
-
+
intel_miptree_set_level_info(mt, level, nr_images,
0, mt->total_height,
width, height, depth);
@@ -89,7 +164,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
}
x = 0;
- y += pack_y_pitch;
+ y += pack_y_pitch;
}
@@ -98,40 +173,50 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
height = minify(height);
depth = minify(depth);
- if (mt->compressed) {
- pack_y_pitch = (height + 3) / 4;
-
- if (pack_x_pitch > ALIGN(width, align_w)) {
- pack_x_pitch = ALIGN(width, align_w);
- pack_x_nr <<= 1;
- }
- } else {
- if (pack_x_pitch > 4) {
- pack_x_pitch >>= 1;
- pack_x_nr <<= 1;
- assert(pack_x_pitch * pack_x_nr <= mt->pitch);
- }
-
- if (pack_y_pitch > 2) {
- pack_y_pitch >>= 1;
- pack_y_pitch = ALIGN(pack_y_pitch, align_h);
- }
- }
+ if (mt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > ALIGN(width, align_w)) {
+ pack_x_pitch = ALIGN(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ }
+ }
}
+ /* The 965's sampler lays cachelines out according to how accesses
+ * in the texture surfaces run, so they may be "vertical" through
+ * memory. As a result, the docs say in Surface Padding Requirements:
+ * Sampling Engine Surfaces that two extra rows of padding are required.
+ * We don't know of similar requirements for pre-965, but given that
+ * those docs are silent on padding requirements in general, let's play
+ * it safe.
+ */
+ if (mt->target == GL_TEXTURE_CUBE_MAP)
+ mt->total_height += 2;
break;
}
default:
- i945_miptree_layout_2d(intel, mt);
+ i945_miptree_layout_2d(intel, mt, tiling);
break;
}
- DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
- mt->pitch,
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ mt->pitch,
mt->total_height,
mt->cpp,
mt->pitch * mt->total_height * mt->cpp );
-
+
return GL_TRUE;
}
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
index 7673dd3..8c6f435 100644
--- a/i965/brw_urb.c
+++ b/i965/brw_urb.c
@@ -143,7 +143,29 @@ static void recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
+
+ brw->urb.constrained = 0;
+
+ if (BRW_IS_IGDNG(brw)) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (BRW_IS_G4X(brw)) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
@@ -169,9 +191,8 @@ static void recalculate_urb_fence( struct brw_context *brw )
if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
_mesa_printf("URB CONSTRAINED\n");
}
- else
- brw->urb.constrained = 0;
+done:
if (INTEL_DEBUG & DEBUG_URB)
_mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index e3111c6..f0c79ef 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -90,8 +90,6 @@ static void brw_upload_vs_prog(struct brw_context *brw)
struct brw_vertex_program *vp =
(struct brw_vertex_program *)brw->vertex_program;
- assert (vp && !vp->program.IsNVProgram);
-
memset(&key, 0, sizeof(key));
/* Just upload the program verbatim for now. Always send it all
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index 1e4f660..4a59136 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -58,6 +58,7 @@ struct brw_vs_compile {
GLuint first_output;
GLuint nr_outputs;
+ GLuint first_overflow_output; /**< VERT_ATTRIB_x */
GLuint first_tmp;
GLuint last_tmp;
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index b69616d..108e19c 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -68,14 +68,20 @@ static void release_tmps( struct brw_vs_compile *c )
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
+ int attributes_in_vue;
-#if 0
- if (c->vp->program.Base.Parameters->NumParameters >= 6)
- c->vp->use_const_buffer = 1;
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The later is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->program.Base.Parameters->NumParameters +
+ c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
else
-#endif
c->vp->use_const_buffer = GL_FALSE;
- /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* r0 -- reserved as usual
*/
@@ -123,16 +129,27 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
}
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
- /* Allocate outputs: TODO: could organize the non-position outputs
- * to go straight into message regs.
+ /* Allocate outputs. The non-position outputs go straight into message regs.
*/
c->nr_outputs = 0;
c->first_output = reg;
- mrf = 4;
+ c->first_overflow_output = 0;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ mrf = 8;
+ else
+ mrf = 4;
+
for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & (1 << i)) {
c->nr_outputs++;
+ assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
if (i == VERT_RESULT_HPOS) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
@@ -143,8 +160,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
- c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
+ if (mrf < 16) {
+ c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+ else {
+ /* too many vertex results to fit in MRF, use GRF for overflow */
+ if (!c->first_overflow_output)
+ c->first_overflow_output = i;
+ c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
}
}
}
@@ -200,8 +226,23 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* vertex urb, so is half the amount:
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+ /* Setting this field to 0 leads to undefined behavior according to the
+ * the VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ else
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
- c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
c->prog_data.total_grf = reg;
if (INTEL_DEBUG & DEBUG_VS) {
@@ -709,10 +750,11 @@ get_constant(struct brw_vs_compile *c,
struct brw_compile *p = &c->func;
struct brw_reg const_reg;
struct brw_reg const2_reg;
+ const GLboolean relAddr = src->RelAddr;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || src->RelAddr) {
+ if (c->current_const[argIndex].index != src->Index || relAddr) {
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
c->current_const[argIndex].index = src->Index;
@@ -725,13 +767,13 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
c->current_const[argIndex].reg,/* writeback dest */
0, /* oword */
- src->RelAddr, /* relative indexing? */
+ relAddr, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
- if (src->RelAddr) {
+ if (relAddr) {
/* second read */
const2_reg = get_tmp(c);
@@ -742,7 +784,7 @@ get_constant(struct brw_vs_compile *c,
brw_dp_READ_4_vs(p,
const2_reg, /* writeback dest */
1, /* oword */
- src->RelAddr, /* relative indexing? */
+ relAddr, /* relative indexing? */
addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER
@@ -752,7 +794,7 @@ get_constant(struct brw_vs_compile *c,
const_reg = c->current_const[argIndex].reg;
- if (src->RelAddr) {
+ if (relAddr) {
/* merge the two Owords into the constant register */
/* const_reg[7..4] = const2_reg[7..4] */
brw_MOV(p,
@@ -869,6 +911,7 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
+ case PROGRAM_ENV_PARAM:
if (c->vp->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
@@ -888,7 +931,6 @@ get_src_reg( struct brw_vs_compile *c,
return brw_null_reg();
case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
case PROGRAM_WRITE_ONLY:
default:
assert(0);
@@ -1061,6 +1103,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
+ int eot;
+ GLuint len_vertext_header = 2;
if (c->key.copy_edgeflag) {
brw_MOV(p,
@@ -1070,14 +1114,16 @@ static void emit_vertex_write( struct brw_vs_compile *c)
/* Build ndc coords */
ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
- c->key.nr_userclip || !BRW_IS_G4X(p->brw))
+ c->key.nr_userclip || BRW_IS_965(p->brw))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1108,7 +1154,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (!BRW_IS_G4X(p->brw)) {
+ if (BRW_IS_965(p->brw)) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
@@ -1135,7 +1181,23 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+ /* m4, m5 contain the distances from vertex to the user clip planeXXX.
+ * Seems it is useless for us.
+ * m6 is used for aligning, so that the remainder of vertex element is
+ * reg-aligned.
+ */
+ brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+ len_vertext_header = 6;
+ } else {
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertext_header = 2;
+ }
+
+ eot = (c->first_overflow_output == 0);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -1143,12 +1205,43 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- c->nr_outputs + 3, /* msg len */
+ MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
- 1, /* eot */
+ eot, /* eot */
1, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
+
+ if (c->first_overflow_output > 0) {
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ /* XXX I'm not 100% sure about which MRF regs to use here. Starting
+ * at mrf[4] atm...
+ */
+ GLuint i, mrf = 0;
+ for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+ if (c->prog_data.outputs_written & (1 << i)) {
+ /* move from GRF to MRF */
+ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ mrf++;
+ }
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 4, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ mrf+1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ BRW_MAX_MRF-1, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+ }
}
@@ -1169,28 +1262,49 @@ post_vs_emit( struct brw_vs_compile *c,
/* patch up the END code to jump past subroutines, etc */
offset = last_inst - end_inst;
- brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ if (offset > 1) {
+ brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ } else {
+ end_inst->header.opcode = BRW_OPCODE_NOP;
+ }
}
+static uint32_t
+get_predicate(uint32_t swizzle)
+{
+ switch (swizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ return BRW_PREDICATE_NORMAL;
+ }
+}
/* Emit the vertex program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
- GLuint nr_insns = c->vp->program.Base.NumInstructions;
- GLuint insn, if_insn = 0;
+ const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+ GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IFSN];
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
GLuint file;
if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("vs-emit:\n");
+ _mesa_printf("vs-mesa:\n");
_mesa_print_program(&c->vp->program.Base);
_mesa_printf("\n");
}
@@ -1219,7 +1333,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
for (insn = 0; insn < nr_insns; insn++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
struct brw_reg args[3], dst;
GLuint i;
@@ -1232,7 +1346,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
*/
if (inst->Opcode != OPCODE_SWZ)
for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
+ const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
@@ -1376,16 +1490,54 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_xpd(p, dst, args[0], args[1]);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ if_inst[if_depth]->header.predicate_control =
+ get_predicate(inst->DstReg.CondSwizzle);
+ if_depth++;
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
+#if 0
+ case OPCODE_BGNLOOP:
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+#else
+ (void) loop_inst;
+ (void) loop_depth;
+#endif
case OPCODE_BRA:
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
@@ -1430,6 +1582,19 @@ void brw_vs_emit(struct brw_vs_compile *c )
"unknown");
}
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+
if ((inst->DstReg.File == PROGRAM_OUTPUT)
&& (inst->DstReg.Index != VERT_RESULT_HPOS)
&& c->output_regs[inst->DstReg.Index].used_in_src) {
@@ -1467,4 +1632,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_vertex_write(c);
post_vs_emit(c, end_inst, last_inst);
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ int i;
+
+ _mesa_printf("vs-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index 3d29538..d790ab6 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -97,7 +97,11 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* brw_urb_WRITE() results.
*/
vs.thread1.single_program_flow = 0;
- vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
@@ -105,10 +109,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ if (BRW_IS_IGDNG(brw))
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ else
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 72;
+ else if (BRW_IS_G4X(brw))
chipset_max_threads = 32;
else
chipset_max_threads = 16;
@@ -120,6 +130,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
/* No samplers for ARB_vp programs:
*/
+ /* It has to be set to 0 for IGDNG
+ */
vs.vs5.sampler_count = 0;
if (INTEL_DEBUG & DEBUG_STATS)
diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000..89f4752
--- /dev/null
+++ b/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+ assert(surf == 0);
+
+ /* If we're in this state update atom, we need to update VS constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(vp->const_buffer);
+ vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (vp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = NULL;
+ return;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = vp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
+ uint32_t *data = malloc(data_size);
+ int i;
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++)
+ if (brw->vs.surf_bo[i])
+ data[i] = brw->vs.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ /* The presumed offsets were set in the data values for
+ * brw_upload_cache.
+ */
+ drm_intel_bo_emit_reloc(bind_bo, i * 4,
+ brw->vs.surf_bo[i], 0,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+ }
+
+ free(data);
+ }
+
+ return bind_bo;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ int i;
+ int nr_surfaces = 0;
+
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ nr_surfaces = i + 1;
+ }
+ }
+
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
+
+ /* Note that we don't end up updating the bind_bo if we don't have a
+ * surface to be pointing at. This should be relatively harmless, as it
+ * just slightly increases our working set size.
+ */
+ if (brw->vs.nr_surfaces != 0) {
+ dri_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+ }
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index ba03afd..ac11790 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -177,14 +177,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence )
brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
}
-
-static void brw_note_unlock( struct intel_context *intel )
-{
- struct brw_context *brw = brw_context(&intel->ctx);
- brw_state_cache_check_size(brw);
-}
-
-
/* called from intelWaitForIdle() and intelFlush()
*
* For now, just flush everything. Could be smarter later.
@@ -194,7 +186,7 @@ static GLuint brw_flush_cmd( void )
struct brw_mi_flush flush;
flush.opcode = CMD_MI_FLUSH;
flush.pad = 0;
- flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE;
+ flush.flags = BRW_FLUSH_STATE_CACHE;
return *(GLuint *)&flush;
}
@@ -215,7 +207,6 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.invalidate_state = brw_invalidate_state;
brw->intel.vtbl.note_fence = brw_note_fence;
- brw->intel.vtbl.note_unlock = brw_note_unlock;
brw->intel.vtbl.new_batch = brw_new_batch;
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index 8a3b7df..2292de9 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -41,13 +41,13 @@ GLuint brw_wm_nr_args( GLuint opcode )
{
switch (opcode) {
case WM_FRONTFACING:
- return 0;
case WM_PIXELXY:
+ return 0;
case WM_CINTERP:
case WM_WPOSXY:
+ case WM_DELTAXY:
return 1;
case WM_LINTERP:
- case WM_DELTAXY:
case WM_PIXELW:
return 2;
case WM_FB_WRITE:
@@ -171,9 +171,11 @@ static void do_wm_prog( struct brw_context *brw,
* differently from "simple" shaders.
*/
if (fp->isGLSL) {
+ c->dispatch_width = 8;
brw_wm_glsl_emit(brw, c);
}
else {
+ c->dispatch_width = 16;
brw_wm_non_glsl_emit(brw, c);
}
@@ -202,6 +204,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
+ GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -263,6 +266,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
brw_wm_lookup_iz(line_aa,
lookup,
+ uses_depth,
key);
@@ -272,6 +276,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+ /* _NEW_HINT */
+ key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
/* _NEW_TEXTURE */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
@@ -351,6 +358,7 @@ const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_DEPTH |
+ _NEW_HINT |
_NEW_STENCIL |
_NEW_POLYGON |
_NEW_LINE |
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 295fed8..ae98b54 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -63,6 +63,7 @@ struct brw_wm_prog_key {
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint flat_shade:1;
+ GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
@@ -241,19 +242,25 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
/** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
struct brw_reg stack;
struct brw_reg emit_mask_reg;
- GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;
GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+ GLuint dispatch_width;
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
@@ -286,6 +293,7 @@ void brw_wm_print_program( struct brw_wm_compile *c,
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key );
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index a870e75..268f796 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -65,8 +65,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
static void emit_pixel_xy(struct brw_compile *p,
const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+ GLuint mask)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -98,8 +97,7 @@ static void emit_pixel_xy(struct brw_compile *p,
static void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -353,6 +351,19 @@ static void emit_mad( struct brw_compile *p,
}
}
+static void emit_trunc( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_RNDZ(p, dst[i], arg0[i]);
+ }
+ }
+}
static void emit_lrp( struct brw_compile *p,
const struct brw_reg *dst,
@@ -532,16 +543,18 @@ static void emit_dp3( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MAC(p, dst[0], arg0[2], arg1[2]);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
brw_set_saturate(p, 0);
}
@@ -552,17 +565,19 @@ static void emit_dp4( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MAC(p, dst[0], arg0[3], arg1[3]);
+ brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
brw_set_saturate(p, 0);
}
@@ -573,17 +588,19 @@ static void emit_dph( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
+ const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
- brw_MAC(p, dst[0], arg0[2], arg1[2]);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_ADD(p, dst[0], dst[0], arg1[3]);
+ brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
brw_set_saturate(p, 0);
}
@@ -619,18 +636,19 @@ static void emit_math1( struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
- // function == BRW_MATH_FUNCTION_SINCOS);
-
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
brw_MOV(p, brw_message_reg(2), arg0[0]);
/* Send two messages to perform all 16 operations:
*/
brw_math_16(p,
- dst[0],
+ dst[dst_chan],
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
@@ -646,10 +664,12 @@ static void emit_math2( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1)
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_push_insn_state(p);
@@ -668,7 +688,7 @@ static void emit_math2( struct brw_compile *p,
*/
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
- dst[0],
+ dst[dst_chan],
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
@@ -678,7 +698,7 @@ static void emit_math2( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p,
- offset(dst[0],1),
+ offset(dst[dst_chan],1),
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
4,
@@ -701,6 +721,7 @@ static void emit_tex( struct brw_wm_compile *c,
GLuint msgLength, responseLength;
GLuint i, nr;
GLuint emit;
+ GLuint msg_type;
/* How many input regs are there?
*/
@@ -714,10 +735,14 @@ static void emit_tex( struct brw_wm_compile *c,
emit = WRITEMASK_XY;
nr = 2;
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
nr = 3;
break;
+ default:
+ /* unexpected target */
+ abort();
}
if (inst->tex_shadow) {
@@ -738,6 +763,18 @@ static void emit_tex( struct brw_wm_compile *c,
responseLength = 8; /* always */
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ } else {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ }
+
brw_SAMPLE(p,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
@@ -745,12 +782,12 @@ static void emit_tex( struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
- (inst->tex_shadow ?
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
+ msg_type,
responseLength,
msgLength,
- 0);
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
}
@@ -762,7 +799,7 @@ static void emit_txb( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
GLuint msgLength;
-
+ GLuint msg_type;
/* Shadow ignored for txb.
*/
switch (inst->tex_idx) {
@@ -777,16 +814,25 @@ static void emit_txb( struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(4), arg[1]);
brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
brw_MOV(p, brw_message_reg(2), arg[0]);
brw_MOV(p, brw_message_reg(4), arg[1]);
brw_MOV(p, brw_message_reg(6), arg[2]);
break;
+ default:
+ /* unexpected target */
+ abort();
}
brw_MOV(p, brw_message_reg(8), arg[3]);
msgLength = 9;
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
brw_SAMPLE(p,
retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
1,
@@ -794,10 +840,12 @@ static void emit_txb( struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(inst->tex_unit),
inst->tex_unit, /* sampler */
inst->writemask,
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+ msg_type,
8, /* responseLength */
msgLength,
- 0);
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
}
@@ -1009,7 +1057,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
@@ -1161,11 +1209,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags, args[0]);
+ emit_pixel_xy(p, dst, dst_flags);
break;
case WM_DELTAXY:
- emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
+ emit_delta_xy(p, dst, dst_flags, args[0]);
break;
case WM_WPOSXY:
@@ -1222,6 +1270,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
+ case OPCODE_TRUNC:
+ emit_trunc(p, dst, dst_flags, args[0]);
+ break;
+
case OPCODE_LRP:
emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
@@ -1348,4 +1400,13 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->hw_reg,
inst->dst[i]->spill_slot);
}
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ int i;
+
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 49aad28..123fe84 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -42,6 +42,12 @@
#include "shader/prog_statevars.h"
+/** An invalid texture target */
+#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
+
+/** An invalid texture unit */
+#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
+
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
#define X 0
@@ -199,6 +205,15 @@ static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
{
struct prog_instruction *inst = get_fp_inst(c);
+ assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
+ tex_src_unit == TEX_UNIT_NONE);
+ assert(tex_src_target < NUM_TEXTURE_TARGETS ||
+ tex_src_target == TEX_TARGET_NONE);
+
+ /* update mask of which texture units are referenced by this program */
+ if (tex_src_unit != TEX_UNIT_NONE)
+ c->fp->tex_units_used |= (1 << tex_src_unit);
+
memset(inst, 0, sizeof(*inst));
inst->Opcode = op;
@@ -223,12 +238,45 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
struct prog_src_register src2 )
{
return emit_tex_op(c, op, dest, saturate,
- 0, 0, 0, /* tex unit, target, shadow */
+ TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
src0, src1, src2);
}
-
+/* Many Mesa opcodes produce the same value across all the result channels.
+ * We'd rather not have to support that splatting in the opcode implementations,
+ * and brw_wm_pass*.c wants to optimize them out by shuffling references around
+ * anyway. We can easily get both by emitting the opcode to one channel, and
+ * then MOVing it to the others, which brw_wm_pass*.c already understands.
+ */
+static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst0)
+{
+ struct prog_instruction *inst;
+ unsigned int dst_chan;
+ unsigned int other_channel_mask;
+
+ if (inst0->DstReg.WriteMask == 0)
+ return NULL;
+
+ dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
+ inst = get_fp_inst(c);
+ *inst = *inst0;
+ inst->DstReg.WriteMask = 1 << dst_chan;
+
+ other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
+ if (other_channel_mask != 0) {
+ inst = emit_op(c,
+ OPCODE_MOV,
+ dst_mask(inst0->DstReg, other_channel_mask),
+ 0,
+ src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
+ src_undef(),
+ src_undef());
+ }
+ return inst;
+}
+
/***********************************************************************
* Special instructions for interpolation and other tasks
@@ -354,24 +402,28 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef());
}
else {
- emit_op(c,
- WM_LINTERP,
- dst,
- 0,
- interp,
- deltas,
- src_undef());
+ if (c->key.linear_color) {
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ src_undef());
+ }
+ else {
+ /* perspective-corrected color interpolation */
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ }
}
break;
case FRAG_ATTRIB_FOGC:
- /* The FOGC input is really special. When a program uses glFogFragCoord,
- * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
- * the glFrontFacing and glPointCoord values are also stashed in FOGC.
- * So, write the interpolated fog value to X, then either 0, 1, or the
- * stashed values to Y, Z, W. Note that this means that
- * glFogFragCoord.yzw can be wrong in those cases!
- */
-
/* Interpolate the fog coordinate */
emit_op(c,
WM_PINTERP,
@@ -381,26 +433,40 @@ static void emit_interp( struct brw_wm_compile *c,
deltas,
get_pixel_w(c));
- /* Move the front facing value into FOGC.y if it's needed. */
- if (c->fp->program.UsesFrontFacing) {
- emit_op(c,
- WM_FRONTFACING,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_undef(),
- src_undef(),
- src_undef());
- } else {
- emit_op(c,
- OPCODE_MOV,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_swizzle1(interp, SWIZZLE_ZERO),
- src_undef(),
- src_undef());
- }
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_YZW),
+ 0,
+ src_swizzle(interp,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_FACE:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_FRONTFACING,
+ dst_mask(dst, WRITEMASK_X),
+ 0,
+ src_undef(),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_PNTC:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_PINTERP,
+ dst_mask(dst, WRITEMASK_XY),
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
- /* Should do the PointCoord thing here. */
emit_op(c,
OPCODE_MOV,
dst_mask(dst, WRITEMASK_ZW),
@@ -413,6 +479,7 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef(),
src_undef());
break;
+
default:
emit_op(c,
WM_PINTERP,
@@ -631,6 +698,8 @@ static void precalc_tex( struct brw_wm_compile *c,
struct prog_dst_register tmpcoord;
const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+ assert(unit < BRW_MAX_TEX_UNIT);
+
if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
struct prog_instruction *out;
struct prog_dst_register tmp0 = get_temp(c);
@@ -671,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp0 = 1 / tmp1 */
emit_op(c, OPCODE_RCP,
- tmp0,
+ dst_mask(tmp0, WRITEMASK_X),
0,
tmp1src,
src_undef(),
@@ -682,7 +751,7 @@ static void precalc_tex( struct brw_wm_compile *c,
tmpcoord,
0,
src0,
- tmp0src,
+ src_swizzle1(tmp0src, SWIZZLE_X),
src_undef());
release_temp(c, tmp0);
@@ -705,7 +774,11 @@ static void precalc_tex( struct brw_wm_compile *c,
tmpcoord,
0,
inst->SrcReg[0],
- scale,
+ src_swizzle(scale,
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_ONE,
+ SWIZZLE_ONE),
src_undef());
coord = src_reg_from_dst(tmpcoord);
@@ -1029,6 +1102,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
c->delta_xy = src_undef();
c->pixel_w = src_undef();
c->nr_fp_insns = 0;
+ c->fp->tex_units_used = 0x0;
/* Emit preamble instructions. This is where special instructions such as
* WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
@@ -1096,6 +1170,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
case OPCODE_TXB:
out = emit_insn(c, inst);
out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+ assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
break;
case OPCODE_XPD:
@@ -1122,9 +1197,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
break;
case OPCODE_PRINT:
break;
-
default:
- emit_insn(c, inst);
+ if (brw_wm_is_scalar_result(inst->Opcode))
+ emit_scalar_insn(c, inst);
+ else
+ emit_insn(c, inst);
break;
}
}
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index a907e1b..7c210ab 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -1,5 +1,7 @@
#include "main/macros.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@@ -8,6 +10,9 @@ enum _subroutine {
SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
};
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint component);
/**
* Determine if the given fragment program uses GLSL features such
@@ -20,8 +25,8 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_IF:
- case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
@@ -42,6 +47,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
}
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
/**
* Record the mapping of a Mesa register to a hardware register.
*/
@@ -52,27 +134,26 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
c->wm_regs[file][index][component].inited = GL_TRUE;
}
-/**
- * Examine instruction's write mask to find index of first component
- * enabled for writing.
- */
-static int get_scalar_dst_index(const struct prog_instruction *inst)
-{
- int i;
- for (i = 0; i < 4; i++)
- if (inst->DstReg.WriteMask & (1<<i))
- break;
- return i;
-}
-
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- if(c->tmp_index == c->tmp_max)
- c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
-
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
return reg;
+
}
/**
@@ -130,35 +211,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
return brw_null_reg();
}
+ assert(index < 256);
+ assert(component < 4);
+
/* see if we've already allocated a HW register for this Mesa register */
if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
}
else {
/* no, allocate new register */
- reg = brw_vec8_grf(c->reg_index, 0);
- }
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
- /* if this is a new register allocation, record it in the table */
- if (!c->wm_regs[file][index][component].inited) {
- set_reg(c, file, index, component, reg);
- c->reg_index++;
- }
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
- if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
- /* ran out of temporary registers! */
-#if 1
- /* This is a big hack for now.
- * Return bad register index, just don't hang the GPU.
- */
- _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
- c->reg_index = BRW_WM_MAX_GRF - 13;
-#else
- return brw_null_reg();
-#endif
+ set_reg(c, file, index, component, reg);
}
-
+
if (neg & (1 << component)) {
reg = negate(reg);
}
@@ -168,6 +243,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
}
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[MAX_PROGRAM_TEMPS];
+ GLint intEnd[MAX_PROGRAM_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
/**
* Preallocate registers. This sets up the Mesa to hardware register
* mapping for certain registers, such as constants (uniforms/state vars)
@@ -179,6 +294,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
struct brw_reg reg;
int urb_read_length = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
if (i < c->key.nr_depth_regs)
@@ -187,14 +306,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2 * c->key.nr_depth_regs;
+ reg_index += 2 * c->key.nr_depth_regs;
/* constants */
{
- const int nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
/* use a real constant buffer, or just use a section of the GRF? */
- c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
if (c->fp->use_const_buffer) {
/* We'll use a real constant buffer and fetch constants from
@@ -216,7 +341,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (i = 0; i < nr_params; i++) {
/* loop over XYZW channels */
for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
/* Save pointer to parameter/constant value.
* Constants will be copied in prepare_constant_buffer()
*/
@@ -226,7 +351,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
/* number of constant regs used (each reg is float[8]) */
c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
- c->reg_index += c->nr_creg;
+ reg_index += c->nr_creg;
}
}
@@ -242,23 +367,52 @@ static void prealloc_reg(struct brw_wm_compile *c)
fp_input = -1;
if (fp_input >= 0 && inputs & (1 << fp_input)) {
- urb_read_length = c->reg_index;
- reg = brw_vec8_grf(c->reg_index, 0);
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
}
if (c->key.vp_outputs_written & (1 << i)) {
- c->reg_index += 2;
+ reg_index += 2;
}
}
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index += 2;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
/* An instruction may reference up to three constants.
* They'll be found in these registers.
@@ -267,12 +421,12 @@ static void prealloc_reg(struct brw_wm_compile *c)
if (c->fp->use_const_buffer) {
for (i = 0; i < 3; i++) {
c->current_const[i].index = -1;
- c->current_const[i].reg = alloc_tmp(c);
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
}
}
#if 0
printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
#endif
}
@@ -376,6 +530,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
const GLuint nr = 1;
const GLuint component = GET_SWZ(src->Swizzle, channel);
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
if (c->fp->use_const_buffer &&
(src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
@@ -489,23 +651,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-static void emit_abs( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- src = get_src_reg(c, inst, 0, i);
- brw_MOV(p, dst, brw_abs(src));
- }
- }
- brw_set_saturate(p, 0);
-}
-
static void emit_trunc( struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
@@ -673,27 +818,26 @@ static void emit_fb_write(struct brw_wm_compile *c,
}
if (c->key.dest_depth_reg) {
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
- assert(comp == 1);
- assert(off == 0);
-#if 0
- /* XXX do we need this code? comp always 1, off always 0, it seems */
if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
/* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
brw_pop_insn_state(p);
}
else
-#endif
{
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
}
nr += 2;
}
@@ -882,12 +1026,20 @@ static void emit_dp3(struct brw_wm_compile *c,
struct brw_reg src0[3], src1[3], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 3; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -901,11 +1053,19 @@ static void emit_dp4(struct brw_wm_compile *c,
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
@@ -920,11 +1080,19 @@ static void emit_dph(struct brw_wm_compile *c,
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, dst, src0[2], src1[2]);
@@ -942,37 +1110,28 @@ static void emit_math1(struct brw_wm_compile *c,
const struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
- struct brw_reg src0, dst, tmp;
- const int mark = mark_tmps( c );
- int i;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
- tmp = alloc_tmp(c);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
/* Get first component of source register */
+ dst = get_dst_reg(c, inst, dst_chan);
src0 = get_src_reg(c, inst, 0, 0);
- /* tmp = func(src0) */
brw_MOV(p, brw_message_reg(2), src0);
brw_math(p,
- tmp,
+ dst,
func,
(inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
-
- /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
-
- /* replicate tmp value across enabled dest channels */
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1 << i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, tmp);
- }
- }
-
- release_tmps(c, mark);
}
static void emit_rcp(struct brw_wm_compile *c,
@@ -1043,24 +1202,6 @@ static void emit_arl(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static void emit_sub(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_ADD(p, dst, src0, negate(src1));
- }
- }
- brw_set_saturate(p, 0);
-}
static void emit_mul(struct brw_wm_compile *c,
const struct prog_instruction *inst)
@@ -1172,7 +1313,15 @@ static void emit_pow(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg dst, src0, src1;
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ dst = get_dst_reg(c, inst, dst_chan);
src0 = get_src_reg_imm(c, inst, 0, 0);
src1 = get_src_reg_imm(c, inst, 1, 0);
@@ -2474,8 +2623,12 @@ static void emit_txb(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
- GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
GLuint i;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
@@ -2496,14 +2649,26 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(3), src[1]);
brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
brw_MOV(p, brw_message_reg(2), src[0]);
brw_MOV(p, brw_message_reg(3), src[1]);
brw_MOV(p, brw_message_reg(4), src[2]);
break;
+ default:
+ /* invalid target */
+ abort();
}
brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
@@ -2511,10 +2676,12 @@ static void emit_txb(struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
}
@@ -2523,11 +2690,15 @@ static void emit_tex(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
- GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
GLuint msg_len;
GLuint i, nr;
GLuint emit;
GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
@@ -2546,10 +2717,14 @@ static void emit_tex(struct brw_wm_compile *c,
emit = WRITEMASK_XY;
nr = 2;
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
nr = 3;
break;
+ default:
+ /* invalid target */
+ abort();
}
msg_len = 1;
@@ -2568,6 +2743,16 @@ static void emit_tex(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
brw_SAMPLE(p,
retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
1, /* msg_reg_nr */
@@ -2575,10 +2760,12 @@ static void emit_tex(struct brw_wm_compile *c,
SURF_INDEX_TEXTURE(unit),
unit, /* sampler */
inst->DstReg.WriteMask, /* writemask */
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */
+ msg_type, /* msg_type */
4, /* response_length */
shadow ? 6 : 4, /* msg_length */
- 0); /* eot */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
if (shadow)
brw_MOV(p, dst[3], brw_imm_f(1.0));
@@ -2595,15 +2782,15 @@ static void post_wm_emit( struct brw_wm_compile *c )
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
- struct brw_instruction *inst0, *inst1;
- int i, if_insn = 0, loop_insn = 0;
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
- c->reg_index = 0;
+ c->out_of_regs = GL_FALSE;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
@@ -2611,6 +2798,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
+ c->cur_inst = i;
+
#if 0
_mesa_printf("Inst %d: ", i);
_mesa_print_instruction(inst);
@@ -2653,18 +2842,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case WM_FRONTFACING:
emit_frontfacing(c, inst);
break;
- case OPCODE_ABS:
- emit_abs(c, inst);
- break;
case OPCODE_ADD:
emit_add(c, inst);
break;
case OPCODE_ARL:
emit_arl(c, inst);
break;
- case OPCODE_SUB:
- emit_sub(c, inst);
- break;
case OPCODE_FRC:
emit_frc(c, inst);
break;
@@ -2770,15 +2953,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_kil(c);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
case OPCODE_BGNSUB:
brw_save_label(p, inst->Comment, p->nr_insn);
@@ -2812,7 +2995,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
break;
case OPCODE_BGNLOOP:
/* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
brw_BREAK(p);
@@ -2823,25 +3006,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
- loop_insn--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
- /* patch all the BREAK instructions from
- last BEGINLOOP */
- while (inst0 > loop_inst[loop_insn]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
- } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
- }
- }
- break;
+ }
+ }
+ }
+ break;
default:
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
@@ -2849,13 +3041,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
post_wm_emit(c);
- if (c->reg_index >= BRW_WM_MAX_GRF) {
- _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
- /* XXX we need to do some proper error recovery here */
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
}
}
-
/**
* Do GPU code generation for shaders that use GLSL features such as
* flow control. Other shaders will be compiled with the
@@ -2876,6 +3069,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_print_program(c, "brw_wm_glsl_emit done");
}
- c->prog_data.total_grf = c->reg_index;
+ c->prog_data.total_grf = num_grf_used(c);
c->prog_data.total_scratch = 0;
}
diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c
index bd60ac9..5e399ac 100644
--- a/i965/brw_wm_iz.c
+++ b/i965/brw_wm_iz.c
@@ -116,8 +116,13 @@ const struct {
{ C, 0, 1, 1, 1 }
};
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags
+ */
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key )
{
GLuint reg = 2;
@@ -127,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
key->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present) {
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
key->source_depth_reg = reg;
reg += 2;
}
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index 9214276..6279258 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -257,34 +257,6 @@ static void pass0_set_dst( struct brw_wm_compile *c,
}
-static void pass0_set_dst_scalar( struct brw_wm_compile *c,
- struct brw_wm_instruction *out,
- const struct prog_instruction *inst,
- GLuint writemask )
-{
- if (writemask) {
- const struct prog_dst_register *dst = &inst->DstReg;
- GLuint i;
-
- /* Compute only the first (X) value:
- */
- out->writemask = WRITEMASK_X;
- out->dst[0] = get_value(c);
-
- /* Update our tracking register file for all the components in
- * writemask:
- */
- for (i = 0; i < 4; i++) {
- if (writemask & (1<<i)) {
- pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]);
- }
- }
- }
- else
- out->writemask = 0;
-}
-
-
static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
struct prog_src_register src,
GLuint i )
@@ -363,10 +335,7 @@ translate_insn(struct brw_wm_compile *c,
/* Dst:
*/
- if (brw_wm_is_scalar_result(out->opcode))
- pass0_set_dst_scalar(c, out, inst, writemask);
- else
- pass0_set_dst(c, out, inst, writemask);
+ pass0_set_dst(c, out, inst, writemask);
}
diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c
index ab9aa2f..3436a24 100644
--- a/i965/brw_wm_pass1.c
+++ b/i965/brw_wm_pass1.c
@@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_FRC:
case OPCODE_MOV:
case OPCODE_SWZ:
+ case OPCODE_TRUNC:
read0 = writemask;
break;
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index 3fc18ff..dff4665 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -103,6 +103,10 @@ struct wm_sampler_key {
GLenum minfilter, magfilter;
GLenum comparemode, comparefunc;
dri_bo *sdc_bo;
+
+ /** If target is cubemap, take context setting.
+ */
+ GLboolean seamless_cube_map;
} sampler[BRW_MAX_TEX_UNIT];
};
@@ -169,30 +173,33 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
}
}
- if (key->tex_target == GL_TEXTURE_CUBE_MAP &&
- (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
- /* If we're using anything but nearest sampling for a cube map, we
- * need to set this wrap mode to avoid GPU lock-ups.
- */
- sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- }
- else if (key->tex_target == GL_TEXTURE_1D) {
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+ /* Cube-maps on 965 and later must use the same wrap mode for all 3
+ * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
+ */
+ if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
+ if (key->seamless_cube_map &&
+ (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ } else {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
+ } else if (key->tex_target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
* any nonexistent border pixels from floating in.
*/
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- else {
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
- sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
- }
+
/* Set shadow function:
*/
@@ -249,6 +256,9 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
entry->tex_target = texObj->Target;
+ entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
+ ? ctx->Texture.CubeMapSeamless : GL_FALSE;
+
entry->wrap_r = texObj->WrapR;
entry->wrap_s = texObj->WrapS;
entry->wrap_t = texObj->WrapT;
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index 67b4117..39f8c6d 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -71,7 +71,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_G4X(brw))
+ if (BRW_IS_IGDNG(brw))
+ key->max_threads = 12 * 6;
+ else if (BRW_IS_G4X(brw))
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
@@ -141,7 +143,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ wm.thread1.binding_table_entry_count = key->nr_surfaces;
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
@@ -158,7 +164,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+ if (BRW_IS_IGDNG(brw))
+ wm.wm4.sampler_count = 0; /* hardware requirement */
+ else
+ wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
if (brw->wm.sampler_bo != NULL) {
/* reloc */
wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 805df8a..3dcc592 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
}
}
-
-/**
- * Use same key for WM and VS surfaces.
- */
-struct brw_surface_key {
- GLenum target, depthmode;
- dri_bo *bo;
- GLint format, internal_format;
- GLint first_level, last_level;
- GLint width, height, depth;
- GLint pitch, cpp;
- uint32_t tiling;
- GLuint offset;
-};
-
-
static void
brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
{
@@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.cube_neg_z = 1;
}
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.tiling = intelObj->mt->region->tiling;
dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
@@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
-static dri_bo *
+dri_bo *
brw_create_constant_surface( struct brw_context *brw,
struct brw_surface_key *key )
{
@@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
- bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw,
return bo;
}
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!fp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
/**
* Update the surface state for a WM constant buffer.
* The constant buffer will be (re)allocated here if needed.
*/
-static dri_bo *
+static void
brw_update_wm_constant_surface( GLcontext *ctx,
- GLuint surf,
- dri_bo *const_buffer,
- const struct gl_program_parameter_list *params)
+ GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
struct brw_surface_key key;
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params =
+ fp->program.Base.Parameters;
- /* free old const buffer if too small */
- if (const_buffer && const_buffer->size < size) {
- dri_bo_unreference(const_buffer);
- const_buffer = NULL;
- }
+ /* If we're in this state update atom, we need to update WM constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
- /* alloc new buffer if needed */
- if (!const_buffer) {
- const_buffer =
- drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64);
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ return;
}
memset(&key, 0, sizeof(key));
key.format = MESA_FORMAT_RGBA_FLOAT32;
key.internal_format = GL_RGBA;
- key.bo = const_buffer;
+ key.bo = fp->const_buffer;
key.depthmode = GL_NONE;
key.pitch = params->NumParameters;
key.width = params->NumParameters;
@@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx,
*/
dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
}
-
- return const_buffer;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
-
/**
- * Update the surface state for a VS constant buffer.
- * The constant buffer will be (re)allocated here if needed.
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
*/
-static dri_bo *
-brw_update_vs_constant_surface( GLcontext *ctx,
- GLuint surf,
- dri_bo *const_buffer,
- const struct gl_program_parameter_list *params)
+static void prepare_wm_constant_surface(struct brw_context *brw )
{
- struct brw_context *brw = brw_context(ctx);
- struct brw_surface_key key;
- struct intel_context *intel = &brw->intel;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
-
- assert(surf == 0);
-
- /* free old const buffer if too small */
- if (const_buffer && const_buffer->size < size) {
- dri_bo_unreference(const_buffer);
- const_buffer = NULL;
- }
-
- /* alloc new buffer if needed */
- if (!const_buffer) {
- const_buffer =
- drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
- }
-
- memset(&key, 0, sizeof(key));
-
- key.format = MESA_FORMAT_RGBA_FLOAT32;
- key.internal_format = GL_RGBA;
- key.bo = const_buffer;
- key.depthmode = GL_NONE;
- key.pitch = params->NumParameters;
- key.width = params->NumParameters;
- key.height = 1;
- key.depth = 1;
- key.cpp = 16;
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
- /*
- printf("%s:\n", __FUNCTION__);
- printf(" width %d height %d depth %d cpp %d pitch %d\n",
- key.width, key.height, key.depth, key.cpp, key.pitch);
- */
+ drm_intel_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
- dri_bo_unreference(brw->vs.surf_bo[surf]);
- brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, key.bo ? 1 : 0,
- NULL);
- if (brw->vs.surf_bo[surf] == NULL) {
- brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ if (brw->wm.surf_bo[surf] != NULL) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ }
+ return;
}
- return const_buffer;
+ brw_update_wm_constant_surface(ctx, surf);
}
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
+
/**
* Sets up a surface state structure to point at the given region.
@@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
- unsigned int unit, GLboolean cached)
+ unsigned int unit)
{
GLcontext *ctx = &brw->intel.ctx;
dri_bo *region_bo = NULL;
@@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
ctx->Color.BlendEnabled);
dri_bo_unreference(brw->wm.surf_bo[unit]);
- brw->wm.surf_bo[unit] = NULL;
- if (cached)
- brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
- &key, sizeof(key),
- &region_bo, 1,
- NULL);
+ brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &region_bo, 1,
+ NULL);
if (brw->wm.surf_bo[unit] == NULL) {
struct brw_surface_state surf;
@@ -581,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.surface_format = key.surface_format;
surf.ss0.surface_type = key.surface_type;
- surf.ss1.base_addr = key.draw_offset;
+ if (key.tiling == I915_TILING_NONE) {
+ surf.ss1.base_addr = key.draw_offset;
+ } else {
+ uint32_t tile_offset = key.draw_offset % 4096;
+
+ surf.ss1.base_addr = key.draw_offset - tile_offset;
+
+ assert(BRW_IS_G4X(brw) || tile_offset == 0);
+ if (BRW_IS_G4X(brw)) {
+ if (key.tiling == I915_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 128 / 2;
+ }
+ }
+ }
if (region_bo != NULL)
surf.ss1.base_addr += region_bo->offset; /* reloc */
@@ -598,7 +615,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.writedisable_alpha = !key.color_mask[3];
/* Key size will never match key size for textures, so we're safe. */
- brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
@@ -611,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
offsetof(struct brw_surface_state, ss1),
region_bo,
- key.draw_offset,
+ surf.ss1.base_addr - region_bo->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
}
@@ -630,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
- bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
NULL);
@@ -646,7 +664,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
else
data[i] = 0;
- bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
data, data_size,
@@ -682,27 +700,17 @@ static void prepare_wm_surfaces(struct brw_context *brw )
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
brw_update_renderbuffer_surface(brw,
ctx->DrawBuffer->_ColorDrawBuffers[i],
- i,
- GL_FALSE);
+ i);
}
} else {
- brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE);
+ brw_update_renderbuffer_surface(brw, NULL, 0);
}
old_nr_surfaces = brw->wm.nr_surfaces;
brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
- /* Update surface / buffer for fragment shader constant buffer */
- {
- const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- fp->const_buffer =
- brw_update_wm_constant_surface(ctx, surf, fp->const_buffer,
- fp->program.Base.Parameters);
-
- brw->wm.nr_surfaces = surf + 1;
- }
+ if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+ brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
/* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
@@ -735,100 +743,16 @@ static void prepare_wm_surfaces(struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
-
-/**
- * Constructs the binding table for the VS surface state.
- */
-static dri_bo *
-brw_vs_get_binding_table(struct brw_context *brw)
-{
- dri_bo *bind_bo;
-
- assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
-
- bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, brw->vs.nr_surfaces,
- NULL);
-
- if (bind_bo == NULL) {
- GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint);
- uint32_t *data = malloc(data_size);
- int i;
-
- for (i = 0; i < brw->vs.nr_surfaces; i++)
- if (brw->vs.surf_bo[i])
- data[i] = brw->vs.surf_bo[i]->offset;
- else
- data[i] = 0;
-
- bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, brw->vs.nr_surfaces,
- data, data_size,
- NULL, NULL);
-
- /* Emit binding table relocations to surface state */
- for (i = 0; i < BRW_VS_MAX_SURF; i++) {
- if (brw->vs.surf_bo[i] != NULL) {
- dri_bo_emit_reloc(bind_bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- i * sizeof(GLuint),
- brw->vs.surf_bo[i]);
- }
- }
-
- free(data);
- }
-
- return bind_bo;
-}
-
-
-/**
- * Vertex shader surfaces. Just constant buffer for now. Could add vertex
- * shader textures in the future.
- */
-static void prepare_vs_surfaces(struct brw_context *brw )
-{
- GLcontext *ctx = &brw->intel.ctx;
-
- /* Update surface / buffer for vertex shader constant buffer */
- {
- const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER;
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *) brw->vertex_program;
- vp->const_buffer =
- brw_update_vs_constant_surface(ctx, surf, vp->const_buffer,
- vp->program.Base.Parameters);
-
- brw->vs.nr_surfaces = 1;
- }
-
- dri_bo_unreference(brw->vs.bind_bo);
- brw->vs.bind_bo = brw_vs_get_binding_table(brw);
-
- if (1)
- brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-}
-
-
-static void
-prepare_surfaces(struct brw_context *brw)
-{
- prepare_wm_surfaces(brw);
- prepare_vs_surfaces(brw);
-}
-
-
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
- .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
- .brw = BRW_NEW_CONTEXT,
+ .mesa = (_NEW_COLOR |
+ _NEW_TEXTURE |
+ _NEW_BUFFERS),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
.cache = 0
},
- .prepare = prepare_surfaces,
+ .prepare = prepare_wm_surfaces,
};
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
index 29dc05c..6aa36d1 100644
--- a/shared/intel_batchbuffer.c
+++ b/shared/intel_batchbuffer.c
@@ -195,7 +195,16 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
{
struct intel_context *intel = batch->intel;
GLuint used = batch->ptr - batch->map;
- GLboolean was_locked = intel->locked;
+
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch->buf;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
+
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch->buf;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
if (used == 0) {
batch->cliprect_mode = IGNORE_CLIPRECTS;
@@ -243,13 +252,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
/* TODO: Just pass the relocation list and dma buffer up to the
* kernel.
*/
- if (!was_locked)
- LOCK_HARDWARE(intel);
-
+ LOCK_HARDWARE(intel);
do_flush_locked(batch, used, GL_FALSE);
-
- if (!was_locked)
- UNLOCK_HARDWARE(intel);
+ UNLOCK_HARDWARE(intel);
if (INTEL_DEBUG & DEBUG_SYNC) {
fprintf(stderr, "waiting for idle\n");
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index 4919828..0c5be4c 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -108,6 +108,8 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
}
+ assert(src->tiling != I915_TILING_Y);
+ assert(dst->tiling != I915_TILING_Y);
#ifndef I915
if (src->tiling != I915_TILING_NONE) {
CMD |= XY_SRC_TILED;
@@ -175,66 +177,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
UNLOCK_HARDWARE(intel);
}
-
-
-
-void
-intelEmitFillBlit(struct intel_context *intel,
- GLuint cpp,
- GLshort dst_pitch,
- dri_bo *dst_buffer,
- GLuint dst_offset,
- uint32_t dst_tiling,
- GLshort x, GLshort y,
- GLshort w, GLshort h,
- GLuint color)
-{
- GLuint BR13, CMD;
- BATCH_LOCALS;
-
- dst_pitch *= cpp;
-
- switch (cpp) {
- case 1:
- BR13 = (0xF0 << 16);
- CMD = XY_COLOR_BLT_CMD;
- break;
- case 2:
- BR13 = (0xF0 << 16) | BR13_565;
- CMD = XY_COLOR_BLT_CMD;
- break;
- case 4:
- BR13 = (0xF0 << 16) | BR13_8888;
- CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- return;
- }
-#ifndef I915
- if (dst_tiling != I915_TILING_NONE) {
- CMD |= XY_DST_TILED;
- dst_pitch /= 4;
- }
-#endif
-
- DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
- __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
-
- assert(w > 0);
- assert(h > 0);
-
- BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
- OUT_BATCH(CMD);
- OUT_BATCH(BR13 | dst_pitch);
- OUT_BATCH((y << 16) | x);
- OUT_BATCH(((y + h) << 16) | (x + w));
- OUT_RELOC(dst_buffer,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- dst_offset);
- OUT_BATCH(color);
- ADVANCE_BATCH();
-}
-
static GLuint translate_raster_op(GLenum logicop)
{
switch(logicop) {
@@ -261,7 +203,7 @@ static GLuint translate_raster_op(GLenum logicop)
/* Copy BitBlt
*/
-void
+GLboolean
intelEmitCopyBlit(struct intel_context *intel,
GLuint cpp,
GLshort src_pitch,
@@ -283,6 +225,19 @@ intelEmitCopyBlit(struct intel_context *intel,
dri_bo *aper_array[3];
BATCH_LOCALS;
+ if (dst_tiling != I915_TILING_NONE) {
+ if (dst_offset & 4095)
+ return GL_FALSE;
+ if (dst_tiling == I915_TILING_Y)
+ return GL_FALSE;
+ }
+ if (src_tiling != I915_TILING_NONE) {
+ if (src_offset & 4095)
+ return GL_FALSE;
+ if (src_tiling == I915_TILING_Y)
+ return GL_FALSE;
+ }
+
/* do space/cliprects check before going any further */
do {
aper_array[0] = intel->batch->buf;
@@ -297,12 +252,7 @@ intelEmitCopyBlit(struct intel_context *intel,
} while (pass < 2);
if (pass >= 2) {
- GLboolean locked = GL_FALSE;
- if (!intel->locked) {
- LOCK_HARDWARE(intel);
- locked = GL_TRUE;
- }
-
+ LOCK_HARDWARE(intel);
dri_bo_map(dst_buffer, GL_TRUE);
dri_bo_map(src_buffer, GL_FALSE);
_mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
@@ -316,11 +266,9 @@ intelEmitCopyBlit(struct intel_context *intel,
dri_bo_unmap(src_buffer);
dri_bo_unmap(dst_buffer);
-
- if (locked)
- UNLOCK_HARDWARE(intel);
+ UNLOCK_HARDWARE(intel);
- return;
+ return GL_TRUE;
}
intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
@@ -347,7 +295,7 @@ intelEmitCopyBlit(struct intel_context *intel,
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
break;
default:
- return;
+ return GL_FALSE;
}
#ifndef I915
@@ -362,7 +310,7 @@ intelEmitCopyBlit(struct intel_context *intel,
#endif
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
- return;
+ return GL_TRUE;
}
assert(dst_x < dst_x2);
@@ -384,6 +332,8 @@ intelEmitCopyBlit(struct intel_context *intel,
ADVANCE_BATCH();
intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ return GL_TRUE;
}
@@ -527,6 +477,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
BR13 |= BR13_565;
}
+ assert(irb->region->tiling != I915_TILING_Y);
+
#ifndef I915
if (irb->region->tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
@@ -596,7 +548,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
UNLOCK_HARDWARE(intel);
}
-void
+GLboolean
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
@@ -612,11 +564,19 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
int dwords = ALIGN(src_size, 8) / 4;
uint32_t opcode, br13, blit_cmd;
+ if (dst_tiling != I915_TILING_NONE) {
+ if (dst_offset & 4095)
+ return GL_FALSE;
+ if (dst_tiling == I915_TILING_Y)
+ return GL_FALSE;
+ }
+
assert( logic_op - GL_CLEAR >= 0 );
assert( logic_op - GL_CLEAR < 0x10 );
+ assert(dst_pitch > 0);
if (w < 0 || h < 0)
- return;
+ return GL_TRUE;
dst_pitch *= cpp;
@@ -673,4 +633,46 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
REFERENCES_CLIPRECTS );
intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ return GL_TRUE;
+}
+
+/* We don't have a memmove-type blit like some other hardware, so we'll do a
+ * rectangular blit covering a large space, then emit 1-scanline blit at the
+ * end to cover the last if we need.
+ */
+void
+intel_emit_linear_blit(struct intel_context *intel,
+ drm_intel_bo *dst_bo,
+ unsigned int dst_offset,
+ drm_intel_bo *src_bo,
+ unsigned int src_offset,
+ unsigned int size)
+{
+ GLuint pitch, height;
+
+ /* The pitch is a signed value. */
+ pitch = MIN2(size, (1 << 15) - 1);
+ height = size / pitch;
+ intelEmitCopyBlit(intel, 1,
+ pitch, src_bo, src_offset, I915_TILING_NONE,
+ pitch, dst_bo, dst_offset, I915_TILING_NONE,
+ 0, 0, /* src x/y */
+ 0, 0, /* dst x/y */
+ pitch, height, /* w, h */
+ GL_COPY);
+
+ src_offset += pitch * height;
+ dst_offset += pitch * height;
+ size -= pitch * height;
+ assert (size < (1 << 15));
+ if (size != 0) {
+ intelEmitCopyBlit(intel, 1,
+ size, src_bo, src_offset, I915_TILING_NONE,
+ size, dst_bo, dst_offset, I915_TILING_NONE,
+ 0, 0, /* src x/y */
+ 0, 0, /* dst x/y */
+ size, 1, /* w, h */
+ GL_COPY);
+ }
}
diff --git a/shared/intel_blit.h b/shared/intel_blit.h
index 52065b1..240cb7c 100644
--- a/shared/intel_blit.h
+++ b/shared/intel_blit.h
@@ -35,7 +35,8 @@ extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv,
extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask);
-extern void intelEmitCopyBlit(struct intel_context *intel,
+GLboolean
+intelEmitCopyBlit(struct intel_context *intel,
GLuint cpp,
GLshort src_pitch,
dri_bo *src_buffer,
@@ -50,16 +51,7 @@ extern void intelEmitCopyBlit(struct intel_context *intel,
GLshort w, GLshort h,
GLenum logicop );
-extern void intelEmitFillBlit(struct intel_context *intel,
- GLuint cpp,
- GLshort dst_pitch,
- dri_bo *dst_buffer,
- GLuint dst_offset,
- uint32_t dst_tiling,
- GLshort x, GLshort y,
- GLshort w, GLshort h, GLuint color);
-
-void
+GLboolean
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
@@ -71,5 +63,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op);
+void intel_emit_linear_blit(struct intel_context *intel,
+ drm_intel_bo *dst_bo,
+ unsigned int dst_offset,
+ drm_intel_bo *src_bo,
+ unsigned int src_offset,
+ unsigned int size);
#endif
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
index 2e6b778..a022593 100644
--- a/shared/intel_buffer_objects.c
+++ b/shared/intel_buffer_objects.c
@@ -28,13 +28,18 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "main/macros.h"
#include "main/bufferobj.h"
#include "intel_context.h"
+#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+ GLenum target, struct gl_buffer_object *obj);
/** Allocates a new dri_bo to store the data for the buffer object. */
static void
@@ -100,7 +105,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
assert(intel_obj);
- assert(!obj->Pointer); /* Mesa should have unmapped it */
+
+ /* Buffer objects are automatically unmapped when deleting according
+ * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
+ * (though it does if you call glDeleteBuffers)
+ */
+ if (obj->Pointer)
+ intel_bufferobj_unmap(ctx, 0, obj);
_mesa_free(intel_obj->sys_buffer);
if (intel_obj->region) {
@@ -119,9 +130,10 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
* Allocate space for and store data in a buffer object. Any data that was
* previously stored in the buffer object is lost. If data is NULL,
* memory will be allocated, but no copy will occur.
- * Called via glBufferDataARB().
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
*/
-static void
+static GLboolean
intel_bufferobj_data(GLcontext * ctx,
GLenum target,
GLsizeiptrARB size,
@@ -156,15 +168,19 @@ intel_bufferobj_data(GLcontext * ctx,
if (intel_obj->sys_buffer != NULL) {
if (data != NULL)
memcpy(intel_obj->sys_buffer, data, size);
- return;
+ return GL_TRUE;
}
}
#endif
intel_bufferobj_alloc_buffer(intel, intel_obj);
+ if (!intel_obj->buffer)
+ return GL_FALSE;
if (data != NULL)
dri_bo_subdata(intel_obj->buffer, 0, size, data);
}
+
+ return GL_TRUE;
}
@@ -191,8 +207,12 @@ intel_bufferobj_subdata(GLcontext * ctx,
if (intel_obj->sys_buffer)
memcpy((char *)intel_obj->sys_buffer + offset, data, size);
- else
+ else {
+ /* Flush any existing batchbuffer that might reference this data. */
+ intelFlush(ctx);
+
dri_bo_subdata(intel_obj->buffer, offset, size, data);
+ }
}
@@ -209,7 +229,10 @@ intel_bufferobj_get_subdata(GLcontext * ctx,
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
assert(intel_obj);
- dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
+ if (intel_obj->sys_buffer)
+ memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
+ else
+ dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
@@ -234,6 +257,11 @@ intel_bufferobj_map(GLcontext * ctx,
return obj->Pointer;
}
+ /* Flush any existing batchbuffer that might have written to this
+ * buffer.
+ */
+ intelFlush(ctx);
+
if (intel_obj->region)
intel_bufferobj_cow(intel, intel_obj);
@@ -251,32 +279,205 @@ intel_bufferobj_map(GLcontext * ctx,
}
obj->Pointer = intel_obj->buffer->virtual;
+ obj->Length = obj->Size;
+ obj->Offset = 0;
+
return obj->Pointer;
}
+/**
+ * Called via glMapBufferRange().
+ *
+ * The goal of this extension is to allow apps to accumulate their rendering
+ * at the same time as they accumulate their buffer object. Without it,
+ * you'd end up blocking on execution of rendering every time you mapped
+ * the buffer to put new data in.
+ *
+ * We support it in 3 ways: If unsynchronized, then don't bother
+ * flushing the batchbuffer before mapping the buffer, which can save blocking
+ * in many cases. If we would still block, and they allow the whole buffer
+ * to be invalidated, then just allocate a new buffer to replace the old one.
+ * If not, and we'd block, and they allow the subrange of the buffer to be
+ * invalidated, then we can make a new little BO, let them write into that,
+ * and blit it into the real BO at unmap time.
+ */
+static void *
+intel_bufferobj_map_range(GLcontext * ctx,
+ GLenum target, GLintptr offset, GLsizeiptr length,
+ GLbitfield access, struct gl_buffer_object *obj)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+ assert(intel_obj);
+
+ /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
+ * internally uses our functions directly.
+ */
+ obj->Offset = offset;
+ obj->Length = length;
+ obj->AccessFlags = access;
+
+ if (intel_obj->sys_buffer) {
+ obj->Pointer = intel_obj->sys_buffer + offset;
+ return obj->Pointer;
+ }
+
+ if (intel_obj->region)
+ intel_bufferobj_cow(intel, intel_obj);
+
+ /* If the mapping is synchronized with other GL operations, flush
+ * the batchbuffer so that GEM knows about the buffer access for later
+ * syncing.
+ */
+ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT))
+ intelFlush(ctx);
+
+ if (intel_obj->buffer == NULL) {
+ obj->Pointer = NULL;
+ return NULL;
+ }
+
+ /* If the user doesn't care about existing buffer contents and mapping
+ * would cause us to block, then throw out the old buffer.
+ */
+ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+ (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
+ drm_intel_bo_busy(intel_obj->buffer)) {
+ drm_intel_bo_unreference(intel_obj->buffer);
+ intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
+ intel_obj->Base.Size, 64);
+ }
+
+ /* If the user is mapping a range of an active buffer object but
+ * doesn't require the current contents of that range, make a new
+ * BO, and we'll copy what they put in there out at unmap or
+ * FlushRange time.
+ */
+ if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
+ drm_intel_bo_busy(intel_obj->buffer)) {
+ if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
+ intel_obj->range_map_buffer = _mesa_malloc(length);
+ obj->Pointer = intel_obj->range_map_buffer;
+ } else {
+ intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "range map",
+ length, 64);
+ if (!(access & GL_MAP_READ_BIT) &&
+ intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
+ intel_obj->mapped_gtt = GL_TRUE;
+ } else {
+ drm_intel_bo_map(intel_obj->range_map_bo,
+ (access & GL_MAP_WRITE_BIT) != 0);
+ intel_obj->mapped_gtt = GL_FALSE;
+ }
+ obj->Pointer = intel_obj->range_map_bo->virtual;
+ }
+ return obj->Pointer;
+ }
+
+ if (!(access & GL_MAP_READ_BIT) &&
+ intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+ intel_obj->mapped_gtt = GL_TRUE;
+ } else {
+ drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
+ intel_obj->mapped_gtt = GL_FALSE;
+ }
+
+ obj->Pointer = intel_obj->buffer->virtual + offset;
+ return obj->Pointer;
+}
+
+/* Ideally we'd use a BO to avoid taking up cache space for the temporary
+ * data, but FlushMappedBufferRange may be followed by further writes to
+ * the pointer, so we would have to re-map after emitting our blit, which
+ * would defeat the point.
+ */
+static void
+intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target,
+ GLintptr offset, GLsizeiptr length,
+ struct gl_buffer_object *obj)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+ drm_intel_bo *temp_bo;
+
+ /* Unless we're in the range map using a temporary system buffer,
+ * there's no work to do.
+ */
+ if (intel_obj->range_map_buffer == NULL)
+ return;
+
+ temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
+
+ drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
+
+ intel_emit_linear_blit(intel,
+ intel_obj->buffer, obj->Offset + offset,
+ temp_bo, 0,
+ length);
+
+ drm_intel_bo_unreference(temp_bo);
+}
+
/**
- * Called via glMapBufferARB().
+ * Called via glUnmapBuffer().
*/
static GLboolean
intel_bufferobj_unmap(GLcontext * ctx,
GLenum target, struct gl_buffer_object *obj)
{
+ struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
assert(intel_obj);
+ assert(obj->Pointer);
if (intel_obj->sys_buffer != NULL) {
- assert(obj->Pointer);
- obj->Pointer = NULL;
+ /* always keep the mapping around. */
+ } else if (intel_obj->range_map_buffer != NULL) {
+ /* Since we've emitted some blits to buffers that will (likely) be used
+ * in rendering operations in other cache domains in this batch, emit a
+ * flush. Once again, we wish for a domain tracker in libdrm to cover
+ * usage inside of a batchbuffer.
+ */
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+ free(intel_obj->range_map_buffer);
+ intel_obj->range_map_buffer = NULL;
+ } else if (intel_obj->range_map_bo != NULL) {
+ if (intel_obj->mapped_gtt) {
+ drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
+ } else {
+ drm_intel_bo_unmap(intel_obj->range_map_bo);
+ }
+
+ intel_emit_linear_blit(intel,
+ intel_obj->buffer, obj->Offset,
+ intel_obj->range_map_bo, 0,
+ obj->Length);
+
+ /* Since we've emitted some blits to buffers that will (likely) be used
+ * in rendering operations in other cache domains in this batch, emit a
+ * flush. Once again, we wish for a domain tracker in libdrm to cover
+ * usage inside of a batchbuffer.
+ */
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ drm_intel_bo_unreference(intel_obj->range_map_bo);
+ intel_obj->range_map_bo = NULL;
} else if (intel_obj->buffer != NULL) {
- assert(obj->Pointer);
if (intel_obj->mapped_gtt) {
drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
} else {
drm_intel_bo_unmap(intel_obj->buffer);
}
- obj->Pointer = NULL;
}
+ obj->Pointer = NULL;
+ obj->Offset = 0;
+ obj->Length = 0;
+
return GL_TRUE;
}
@@ -294,30 +495,94 @@ intel_bufferobj_buffer(struct intel_context *intel,
}
if (intel_obj->buffer == NULL) {
+ void *sys_buffer = intel_obj->sys_buffer;
+
+ /* only one of buffer and sys_buffer could be non-NULL */
intel_bufferobj_alloc_buffer(intel, intel_obj);
+ intel_obj->sys_buffer = NULL;
+
intel_bufferobj_subdata(&intel->ctx,
GL_ARRAY_BUFFER_ARB,
0,
intel_obj->Base.Size,
- intel_obj->sys_buffer,
+ sys_buffer,
&intel_obj->Base);
- _mesa_free(intel_obj->sys_buffer);
+ _mesa_free(sys_buffer);
intel_obj->sys_buffer = NULL;
}
return intel_obj->buffer;
}
+static void
+intel_bufferobj_copy_subdata(GLcontext *ctx,
+ struct gl_buffer_object *src,
+ struct gl_buffer_object *dst,
+ GLintptr read_offset, GLintptr write_offset,
+ GLsizeiptr size)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_buffer_object *intel_src = intel_buffer_object(src);
+ struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
+ drm_intel_bo *src_bo, *dst_bo;
+
+ if (size == 0)
+ return;
+
+ /* If we're in system memory, just map and memcpy. */
+ if (intel_src->sys_buffer || intel_dst->sys_buffer) {
+ /* The same buffer may be used, but note that regions copied may
+ * not overlap.
+ */
+ if (src == dst) {
+ char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+ GL_READ_WRITE, dst);
+ memcpy(ptr + write_offset, ptr + read_offset, size);
+ intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ } else {
+ const char *src_ptr;
+ char *dst_ptr;
+
+ src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
+ GL_READ_ONLY, src);
+ dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+ GL_WRITE_ONLY, dst);
+
+ memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
+
+ intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
+ intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ }
+ }
+
+ /* Otherwise, we have real BOs, so blit them. */
+
+ dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
+ src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
+
+ intel_emit_linear_blit(intel,
+ dst_bo, write_offset,
+ src_bo, read_offset, size);
+
+ /* Since we've emitted some blits to buffers that will (likely) be used
+ * in rendering operations in other cache domains in this batch, emit a
+ * flush. Once again, we wish for a domain tracker in libdrm to cover
+ * usage inside of a batchbuffer.
+ */
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
void
-intel_bufferobj_init(struct intel_context *intel)
+intelInitBufferObjectFuncs(struct dd_function_table *functions)
{
- GLcontext *ctx = &intel->ctx;
-
- ctx->Driver.NewBufferObject = intel_bufferobj_alloc;
- ctx->Driver.DeleteBuffer = intel_bufferobj_free;
- ctx->Driver.BufferData = intel_bufferobj_data;
- ctx->Driver.BufferSubData = intel_bufferobj_subdata;
- ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata;
- ctx->Driver.MapBuffer = intel_bufferobj_map;
- ctx->Driver.UnmapBuffer = intel_bufferobj_unmap;
+ functions->NewBufferObject = intel_bufferobj_alloc;
+ functions->DeleteBuffer = intel_bufferobj_free;
+ functions->BufferData = intel_bufferobj_data;
+ functions->BufferSubData = intel_bufferobj_subdata;
+ functions->GetBufferSubData = intel_bufferobj_get_subdata;
+ functions->MapBuffer = intel_bufferobj_map;
+ functions->MapBufferRange = intel_bufferobj_map_range;
+ functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
+ functions->UnmapBuffer = intel_bufferobj_unmap;
+ functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
}
diff --git a/shared/intel_buffer_objects.h b/shared/intel_buffer_objects.h
index 0431015..bf3e08a 100644
--- a/shared/intel_buffer_objects.h
+++ b/shared/intel_buffer_objects.h
@@ -48,6 +48,12 @@ struct intel_buffer_object
struct intel_region *region; /* Is there a zero-copy texture
associated with this (pixel)
buffer object? */
+
+ drm_intel_bo *range_map_bo;
+ void *range_map_buffer;
+ unsigned int range_map_offset;
+ GLsizei range_map_size;
+
GLboolean mapped_gtt;
};
@@ -60,7 +66,7 @@ dri_bo *intel_bufferobj_buffer(struct intel_context *intel,
/* Hook the bufferobject implementation into mesa:
*/
-void intel_bufferobj_init(struct intel_context *intel);
+void intelInitBufferObjectFuncs(struct dd_function_table *functions);
@@ -72,10 +78,7 @@ void intel_bufferobj_init(struct intel_context *intel);
static INLINE struct intel_buffer_object *
intel_buffer_object(struct gl_buffer_object *obj)
{
- if (obj->Name)
- return (struct intel_buffer_object *) obj;
- else
- return NULL;
+ return (struct intel_buffer_object *) obj;
}
/* Helpers for zerocopy image uploads. See also intel_regions.h:
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index d2fad9e..e7357e7 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
/* Do this here, not core Mesa, since this function is called from
* many places within the driver.
*/
- if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+ if (ctx->NewState & _NEW_BUFFERS) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
@@ -345,6 +345,23 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode)
static void
intelReadBuffer(GLcontext * ctx, GLenum mode)
{
+ if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+ struct intel_context *const intel = intel_context(ctx);
+ const GLboolean was_front_buffer_reading =
+ intel->is_front_buffer_reading;
+
+ intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
+ || (mode == GL_FRONT);
+
+ /* If we weren't front-buffer reading before but we are now, make sure
+ * that the front-buffer has actually been allocated.
+ */
+ if (!was_front_buffer_reading && intel->is_front_buffer_reading) {
+ intel_update_renderbuffers(intel->driContext,
+ intel->driContext->driDrawablePriv);
+ }
+ }
+
if (ctx->ReadBuffer == ctx->DrawBuffer) {
/* This will update FBO completeness status.
* A framebuffer will be incomplete if the GL_READ_BUFFER setting
diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h
index 4593d90..3dc8653 100644
--- a/shared/intel_chipset.h
+++ b/shared/intel_chipset.h
@@ -66,6 +66,10 @@
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
@@ -73,15 +77,22 @@
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
- devid == PCI_CHIP_GM45_GM || IS_IGD(devid))
+ devid == PCI_CHIP_GM45_GM || \
+ IS_IGD(devid) || \
+ devid == PCI_CHIP_ILM_G)
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
- devid == PCI_CHIP_G41_G)
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid))
+
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
devid == PCI_CHIP_I915_GM)
@@ -99,7 +110,8 @@
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_I946_GZ || \
- IS_G4X(devid))
+ IS_G4X(devid) || \
+ IS_IGDNG(devid))
#define IS_9XX(devid) (IS_915(devid) || \
IS_945(devid) || \
diff --git a/shared/intel_clear.c b/shared/intel_clear.c
index 19f4763..1b0e221 100644
--- a/shared/intel_clear.c
+++ b/shared/intel_clear.c
@@ -27,25 +27,9 @@
**************************************************************************/
#include "main/glheader.h"
-#include "main/enums.h"
-#include "main/image.h"
#include "main/mtypes.h"
-#include "main/arrayobj.h"
-#include "main/attrib.h"
-#include "main/blend.h"
-#include "main/bufferobj.h"
-#include "main/buffers.h"
-#include "main/depth.h"
-#include "main/enable.h"
-#include "main/macros.h"
-#include "main/matrix.h"
-#include "main/polygon.h"
-#include "main/texstate.h"
-#include "main/shaders.h"
-#include "main/stencil.h"
-#include "main/varray.h"
-#include "glapi/dispatch.h"
#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_blit.h"
@@ -53,240 +37,11 @@
#include "intel_clear.h"
#include "intel_fbo.h"
#include "intel_pixel.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
-#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT | \
- BUFFER_BIT_FRONT_LEFT | \
- BUFFER_BIT_COLOR0 | \
- BUFFER_BIT_COLOR1 | \
- BUFFER_BIT_COLOR2 | \
- BUFFER_BIT_COLOR3 | \
- BUFFER_BIT_COLOR4 | \
- BUFFER_BIT_COLOR5 | \
- BUFFER_BIT_COLOR6 | \
- BUFFER_BIT_COLOR7)
-
-
-/**
- * Per-context one-time init of things for intl_clear_tris().
- * Basically set up a private array object for vertex/color arrays.
- */
-static void
-init_clear(GLcontext *ctx)
-{
- struct intel_context *intel = intel_context(ctx);
- struct gl_array_object *arraySave = NULL;
- const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name;
- const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name;
-
- /* create new array object */
- intel->clear.arrayObj = _mesa_new_array_object(ctx, ~0);
-
- /* save current array object, bind new one */
- _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj);
- _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj);
-
- /* one-time setup of vertex arrays (pos, color) */
- _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0);
- _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), intel->clear.color);
- _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), intel->clear.vertices);
- _mesa_Enable(GL_COLOR_ARRAY);
- _mesa_Enable(GL_VERTEX_ARRAY);
-
- /* restore original array object */
- _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave);
- _mesa_reference_array_object(ctx, &arraySave, NULL);
-
- /* restore original buffer objects */
- _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer);
- _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer);
-}
-
-
-
-/**
- * Perform glClear where mask contains only color, depth, and/or stencil.
- *
- * The implementation is based on calling into Mesa to set GL state and
- * performing normal triangle rendering. The intent of this path is to
- * have as generic a path as possible, so that any driver could make use of
- * it.
- */
-void
-intel_clear_tris(GLcontext *ctx, GLbitfield mask)
-{
- struct intel_context *intel = intel_context(ctx);
- GLfloat dst_z;
- struct gl_framebuffer *fb = ctx->DrawBuffer;
- int i;
- GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE;
- GLuint saved_shader_program = 0;
- unsigned int saved_active_texture;
- struct gl_array_object *arraySave = NULL;
-
- if (!intel->clear.arrayObj)
- init_clear(ctx);
-
- assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH |
- BUFFER_BIT_STENCIL)) == 0);
-
- _mesa_PushAttrib(GL_COLOR_BUFFER_BIT |
- GL_CURRENT_BIT |
- GL_DEPTH_BUFFER_BIT |
- GL_ENABLE_BIT |
- GL_POLYGON_BIT |
- GL_STENCIL_BUFFER_BIT |
- GL_TRANSFORM_BIT |
- GL_CURRENT_BIT);
- saved_active_texture = ctx->Texture.CurrentUnit;
-
- /* Disable existing GL state we don't want to apply to a clear. */
- _mesa_Disable(GL_ALPHA_TEST);
- _mesa_Disable(GL_BLEND);
- _mesa_Disable(GL_CULL_FACE);
- _mesa_Disable(GL_FOG);
- _mesa_Disable(GL_POLYGON_SMOOTH);
- _mesa_Disable(GL_POLYGON_STIPPLE);
- _mesa_Disable(GL_POLYGON_OFFSET_FILL);
- _mesa_Disable(GL_LIGHTING);
- _mesa_Disable(GL_CLIP_PLANE0);
- _mesa_Disable(GL_CLIP_PLANE1);
- _mesa_Disable(GL_CLIP_PLANE2);
- _mesa_Disable(GL_CLIP_PLANE3);
- _mesa_Disable(GL_CLIP_PLANE4);
- _mesa_Disable(GL_CLIP_PLANE5);
- _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
- if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) {
- saved_fp_enable = GL_TRUE;
- _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
- }
- if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) {
- saved_vp_enable = GL_TRUE;
- _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
- }
- if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) {
- saved_shader_program = ctx->Shader.CurrentProgram->Name;
- _mesa_UseProgramObjectARB(0);
- }
-
- if (ctx->Texture._EnabledUnits != 0) {
- int i;
-
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- _mesa_ActiveTextureARB(GL_TEXTURE0 + i);
- _mesa_Disable(GL_TEXTURE_1D);
- _mesa_Disable(GL_TEXTURE_2D);
- _mesa_Disable(GL_TEXTURE_3D);
- if (ctx->Extensions.ARB_texture_cube_map)
- _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB);
- if (ctx->Extensions.NV_texture_rectangle)
- _mesa_Disable(GL_TEXTURE_RECTANGLE_NV);
- if (ctx->Extensions.MESA_texture_array) {
- _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT);
- _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT);
- }
- }
- }
-
- /* save current array object, bind our private one */
- _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj);
- _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj);
-
- intel_meta_set_passthrough_transform(intel);
-
- for (i = 0; i < 4; i++) {
- COPY_4FV(intel->clear.color[i], ctx->Color.ClearColor);
- }
-
- /* convert clear Z from [0,1] to NDC coord in [-1,1] */
- dst_z = -1.0 + 2.0 * ctx->Depth.Clear;
-
- /* Prepare the vertices, which are the same regardless of which buffer we're
- * drawing to.
- */
- intel->clear.vertices[0][0] = fb->_Xmin;
- intel->clear.vertices[0][1] = fb->_Ymin;
- intel->clear.vertices[0][2] = dst_z;
- intel->clear.vertices[1][0] = fb->_Xmax;
- intel->clear.vertices[1][1] = fb->_Ymin;
- intel->clear.vertices[1][2] = dst_z;
- intel->clear.vertices[2][0] = fb->_Xmax;
- intel->clear.vertices[2][1] = fb->_Ymax;
- intel->clear.vertices[2][2] = dst_z;
- intel->clear.vertices[3][0] = fb->_Xmin;
- intel->clear.vertices[3][1] = fb->_Ymax;
- intel->clear.vertices[3][2] = dst_z;
-
- while (mask != 0) {
- GLuint this_mask = 0;
- GLuint color_bit;
-
- color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
- if (color_bit != 0)
- this_mask |= (1 << (color_bit - 1));
-
- /* Clear depth/stencil in the same pass as color. */
- this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL));
-
- /* Select the current color buffer and use the color write mask if
- * we have one, otherwise don't write any color channels.
- */
- if (this_mask & BUFFER_BIT_FRONT_LEFT)
- _mesa_DrawBuffer(GL_FRONT_LEFT);
- else if (this_mask & BUFFER_BIT_BACK_LEFT)
- _mesa_DrawBuffer(GL_BACK_LEFT);
- else if (color_bit != 0)
- _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 +
- (color_bit - BUFFER_COLOR0 - 1));
- else
- _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
-
- /* Control writing of the depth clear value to depth. */
- if (this_mask & BUFFER_BIT_DEPTH) {
- _mesa_DepthFunc(GL_ALWAYS);
- _mesa_Enable(GL_DEPTH_TEST);
- } else {
- _mesa_Disable(GL_DEPTH_TEST);
- _mesa_DepthMask(GL_FALSE);
- }
-
- /* Control writing of the stencil clear value to stencil. */
- if (this_mask & BUFFER_BIT_STENCIL) {
- _mesa_Enable(GL_STENCIL_TEST);
- _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
- GL_REPLACE, GL_REPLACE, GL_REPLACE);
- _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
- ctx->Stencil.Clear,
- ctx->Stencil.WriteMask[0]);
- } else {
- _mesa_Disable(GL_STENCIL_TEST);
- }
-
- _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- mask &= ~this_mask;
- }
-
- intel_meta_restore_transform(intel);
-
- _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture);
- if (saved_fp_enable)
- _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
- if (saved_vp_enable)
- _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
-
- if (saved_shader_program)
- _mesa_UseProgramObjectARB(saved_shader_program);
-
- _mesa_PopAttrib();
-
- /* restore current array object */
- _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave);
- _mesa_reference_array_object(ctx, &arraySave, NULL);
-}
-
static const char *buffer_names[] = {
[BUFFER_FRONT_LEFT] = "front",
[BUFFER_BACK_LEFT] = "back",
@@ -340,7 +95,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
= intel_get_rb_region(fb, BUFFER_STENCIL);
if (stencilRegion) {
/* have hw stencil */
- if (IS_965(intel->intelScreen->deviceID) ||
+ if (stencilRegion->tiling == I915_TILING_Y ||
(ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
/* We have to use the 3D engine if we're clearing a partial mask
* of the stencil buffer, or if we're on a 965 which has a tiled
@@ -357,9 +112,10 @@ intelClear(GLcontext *ctx, GLbitfield mask)
/* HW depth */
if (mask & BUFFER_BIT_DEPTH) {
+ const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH);
+
/* clear depth with whatever method is used for stencil (see above) */
- if (IS_965(intel->intelScreen->deviceID) ||
- tri_mask & BUFFER_BIT_STENCIL)
+ if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL)
tri_mask |= BUFFER_BIT_DEPTH;
else
blit_mask |= BUFFER_BIT_DEPTH;
@@ -369,7 +125,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
* buffer with it.
*/
if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
- int color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS);
+ int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR);
if (color_bit != 0) {
tri_mask |= blit_mask & (1 << (color_bit - 1));
blit_mask &= ~(1 << (color_bit - 1));
@@ -379,14 +135,18 @@ intelClear(GLcontext *ctx, GLbitfield mask)
/* SW fallback clearing */
swrast_mask = mask & ~tri_mask & ~blit_mask;
- for (i = 0; i < BUFFER_COUNT; i++) {
- GLuint bufBit = 1 << i;
- if ((blit_mask | tri_mask) & bufBit) {
+ {
+ /* look for non-Intel renderbuffers (clear them with swrast) */
+ GLbitfield blit_or_tri = blit_mask | tri_mask;
+ while (blit_or_tri) {
+ GLuint i = _mesa_ffs(blit_or_tri) - 1;
+ GLbitfield bufBit = 1 << i;
if (!fb->Attachment[i].Renderbuffer->ClassID) {
blit_mask &= ~bufBit;
tri_mask &= ~bufBit;
swrast_mask |= bufBit;
}
+ blit_or_tri ^= bufBit;
}
}
@@ -411,7 +171,8 @@ intelClear(GLcontext *ctx, GLbitfield mask)
}
DBG("\n");
}
- intel_clear_tris(ctx, tri_mask);
+
+ _mesa_meta_clear(&intel->ctx, tri_mask);
}
if (swrast_mask) {
diff --git a/shared/intel_context.c b/shared/intel_context.c
index cfd983d..fce42e9 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -38,6 +38,7 @@
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
#include "i830_dri.h"
@@ -161,6 +162,15 @@ intelGetString(GLcontext * ctx, GLenum name)
case PCI_CHIP_G41_G:
chipset = "Intel(R) G41";
break;
+ case PCI_CHIP_B43_G:
+ chipset = "Intel(R) B43";
+ break;
+ case PCI_CHIP_ILD_G:
+ chipset = "Intel(R) IGDNG_D";
+ break;
+ case PCI_CHIP_ILM_G:
+ chipset = "Intel(R) IGDNG_M";
+ break;
default:
chipset = "Unknown Intel Chipset";
break;
@@ -220,7 +230,9 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
struct intel_renderbuffer *stencil_rb;
i = 0;
- if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1])
+ if ((intel->is_front_buffer_rendering ||
+ intel->is_front_buffer_reading ||
+ !intel_fb->color_rb[1])
&& intel_fb->color_rb[0]) {
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]);
@@ -396,7 +408,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
if (!driContext->driScreenPriv->dri2.enabled)
return;
- if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+ if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
/* If we're rendering to the fake front buffer, make sure all the pending
* drawing has landed on the real front buffer. Otherwise when we
* eventually get to DRI2GetBuffersWithFormat the stale real front
@@ -495,7 +507,8 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
if (screen->dri2.loader &&
(screen->dri2.loader->base.version >= 2)
- && (screen->dri2.loader->flushFrontBuffer != NULL)) {
+ && (screen->dri2.loader->flushFrontBuffer != NULL) &&
+ intel->driDrawable && intel->driDrawable->loaderPrivate) {
(*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable,
intel->driDrawable->loaderPrivate);
@@ -505,7 +518,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
* each of N places that do rendering. This has worse performances,
* but it is much easier to get correct.
*/
- if (intel->is_front_buffer_rendering) {
+ if (!intel->is_front_buffer_rendering) {
intel->front_buffer_dirty = GL_FALSE;
}
}
@@ -521,7 +534,27 @@ intelFlush(GLcontext * ctx)
static void
intel_glFlush(GLcontext *ctx)
{
+ struct intel_context *intel = intel_context(ctx);
+
intel_flush(ctx, GL_TRUE);
+
+ /* We're using glFlush as an indicator that a frame is done, which is
+ * what DRI2 does before calling SwapBuffers (and means we should catch
+ * people doing front-buffer rendering, as well)..
+ *
+ * Wait for the swapbuffers before the one we just emitted, so we don't
+ * get too many swaps outstanding for apps that are GPU-heavy but not
+ * CPU-heavy.
+ *
+ * Unfortunately, we don't have a handle to the batch containing the swap,
+ * and getting our hands on that doesn't seem worth it, so we just us the
+ * first batch we emitted after the last swap.
+ */
+ if (intel->first_post_swapbuffers_batch != NULL) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ }
}
void
@@ -561,10 +594,15 @@ intelInitDriverFunctions(struct dd_function_table *functions)
functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
intelInitTextureFuncs(functions);
+ intelInitTextureImageFuncs(functions);
+ intelInitTextureSubImageFuncs(functions);
+ intelInitTextureCopyImageFuncs(functions);
intelInitStateFuncs(functions);
intelInitClearFuncs(functions);
intelInitBufferFuncs(functions);
intelInitPixelFuncs(functions);
+ intelInitBufferObjectFuncs(functions);
+ intel_init_syncobj_functions(functions);
}
@@ -607,6 +645,10 @@ intelInitContext(struct intel_context *intel,
intel->maxBatchSize = BATCH_SZ;
intel->bufmgr = intelScreen->bufmgr;
+
+ if (0) /* for debug */
+ drm_intel_bufmgr_set_debug(intel->bufmgr, 1);
+
intel->ttm = intelScreen->ttm;
if (intel->ttm) {
int bo_reuse_mode;
@@ -660,7 +702,15 @@ intelInitContext(struct intel_context *intel,
*/
_mesa_init_point(ctx);
+ meta_init_metaops(ctx, &intel->meta);
ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */
+ if (IS_965(intelScreen->deviceID)) {
+ if (MAX_WIDTH > 8192)
+ ctx->Const.MaxRenderbufferSize = 8192;
+ } else {
+ if (MAX_WIDTH > 2048)
+ ctx->Const.MaxRenderbufferSize = 2048;
+ }
/* Initialize the software rasterizer and helper modules. */
_swrast_CreateContext(ctx);
@@ -672,6 +722,8 @@ intelInitContext(struct intel_context *intel,
_swrast_allow_pixel_fog(ctx, GL_FALSE);
_swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _mesa_meta_init(ctx);
+
intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
intel->hw_stipple = 1;
@@ -718,7 +770,6 @@ intelInitContext(struct intel_context *intel,
intel->batch = intel_batchbuffer_alloc(intel);
- intel_bufferobj_init(intel);
intel_fbo_init(intel);
if (intel->ctx.Mesa_DXTn) {
@@ -728,6 +779,15 @@ intelInitContext(struct intel_context *intel,
else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) {
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
+ intel->use_texture_tiling = driQueryOptionb(&intel->optionCache,
+ "texture_tiling");
+ if (intel->use_texture_tiling &&
+ !intel->intelScreen->kernel_exec_fencing) {
+ fprintf(stderr, "No kernel support for execution fencing, "
+ "disabling texture tiling\n");
+ intel->use_texture_tiling = GL_FALSE;
+ }
+ intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
intel->prim.primitive = ~0;
@@ -767,8 +827,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
INTEL_FIREVERTICES(intel);
- if (intel->clear.arrayObj)
- _mesa_delete_array_object(&intel->ctx, intel->clear.arrayObj);
+ _mesa_meta_free(&intel->ctx);
+
+ meta_destroy_metaops(&intel->meta);
intel->vtbl.destroy(intel);
@@ -787,15 +848,68 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
intel->prim.vb = NULL;
dri_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
+ dri_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
if (release_texture_heaps) {
- /* This share group is about to go away, free our private
- * texture object data.
+ /* Nothing is currently done here to free texture heaps;
+ * but we're not using the texture heap utilities, so I
+ * rather think we shouldn't. I've taken a look, and can't
+ * find any private texture data hanging around anywhere, but
+ * I'm not yet certain there isn't any at all...
*/
- if (INTEL_DEBUG & DEBUG_TEXTURE)
+ /* if (INTEL_DEBUG & DEBUG_TEXTURE)
fprintf(stderr, "do something to free texture heaps\n");
+ */
}
+ /* XXX In intelMakeCurrent() below, the context's static regions are
+ * referenced inside the frame buffer; it's listed as a hack,
+ * with a comment of "XXX FBO temporary fix-ups!", but
+ * as long as it's there, we should release the regions here.
+ * The do/while loop around the block is used to allow the
+ * "continue" statements inside the block to exit the block,
+ * to avoid many layers of "if" constructs.
+ */
+ do {
+ __DRIdrawablePrivate * driDrawPriv = intel->driDrawable;
+ struct intel_framebuffer *intel_fb;
+ struct intel_renderbuffer *irbDepth, *irbStencil;
+ if (!driDrawPriv) {
+ /* We're already detached from the drawable; exit this block. */
+ continue;
+ }
+ intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate;
+ if (!intel_fb) {
+ /* The frame buffer is already gone; exit this block. */
+ continue;
+ }
+ irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+ irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+
+ /* If the regions of the frame buffer still match the regions
+ * of the context, release them. If they've changed somehow,
+ * leave them alone.
+ */
+ if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) {
+ intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL);
+ }
+ if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) {
+ intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL);
+ }
+
+ if (irbDepth && irbDepth->region == intel->depth_region) {
+ intel_renderbuffer_set_region(irbDepth, NULL);
+ }
+ /* Usually, the stencil buffer is the same as the depth buffer;
+ * but they're handled separately in MakeCurrent, so we'll
+ * handle them separately here.
+ */
+ if (irbStencil && irbStencil->region == intel->depth_region) {
+ intel_renderbuffer_set_region(irbStencil, NULL);
+ }
+ } while (0);
+
intel_region_release(&intel->front_region);
intel_region_release(&intel->back_region);
intel_region_release(&intel->depth_region);
@@ -804,12 +918,23 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
/* free the Mesa context */
_mesa_free_context_data(&intel->ctx);
+
+ FREE(intel);
+ driContextPriv->driverPrivate = NULL;
}
}
GLboolean
intelUnbindContext(__DRIcontextPrivate * driContextPriv)
{
+ struct intel_context *intel =
+ (struct intel_context *) driContextPriv->driverPrivate;
+
+ /* Deassociate the context with the drawables.
+ */
+ intel->driDrawable = NULL;
+ intel->driReadDrawable = NULL;
+
return GL_TRUE;
}
@@ -832,7 +957,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
if (driDrawPriv != driReadPriv)
intel_update_renderbuffers(driContextPriv, driReadPriv);
} else {
- /* XXX FBO temporary fix-ups! */
+ /* XXX FBO temporary fix-ups! These are released in
+ * intelDextroyContext(), above. Changes here should be
+ * reflected there.
+ */
/* if the renderbuffers don't have regions, init them from the context */
struct intel_renderbuffer *irbDepth
= intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
@@ -917,7 +1045,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
int me = intel->hHWContext;
drmGetLock(intel->driFd, intel->hHWContext, flags);
- intel->locked = 1;
if (INTEL_DEBUG & DEBUG_LOCK)
_mesa_printf("%s - got contended lock\n", __progname);
@@ -974,9 +1101,12 @@ void LOCK_HARDWARE( struct intel_context *intel )
struct intel_framebuffer *intel_fb = NULL;
struct intel_renderbuffer *intel_rb = NULL;
- _glthread_LOCK_MUTEX(lockMutex);
- assert(!intel->locked);
- intel->locked = 1;
+ intel->locked++;
+ if (intel->locked >= 2)
+ return;
+
+ if (!sPriv->dri2.enabled)
+ _glthread_LOCK_MUTEX(lockMutex);
if (intel->driDrawable) {
intel_fb = intel->driDrawable->driverPrivate;
@@ -1023,13 +1153,16 @@ void UNLOCK_HARDWARE( struct intel_context *intel )
{
__DRIscreen *sPriv = intel->driScreen;
- intel->vtbl.note_unlock( intel );
- intel->locked = 0;
+ intel->locked--;
+ if (intel->locked > 0)
+ return;
- if (!sPriv->dri2.enabled)
- DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+ assert(intel->locked == 0);
- _glthread_UNLOCK_MUTEX(lockMutex);
+ if (!sPriv->dri2.enabled) {
+ DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+ _glthread_UNLOCK_MUTEX(lockMutex);
+ }
if (INTEL_DEBUG & DEBUG_LOCK)
_mesa_printf("%s - unlocked\n", __progname);
diff --git a/shared/intel_context.h b/shared/intel_context.h
index b5cf7e6..03e7cf3 100644
--- a/shared/intel_context.h
+++ b/shared/intel_context.h
@@ -33,6 +33,7 @@
#include "main/mtypes.h"
#include "main/mm.h"
#include "texmem.h"
+#include "dri_metaops.h"
#include "drm.h"
#include "intel_bufmgr.h"
@@ -79,9 +80,19 @@ extern void intelFallback(struct intel_context *intel, GLuint bit,
#define INTEL_MAX_FIXUP 64
+struct intel_sync_object {
+ struct gl_sync_object Base;
+
+ /** Batch associated with this sync object */
+ drm_intel_bo *bo;
+};
+
+/**
+ * intel_context is derived from Mesa's context class: GLcontext.
+ */
struct intel_context
{
- GLcontext ctx; /* the parent class */
+ GLcontext ctx; /**< base class, must be first field */
struct
{
@@ -91,7 +102,6 @@ struct intel_context
void (*new_batch) (struct intel_context * intel);
void (*emit_invarient_state) (struct intel_context * intel);
void (*note_fence) (struct intel_context *intel, GLuint fence);
- void (*note_unlock) (struct intel_context *intel);
void (*update_texture_state) (struct intel_context * intel);
void (*render_start) (struct intel_context * intel);
@@ -158,19 +168,7 @@ struct intel_context
void (*debug_batch)(struct intel_context *intel);
} vtbl;
- struct {
- struct gl_fragment_program *bitmap_fp;
- struct gl_vertex_program *passthrough_vp;
-
- struct gl_fragment_program *saved_fp;
- GLboolean saved_fp_enable;
- struct gl_vertex_program *saved_vp;
- GLboolean saved_vp_enable;
-
- GLint saved_vp_x, saved_vp_y;
- GLsizei saved_vp_width, saved_vp_height;
- GLenum saved_matrix_mode;
- } meta;
+ struct dri_metaops meta;
GLint refcount;
GLuint Fallback;
@@ -182,7 +180,6 @@ struct intel_context
struct intel_region *front_region;
struct intel_region *back_region;
struct intel_region *depth_region;
- GLboolean internal_viewport_call;
/**
* This value indicates that the kernel memory manager is being used
@@ -191,6 +188,7 @@ struct intel_context
GLboolean ttm;
struct intel_batchbuffer *batch;
+ drm_intel_bo *first_post_swapbuffers_batch;
GLboolean no_batch_wrap;
unsigned batch_id;
@@ -215,13 +213,6 @@ struct intel_context
GLuint ClearColor565;
GLuint ClearColor8888;
- /* info for intel_clear_tris() */
- struct
- {
- struct gl_array_object *arrayObj;
- GLfloat vertices[4][3];
- GLfloat color[4][4];
- } clear;
/* Offsets of fields within the current vertex:
*/
@@ -294,6 +285,17 @@ struct intel_context
* easily.
*/
GLboolean is_front_buffer_rendering;
+ /**
+ * Track whether front-buffer is the current read target.
+ *
+ * This is closely associated with is_front_buffer_rendering, but may
+ * be set separately. The DRI2 fake front buffer must be referenced
+ * either way.
+ */
+ GLboolean is_front_buffer_reading;
+
+ GLboolean use_texture_tiling;
+ GLboolean use_early_z;
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
@@ -316,7 +318,7 @@ struct intel_context
__DRIdrawablePrivate *driReadDrawable;
__DRIscreenPrivate *driScreen;
intelScreenPrivate *intelScreen;
- volatile struct drm_i915_sarea *sarea;
+ volatile drm_i915_sarea_t *sarea;
GLuint lastStamp;
@@ -474,6 +476,8 @@ extern void intelFlush(GLcontext * ctx);
extern void intelInitDriverFunctions(struct dd_function_table *functions);
+void intel_init_syncobj_functions(struct dd_function_table *functions);
+
/* ================================================================
* intel_state.c:
@@ -549,6 +553,9 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y,
void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable);
+void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
+ uint32_t buffer_id);
+
/*======================================================================
* Inline conversion functions.
* These are better-typed than the macros used previously:
diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c
index 9ec1b4e..2e61c55 100644
--- a/shared/intel_extensions.c
+++ b/shared/intel_extensions.c
@@ -30,10 +30,14 @@
#include "intel_extensions.h"
+#define need_GL_ARB_copy_buffer
#define need_GL_ARB_framebuffer_object
+#define need_GL_ARB_map_buffer_range
#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_shader_objects
+#define need_GL_ARB_sync
+#define need_GL_ARB_vertex_array_object
#define need_GL_ARB_vertex_program
#define need_GL_ARB_vertex_shader
#define need_GL_ARB_window_pos
@@ -45,9 +49,12 @@
#define need_GL_EXT_fog_coord
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_gpu_program_parameters
#define need_GL_EXT_point_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
+#define need_GL_APPLE_vertex_array_object
#define need_GL_ATI_separate_stencil
#define need_GL_ATI_envmap_bumpmap
#define need_GL_NV_point_sprite
@@ -65,8 +72,13 @@
* i965_dri.
*/
static const struct dri_extension card_extensions[] = {
+ { "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions },
+ { "GL_ARB_half_float_pixel", NULL },
+ { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions },
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
+ { "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_sync", GL_ARB_sync_functions },
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_cube_map", NULL },
{ "GL_ARB_texture_env_add", NULL },
@@ -75,6 +87,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_texture_env_dot3", NULL },
{ "GL_ARB_texture_mirrored_repeat", NULL },
{ "GL_ARB_texture_rectangle", NULL },
+ { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions},
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
{ "GL_ARB_window_pos", GL_ARB_window_pos_functions },
{ "GL_EXT_blend_color", GL_EXT_blend_color_functions },
@@ -85,7 +98,9 @@ static const struct dri_extension card_extensions[] = {
{ "GL_EXT_blend_subtract", NULL },
{ "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions },
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions },
{ "GL_EXT_packed_depth_stencil", NULL },
+ { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
@@ -95,6 +110,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_EXT_texture_lod_bias", NULL },
{ "GL_3DFX_texture_compression_FXT1", NULL },
{ "GL_APPLE_client_storage", NULL },
+ { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions},
{ "GL_MESA_pack_invert", NULL },
{ "GL_MESA_ycbcr_texture", NULL },
{ "GL_NV_blend_square", NULL },
@@ -112,8 +128,10 @@ static const struct dri_extension i915_extensions[] = {
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_shadow", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
+ { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions },
{ "GL_ATI_texture_env_combine3", NULL },
{ "GL_EXT_shadow_funcs", NULL },
+ { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_NV_texture_env_combine4", NULL },
{ NULL, NULL }
};
@@ -128,6 +146,7 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions},
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_seamless_cube_map", NULL },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
{ "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
{ "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index 30f58b1..804c034 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -35,6 +35,7 @@
#include "main/context.h"
#include "main/texformat.h"
#include "main/texrender.h"
+#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_buffers.h"
@@ -217,7 +218,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
height, pitch);
- irb->region = intel_region_alloc(intel, cpp, width, height, pitch,
+ irb->region = intel_region_alloc(intel, I915_TILING_NONE,
+ cpp, width, height, pitch,
GL_TRUE);
if (!irb->region)
return GL_FALSE; /* out of memory? */
@@ -575,9 +577,10 @@ intel_render_texture(GLcontext * ctx,
ASSERT(newImage);
- if (newImage->Border != 0) {
- /* Fallback on drawing to a texture with a border, which won't have a
- * miptree.
+ intel_image = intel_texture_image(newImage);
+ if (!intel_image->mt) {
+ /* Fallback on drawing to a texture that doesn't have a miptree
+ * (has a border, width/height 0, etc.)
*/
_mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
_mesa_render_texture(ctx, fb, att);
@@ -608,7 +611,6 @@ intel_render_texture(GLcontext * ctx,
irb->Base.RefCount);
/* point the renderbufer's region to the texture image region */
- intel_image = intel_texture_image(newImage);
if (irb->region != intel_image->mt->region) {
if (irb->region)
intel_region_release(&irb->region);
@@ -680,6 +682,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
if (rb == NULL)
continue;
+ if (irb == NULL) {
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+ continue;
+ }
+
switch (irb->texformat->MesaFormat) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_RGB565:
@@ -694,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
/**
- * Called from glBlitFramebuffer().
- * For now, we're doing an approximation with glCopyPixels().
- * XXX we need to bypass all the per-fragment operations, except scissor.
- */
-static void
-intel_blit_framebuffer(GLcontext *ctx,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
-{
- const GLfloat xZoomSave = ctx->Pixel.ZoomX;
- const GLfloat yZoomSave = ctx->Pixel.ZoomY;
- GLsizei width, height;
- GLfloat xFlip = 1.0F, yFlip = 1.0F;
-
- if (srcX1 < srcX0) {
- GLint tmp = srcX1;
- srcX1 = srcX0;
- srcX0 = tmp;
- xFlip = -1.0F;
- }
-
- if (srcY1 < srcY0) {
- GLint tmp = srcY1;
- srcY1 = srcY0;
- srcY0 = tmp;
- yFlip = -1.0F;
- }
-
- width = srcX1 - srcX0;
- height = srcY1 - srcY0;
-
- ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0);
- ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0);
-
- if (ctx->Pixel.ZoomX < 0.0F) {
- dstX0 = MAX2(dstX0, dstX1);
- }
- else {
- dstX0 = MIN2(dstX0, dstX1);
- }
-
- if (ctx->Pixel.ZoomY < 0.0F) {
- dstY0 = MAX2(dstY0, dstY1);
- }
- else {
- dstY0 = MIN2(dstY0, dstY1);
- }
-
- if (mask & GL_COLOR_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_COLOR);
- }
- if (mask & GL_DEPTH_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_DEPTH);
- }
- if (mask & GL_STENCIL_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_STENCIL);
- }
-
- ctx->Pixel.ZoomX = xZoomSave;
- ctx->Pixel.ZoomY = yZoomSave;
-}
-
-
-/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
*/
@@ -776,5 +715,5 @@ intel_fbo_init(struct intel_context *intel)
intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
- intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
+ intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
}
diff --git a/shared/intel_generatemipmap.c b/shared/intel_generatemipmap.c
new file mode 100644
index 0000000..12059e1
--- /dev/null
+++ b/shared/intel_generatemipmap.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/buffers.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/depth.h"
+#include "main/hash.h"
+#include "main/mipmap.h"
+#include "main/blend.h"
+#include "glapi/dispatch.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+
+static const char *intel_fp_tex2d =
+ "!!ARBfp1.0\n"
+ "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n"
+ "END\n";
+
+static GLboolean
+intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name,
+ int level, int width, int height)
+{
+ struct intel_context *intel = intel_context(ctx);
+ GLfloat vertices[4][2];
+ GLint status;
+
+ /* Set to source from the previous level */
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1);
+
+ /* Set to draw into the current level */
+ _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+ GL_COLOR_ATTACHMENT0_EXT,
+ GL_TEXTURE_2D,
+ tex_name,
+ level);
+ /* Choose to render to the color attachment. */
+ _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+ status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT);
+ if (status != GL_FRAMEBUFFER_COMPLETE_EXT)
+ return GL_FALSE;
+
+ meta_set_passthrough_transform(&intel->meta);
+
+ /* XXX: Doing it right would involve setting up the transformation to do
+ * 0-1 mapping or something, and not changing the vertex data.
+ */
+ vertices[0][0] = 0;
+ vertices[0][1] = 0;
+ vertices[1][0] = width;
+ vertices[1][1] = 0;
+ vertices[2][0] = width;
+ vertices[2][1] = height;
+ vertices[3][0] = 0;
+ vertices[3][1] = height;
+
+ _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
+ _mesa_Enable(GL_VERTEX_ARRAY);
+ meta_set_default_texrect(&intel->meta);
+
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ meta_restore_texcoords(&intel->meta);
+ meta_restore_transform(&intel->meta);
+
+ return GL_TRUE;
+}
+
+static GLboolean
+intel_generate_mipmap_2d(GLcontext *ctx,
+ GLenum target,
+ struct gl_texture_object *texObj)
+{
+ struct intel_context *intel = intel_context(ctx);
+ GLint old_active_texture;
+ int level, max_levels, start_level, end_level;
+ GLuint fb_name;
+ GLboolean success = GL_FALSE;
+ struct gl_framebuffer *saved_fbo = NULL;
+ struct gl_buffer_object *saved_array_buffer = NULL;
+ struct gl_buffer_object *saved_element_buffer = NULL;
+
+ _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+ GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT |
+ GL_DEPTH_BUFFER_BIT);
+ _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+ old_active_texture = ctx->Texture.CurrentUnit;
+ _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer);
+
+ /* use default array/index buffers */
+ _mesa_reference_buffer_object(ctx, &saved_array_buffer,
+ ctx->Array.ArrayBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
+ ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &saved_element_buffer,
+ ctx->Array.ElementArrayBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
+ ctx->Shared->NullBufferObj);
+
+ _mesa_Disable(GL_POLYGON_STIPPLE);
+ _mesa_Disable(GL_DEPTH_TEST);
+ _mesa_Disable(GL_STENCIL_TEST);
+ _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ _mesa_DepthMask(GL_FALSE);
+
+ /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our
+ * minification.
+ */
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+ _mesa_Enable(GL_TEXTURE_2D);
+ _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
+ GL_LINEAR_MIPMAP_NEAREST);
+ _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+ /* Bind the new renderbuffer to the color attachment point. */
+ _mesa_GenFramebuffersEXT(1, &fb_name);
+ _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
+
+ meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp,
+ intel_fp_tex2d);
+ meta_set_passthrough_vertex_program(&intel->meta);
+
+ max_levels = _mesa_max_texture_levels(ctx, texObj->Target);
+ start_level = texObj->BaseLevel;
+ end_level = texObj->MaxLevel;
+
+ /* Loop generating level+1 from level. */
+ for (level = start_level; level < end_level && level < max_levels - 1; level++) {
+ const struct gl_texture_image *srcImage;
+ int width, height;
+
+ srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
+ if (srcImage->Border != 0)
+ goto fail;
+
+ width = srcImage->Width / 2;
+ if (width < 1)
+ width = 1;
+ height = srcImage->Height / 2;
+ if (height < 1)
+ height = 1;
+
+ if (width == srcImage->Width &&
+ height == srcImage->Height) {
+ /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the
+ * maximum texture level for the object, so break out when we've gone
+ * over the edge.
+ */
+ break;
+ }
+
+ /* Make sure that there's space allocated for the target level.
+ * We could skip this if there's already space allocated and save some
+ * time.
+ */
+ _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat,
+ width, height, 0,
+ GL_RGBA, GL_UNSIGNED_INT, NULL);
+
+ if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1,
+ width, height))
+ goto fail;
+ }
+
+ success = GL_TRUE;
+
+fail:
+ meta_restore_fragment_program(&intel->meta);
+ meta_restore_vertex_program(&intel->meta);
+
+ /* restore array/index buffers */
+ _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
+ saved_array_buffer);
+ _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
+ saved_element_buffer);
+ _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL);
+
+
+ _mesa_DeleteFramebuffersEXT(1, &fb_name);
+ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+ if (saved_fbo)
+ _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name);
+ _mesa_reference_framebuffer(&saved_fbo, NULL);
+ _mesa_PopClientAttrib();
+ _mesa_PopAttrib();
+
+ return success;
+}
+
+
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level).
+ *
+ * The texture object's miptree must be mapped.
+ *
+ * It would be really nice if this was just called by Mesa whenever mipmaps
+ * needed to be regenerated, rather than us having to remember to do so in
+ * each texture image modification path.
+ *
+ * This function should also include an accelerated path.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_texture_object *intelObj = intel_texture_object(texObj);
+ GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ int face, i;
+
+ /* HW path */
+ if (target == GL_TEXTURE_2D &&
+ ctx->Extensions.EXT_framebuffer_object &&
+ ctx->Extensions.ARB_fragment_program &&
+ ctx->Extensions.ARB_vertex_program) {
+ GLboolean success;
+
+ /* We'll be accessing this texture using GL entrypoints, which should
+ * be resilient against other access to this texture.
+ */
+ _mesa_unlock_texture(ctx, texObj);
+ success = intel_generate_mipmap_2d(ctx, target, texObj);
+ _mesa_lock_texture(ctx, texObj);
+
+ if (success)
+ return;
+ }
+
+ /* SW path */
+ intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+ _mesa_generate_mipmap(ctx, target, texObj);
+ intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+
+ /* Update the level information in our private data in the new images, since
+ * it didn't get set as part of a normal TexImage path.
+ */
+ for (face = 0; face < nr_faces; face++) {
+ for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+ struct intel_texture_image *intelImage;
+
+ intelImage = intel_texture_image(texObj->Image[face][i]);
+ if (intelImage == NULL)
+ break;
+
+ intelImage->level = i;
+ intelImage->face = face;
+ /* Unreference the miptree to signal that the new Data is a bare
+ * pointer from mesa.
+ */
+ intel_miptree_release(intel, &intelImage->mt);
+ }
+ }
+}
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index 6e1e034..c985da5 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -57,14 +57,16 @@ intel_miptree_create_internal(struct intel_context *intel,
GLuint last_level,
GLuint width0,
GLuint height0,
- GLuint depth0, GLuint cpp, GLuint compress_byte)
+ GLuint depth0, GLuint cpp, GLuint compress_byte,
+ uint32_t tiling)
{
GLboolean ok;
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
- DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
+ DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+ _mesa_lookup_enum_by_nr(internal_format),
+ first_level, last_level, mt);
mt->target = target_to_target(target);
mt->internal_format = internal_format;
@@ -80,15 +82,16 @@ intel_miptree_create_internal(struct intel_context *intel,
#ifdef I915
if (IS_945(intel->intelScreen->deviceID))
- ok = i945_miptree_layout(intel, mt);
+ ok = i945_miptree_layout(intel, mt, tiling);
else
- ok = i915_miptree_layout(intel, mt);
+ ok = i915_miptree_layout(intel, mt, tiling);
#else
- ok = brw_miptree_layout(intel, mt);
+ ok = brw_miptree_layout(intel, mt, tiling);
#endif
if (!ok) {
free(mt);
+ DBG("%s not okay - returning NULL\n", __FUNCTION__);
return NULL;
}
@@ -98,6 +101,7 @@ intel_miptree_create_internal(struct intel_context *intel,
struct intel_mipmap_tree *
intel_miptree_create(struct intel_context *intel,
GLenum target,
+ GLenum base_format,
GLenum internal_format,
GLuint first_level,
GLuint last_level,
@@ -107,10 +111,23 @@ intel_miptree_create(struct intel_context *intel,
GLboolean expect_accelerated_upload)
{
struct intel_mipmap_tree *mt;
+ uint32_t tiling;
+
+ if (intel->use_texture_tiling && compress_byte == 0 &&
+ intel->intelScreen->kernel_exec_fencing) {
+ if (IS_965(intel->intelScreen->deviceID) &&
+ (base_format == GL_DEPTH_COMPONENT ||
+ base_format == GL_DEPTH_STENCIL_EXT))
+ tiling = I915_TILING_Y;
+ else
+ tiling = I915_TILING_X;
+ } else
+ tiling = I915_TILING_NONE;
mt = intel_miptree_create_internal(intel, target, internal_format,
first_level, last_level, width0,
- height0, depth0, cpp, compress_byte);
+ height0, depth0, cpp, compress_byte,
+ tiling);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
@@ -118,6 +135,7 @@ intel_miptree_create(struct intel_context *intel,
return NULL;
mt->region = intel_region_alloc(intel,
+ tiling,
mt->cpp,
mt->pitch,
mt->total_height,
@@ -147,7 +165,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
mt = intel_miptree_create_internal(intel, target, internal_format,
first_level, last_level,
region->width, region->height, 1,
- region->cpp, compress_byte);
+ region->cpp, compress_byte,
+ I915_TILING_NONE);
if (!mt)
return mt;
#if 0
@@ -185,6 +204,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
int intel_miptree_pitch_align (struct intel_context *intel,
struct intel_mipmap_tree *mt,
+ uint32_t tiling,
int pitch)
{
#ifdef I915
@@ -205,6 +225,11 @@ int intel_miptree_pitch_align (struct intel_context *intel,
pitch_align = 4;
}
+ if (tiling == I915_TILING_X)
+ pitch_align = 512;
+ else if (tiling == I915_TILING_Y)
+ pitch_align = 128;
+
pitch = ALIGN(pitch * mt->cpp, pitch_align);
#ifdef I915
@@ -328,23 +353,31 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
}
-
void
-intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
- GLuint level, GLuint img,
- GLuint x, GLuint y)
+intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
+ GLuint level, GLuint img,
+ GLuint x, GLuint y,
+ GLuint offset)
{
if (img == 0 && level == 0)
assert(x == 0 && y == 0);
assert(img < mt->level[level].nr_images);
- mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
+ mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp + offset;
DBG("%s level %d img %d pos %d,%d image_offset %x\n",
__FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]);
}
+void
+intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+ GLuint level, GLuint img,
+ GLuint x, GLuint y)
+{
+ intel_miptree_set_image_offset_ex(mt, level, img, x, y, 0);
+}
+
/* Although we use the image_offset[] array to store relative offsets
* to cube faces, Mesa doesn't know anything about this and expects
@@ -454,11 +487,11 @@ intel_miptree_image_data(struct intel_context *intel,
0, 0, /* source x, y */
dst->level[level].width, height); /* width, height */
- src += src_image_pitch * dst->cpp;
+ src = (char *)src + src_image_pitch * dst->cpp;
}
}
-extern GLuint intel_compressed_alignment(GLenum);
+extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
/* Copy mipmap image between trees
*/
void
@@ -475,20 +508,37 @@ intel_miptree_image_copy(struct intel_context *intel,
const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level);
GLuint i;
+ GLboolean success;
if (dst->compressed) {
- GLuint alignment = intel_compressed_alignment(dst->internal_format);
+ GLuint align_w, align_h;
+
+ intel_get_texture_alignment_unit(dst->internal_format, &align_w, &align_h);
height = (height + 3) / 4;
- width = ((width + alignment - 1) & ~(alignment - 1));
+ width = ALIGN(width, align_w);
}
for (i = 0; i < depth; i++) {
- intel_region_copy(intel,
- dst->region, dst_offset + dst_depth_offset[i],
- 0,
- 0,
- src->region, src_offset + src_depth_offset[i],
- 0, 0, width, height);
+ success = intel_region_copy(intel,
+ dst->region, dst_offset + dst_depth_offset[i],
+ 0, 0,
+ src->region, src_offset + src_depth_offset[i],
+ 0, 0, width, height, GL_COPY);
+ if (!success) {
+ GLubyte *src_ptr, *dst_ptr;
+
+ src_ptr = intel_region_map(intel, src->region);
+ dst_ptr = intel_region_map(intel, dst->region);
+
+ _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i],
+ dst->cpp,
+ dst->pitch,
+ 0, 0, width, height,
+ src_ptr + src_offset + src_depth_offset[i],
+ src->pitch,
+ 0, 0);
+ intel_region_unmap(intel, src->region);
+ intel_region_unmap(intel, dst->region);
+ }
}
-
}
diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h
index 4060b9d..c890b2a 100644
--- a/shared/intel_mipmap_tree.h
+++ b/shared/intel_mipmap_tree.h
@@ -126,6 +126,7 @@ struct intel_mipmap_tree
struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
GLenum target,
+ GLenum base_format,
GLenum internal_format,
GLuint first_level,
GLuint last_level,
@@ -148,6 +149,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
int intel_miptree_pitch_align (struct intel_context *intel,
struct intel_mipmap_tree *mt,
+ uint32_t tiling,
int pitch);
void intel_miptree_reference(struct intel_mipmap_tree **dst,
@@ -194,6 +196,11 @@ void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d);
+void intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
+ GLuint level,
+ GLuint img, GLuint x, GLuint y,
+ GLuint offset);
+
void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level,
GLuint img, GLuint x, GLuint y);
@@ -218,10 +225,13 @@ void intel_miptree_image_copy(struct intel_context *intel,
/* i915_mipmap_tree.c:
*/
GLboolean i915_miptree_layout(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling);
GLboolean i945_miptree_layout(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling);
GLboolean brw_miptree_layout(struct intel_context *intel,
- struct intel_mipmap_tree *mt);
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling);
#endif
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
index fc0ac0b..a300141 100644
--- a/shared/intel_pixel.c
+++ b/shared/intel_pixel.c
@@ -27,9 +27,12 @@
#include "main/enums.h"
#include "main/state.h"
+#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enable.h"
#include "main/matrix.h"
+#include "main/texstate.h"
+#include "main/varray.h"
#include "main/viewport.h"
#include "swrast/swrast.h"
#include "shader/arbprogram.h"
@@ -174,167 +177,6 @@ intel_check_blit_format(struct intel_region * region,
}
void
-intel_meta_set_passthrough_transform(struct intel_context *intel)
-{
- GLcontext *ctx = &intel->ctx;
-
- intel->meta.saved_vp_x = ctx->Viewport.X;
- intel->meta.saved_vp_y = ctx->Viewport.Y;
- intel->meta.saved_vp_width = ctx->Viewport.Width;
- intel->meta.saved_vp_height = ctx->Viewport.Height;
- intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode;
-
- intel->internal_viewport_call = GL_TRUE;
- _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
- intel->internal_viewport_call = GL_FALSE;
-
- _mesa_MatrixMode(GL_PROJECTION);
- _mesa_PushMatrix();
- _mesa_LoadIdentity();
- _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1);
-
- _mesa_MatrixMode(GL_MODELVIEW);
- _mesa_PushMatrix();
- _mesa_LoadIdentity();
-}
-
-void
-intel_meta_restore_transform(struct intel_context *intel)
-{
- _mesa_MatrixMode(GL_PROJECTION);
- _mesa_PopMatrix();
- _mesa_MatrixMode(GL_MODELVIEW);
- _mesa_PopMatrix();
-
- _mesa_MatrixMode(intel->meta.saved_matrix_mode);
-
- intel->internal_viewport_call = GL_TRUE;
- _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y,
- intel->meta.saved_vp_width, intel->meta.saved_vp_height);
- intel->internal_viewport_call = GL_FALSE;
-}
-
-/**
- * Set up a vertex program to pass through the position and first texcoord
- * for pixel path.
- */
-void
-intel_meta_set_passthrough_vertex_program(struct intel_context *intel)
-{
- GLcontext *ctx = &intel->ctx;
- static const char *vp =
- "!!ARBvp1.0\n"
- "TEMP vertexClip;\n"
- "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n"
- "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n"
- "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n"
- "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n"
- "MOV result.position, vertexClip;\n"
- "MOV result.texcoord[0], vertex.texcoord[0];\n"
- "MOV result.color, vertex.color;\n"
- "END\n";
-
- assert(intel->meta.saved_vp == NULL);
-
- _mesa_reference_vertprog(ctx, &intel->meta.saved_vp,
- ctx->VertexProgram.Current);
- if (intel->meta.passthrough_vp == NULL) {
- GLuint prog_name;
- _mesa_GenPrograms(1, &prog_name);
- _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name);
- _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB,
- GL_PROGRAM_FORMAT_ASCII_ARB,
- strlen(vp), (const GLubyte *)vp);
- _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp,
- ctx->VertexProgram.Current);
- _mesa_DeletePrograms(1, &prog_name);
- }
-
- FLUSH_VERTICES(ctx, _NEW_PROGRAM);
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
- intel->meta.passthrough_vp);
- ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
- &intel->meta.passthrough_vp->Base);
-
- intel->meta.saved_vp_enable = ctx->VertexProgram.Enabled;
- _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
-}
-
-/**
- * Restores the previous vertex program after
- * intel_meta_set_passthrough_vertex_program()
- */
-void
-intel_meta_restore_vertex_program(struct intel_context *intel)
-{
- GLcontext *ctx = &intel->ctx;
-
- FLUSH_VERTICES(ctx, _NEW_PROGRAM);
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
- intel->meta.saved_vp);
- _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, NULL);
- ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
- &ctx->VertexProgram.Current->Base);
-
- if (!intel->meta.saved_vp_enable)
- _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
-}
-
-/**
- * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the
- * program object.
- */
-void
-intel_meta_set_fragment_program(struct intel_context *intel,
- struct gl_fragment_program **prog,
- const char *prog_string)
-{
- GLcontext *ctx = &intel->ctx;
- assert(intel->meta.saved_fp == NULL);
-
- _mesa_reference_fragprog(ctx, &intel->meta.saved_fp,
- ctx->FragmentProgram.Current);
- if (*prog == NULL) {
- GLuint prog_name;
- _mesa_GenPrograms(1, &prog_name);
- _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name);
- _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB,
- GL_PROGRAM_FORMAT_ASCII_ARB,
- strlen(prog_string), (const GLubyte *)prog_string);
- _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current);
- /* Note that DeletePrograms unbinds the program on us */
- _mesa_DeletePrograms(1, &prog_name);
- }
-
- FLUSH_VERTICES(ctx, _NEW_PROGRAM);
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog);
- ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base));
-
- intel->meta.saved_fp_enable = ctx->FragmentProgram.Enabled;
- _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
-}
-
-/**
- * Restores the previous fragment program after
- * intel_meta_set_fragment_program()
- */
-void
-intel_meta_restore_fragment_program(struct intel_context *intel)
-{
- GLcontext *ctx = &intel->ctx;
-
- FLUSH_VERTICES(ctx, _NEW_PROGRAM);
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
- intel->meta.saved_fp);
- _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, NULL);
- ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
- &ctx->FragmentProgram.Current->Base);
-
- if (!intel->meta.saved_fp_enable)
- _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
-}
-
-void
intelInitPixelFuncs(struct dd_function_table *functions)
{
functions->Accum = _swrast_Accum;
@@ -342,18 +184,7 @@ intelInitPixelFuncs(struct dd_function_table *functions)
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
-#ifdef I915
- functions->ReadPixels = intelReadPixels;
-#endif
}
-}
-
-void
-intel_free_pixel_state(struct intel_context *intel)
-{
- GLcontext *ctx = &intel->ctx;
-
- _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL);
- _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL);
+ functions->ReadPixels = intelReadPixels;
}
diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h
index cb41fa1..96a6dd1 100644
--- a/shared/intel_pixel.h
+++ b/shared/intel_pixel.h
@@ -31,16 +31,6 @@
#include "main/mtypes.h"
void intelInitPixelFuncs(struct dd_function_table *functions);
-void intel_meta_set_passthrough_transform(struct intel_context *intel);
-void intel_meta_restore_transform(struct intel_context *intel);
-void intel_meta_set_passthrough_vertex_program(struct intel_context *intel);
-void intel_meta_restore_vertex_program(struct intel_context *intel);
-void intel_meta_set_fragment_program(struct intel_context *intel,
- struct gl_fragment_program **prog,
- const char *prog_string);
-void intel_meta_restore_fragment_program(struct intel_context *intel);
-void intel_free_pixel_state(struct intel_context *intel);
-
GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
GLboolean src_alpha_is_one);
@@ -76,6 +66,4 @@ void intelBitmap(GLcontext * ctx,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels);
-void intel_clear_tris(GLcontext *ctx, GLbitfield mask);
-
#endif
diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index a2ccae1..b543a0b 100644
--- a/shared/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -42,6 +42,7 @@
#include "main/varray.h"
#include "main/attrib.h"
#include "main/enable.h"
+#include "main/viewport.h"
#include "shader/arbprogram.h"
#include "glapi/dispatch.h"
#include "swrast/swrast.h"
@@ -92,19 +93,12 @@ static const GLubyte *map_pbo( GLcontext *ctx,
return ADD_POINTERS(buf, bitmap);
}
-static GLboolean test_bit( const GLubyte *src,
- GLuint bit )
+static GLboolean test_bit( const GLubyte *src, GLuint bit )
{
return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}
-static GLboolean test_msb_bit(const GLubyte *src, GLuint bit)
-{
- return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0;
-}
-
-static void set_bit( GLubyte *dest,
- GLuint bit )
+static void set_bit( GLubyte *dest, GLuint bit )
{
dest[bit/8] |= 1 << (bit % 8);
}
@@ -194,7 +188,7 @@ do_blit_bitmap( GLcontext *ctx,
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLfloat tmpColor[4];
GLubyte ubcolor[4];
- GLuint color8888, color565;
+ GLuint color;
unsigned int num_cliprects;
drm_clip_rect_t *cliprects;
int x_off, y_off;
@@ -232,8 +226,11 @@ do_blit_bitmap( GLcontext *ctx,
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
- color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]);
- color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
+ if (dst->cpp == 2)
+ color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
+ else
+ color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1],
+ ubcolor[2], ubcolor[3]);
if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return GL_FALSE;
@@ -307,21 +304,21 @@ do_blit_bitmap( GLcontext *ctx,
fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
continue;
- /*
- */
- intelEmitImmediateColorExpandBlit( intel,
- dst->cpp,
- (GLubyte *)stipple,
- sz,
- (dst->cpp == 2) ? color565 : color8888,
- dst->pitch,
- dst->buffer,
- 0,
- dst->tiling,
- box_x + px,
- box_y + py,
- w, h,
- logic_op);
+ if (!intelEmitImmediateColorExpandBlit(intel,
+ dst->cpp,
+ (GLubyte *)stipple,
+ sz,
+ color,
+ dst->pitch,
+ dst->buffer,
+ 0,
+ dst->tiling,
+ box_x + px,
+ box_y + py,
+ w, h,
+ logic_op)) {
+ return GL_FALSE;
+ }
}
}
}
@@ -360,11 +357,8 @@ intel_texture_bitmap(GLcontext * ctx,
"END\n";
GLuint texname;
GLfloat vertices[4][4];
- GLfloat texcoords[4][2];
GLint old_active_texture;
- GLubyte *unpacked_bitmap;
GLubyte *a8_bitmap;
- int x, y;
GLfloat dst_z;
/* We need a fragment program for the KIL effect */
@@ -409,6 +403,12 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
+ if (ctx->Fog.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glBitmap() fallback: fog\n");
+ return GL_FALSE;
+ }
+
/* Check that we can load in a texture this big. */
if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
@@ -418,22 +418,16 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
- /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
if (unpack->BufferObj->Name) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return GL_TRUE; /* even though this is an error, we're done */
}
- unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap,
- unpack);
+
+ /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
a8_bitmap = _mesa_calloc(width * height);
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x))
- a8_bitmap[y * width + x] = 0xff;
- }
- }
- _mesa_free(unpacked_bitmap);
+ _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
+
if (unpack->BufferObj->Name) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
@@ -467,15 +461,18 @@ intel_texture_bitmap(GLcontext * ctx,
GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap);
_mesa_free(a8_bitmap);
- intel_meta_set_fragment_program(intel, &intel->meta.bitmap_fp, fp);
+ meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp);
_mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
ctx->Current.RasterColor);
- intel_meta_set_passthrough_vertex_program(intel);
- intel_meta_set_passthrough_transform(intel);
+ meta_set_passthrough_vertex_program(&intel->meta);
+ meta_set_passthrough_transform(&intel->meta);
/* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
dst_z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
+ /* RasterPos[2] already takes into account the DepthRange mapping. */
+ _mesa_DepthRange(0.0, 1.0);
+
vertices[0][0] = dst_x;
vertices[0][1] = dst_y;
vertices[0][2] = dst_z;
@@ -493,25 +490,15 @@ intel_texture_bitmap(GLcontext * ctx,
vertices[3][2] = dst_z;
vertices[3][3] = 1.0;
- texcoords[0][0] = 0.0;
- texcoords[0][1] = 0.0;
- texcoords[1][0] = 1.0;
- texcoords[1][1] = 0.0;
- texcoords[2][0] = 1.0;
- texcoords[2][1] = 1.0;
- texcoords[3][0] = 0.0;
- texcoords[3][1] = 1.0;
-
_mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
- _mesa_ClientActiveTextureARB(GL_TEXTURE0);
- _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
_mesa_Enable(GL_VERTEX_ARRAY);
- _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+ meta_set_default_texrect(&intel->meta);
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
- intel_meta_restore_transform(intel);
- intel_meta_restore_fragment_program(intel);
- intel_meta_restore_vertex_program(intel);
+ meta_restore_texcoords(&intel->meta);
+ meta_restore_transform(&intel->meta);
+ meta_restore_fragment_program(&intel->meta);
+ meta_restore_vertex_program(&intel->meta);
_mesa_PopClientAttrib();
_mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index d50dd68..07ca8f7 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -26,18 +26,13 @@
**************************************************************************/
#include "main/glheader.h"
-#include "main/enums.h"
#include "main/image.h"
#include "main/state.h"
#include "main/mtypes.h"
-#include "main/macros.h"
-#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
-#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_batchbuffer.h"
#include "intel_buffers.h"
-#include "intel_blit.h"
#include "intel_regions.h"
#include "intel_pixel.h"
@@ -97,162 +92,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
ctx->Color.BlendEnabled);
}
-#ifdef I915
-/* Doesn't work for overlapping regions. Could do a double copy or
- * just fallback.
- */
-static GLboolean
-do_texture_copypixels(GLcontext * ctx,
- GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_region *dst = intel_drawbuf_region(intel);
- struct intel_region *src = copypix_src_region(intel, type);
- GLenum src_format;
- GLenum src_type;
-
- DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__,
- srcx, srcy, width, height, dstx, dsty);
-
- if (!src || !dst || type != GL_COLOR)
- return GL_FALSE;
-
- if (ctx->_ImageTransferState) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
- return GL_FALSE;
- }
-
- /* Can't handle overlapping regions. Don't have sufficient control
- * over rasterization to pull it off in-place. Punt on these for
- * now.
- *
- * XXX: do a copy to a temporary.
- */
- if (src->buffer == dst->buffer) {
- drm_clip_rect_t srcbox;
- drm_clip_rect_t dstbox;
- drm_clip_rect_t tmp;
-
- srcbox.x1 = srcx;
- srcbox.y1 = srcy;
- srcbox.x2 = srcx + width;
- srcbox.y2 = srcy + height;
-
- if (ctx->Pixel.ZoomX > 0) {
- dstbox.x1 = dstx;
- dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
- } else {
- dstbox.x1 = dstx + width * ctx->Pixel.ZoomX;
- dstbox.x2 = dstx;
- }
- if (ctx->Pixel.ZoomY > 0) {
- dstbox.y1 = dsty;
- dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
- } else {
- dstbox.y1 = dsty + height * ctx->Pixel.ZoomY;
- dstbox.y2 = dsty;
- }
-
- DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
- DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
- width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
-
- if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
- DBG("%s: regions overlap\n", __FUNCTION__);
- return GL_FALSE;
- }
- }
-
- intelFlush(&intel->ctx);
-
- intel->vtbl.install_meta_state(intel);
-
- /* Is this true? Also will need to turn depth testing on according
- * to state:
- */
- intel->vtbl.meta_no_stencil_write(intel);
- intel->vtbl.meta_no_depth_write(intel);
-
- /* Set the 3d engine to draw into the destination region:
- */
- intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
- intel->vtbl.meta_import_pixel_state(intel);
-
- if (src->cpp == 2) {
- src_format = GL_RGB;
- src_type = GL_UNSIGNED_SHORT_5_6_5;
- }
- else {
- src_format = GL_BGRA;
- src_type = GL_UNSIGNED_BYTE;
- }
-
- /* Set the frontbuffer up as a large rectangular texture.
- */
- if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
- src->pitch,
- src->height, src_format, src_type)) {
- intel->vtbl.leave_meta_state(intel);
- return GL_FALSE;
- }
-
-
- intel->vtbl.meta_texture_blend_replace(intel);
-
- LOCK_HARDWARE(intel);
-
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-
- srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */
-
- srcx += dPriv->x;
- srcy += dPriv->y;
-
- /* Clip against the source region. This is the only source
- * clipping we do. XXX: Just set the texcord wrap mode to clamp
- * or similar.
- *
- */
- if (0) {
- GLint orig_x = srcx;
- GLint orig_y = srcy;
-
- if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
- &srcx, &srcy, &width, &height))
- goto out;
-
- dstx += srcx - orig_x;
- dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
- }
-
- /* Just use the regular cliprect mechanism... Does this need to
- * even hold the lock???
- */
- intel->vtbl.meta_draw_quad(intel,
- dstx,
- dstx + width * ctx->Pixel.ZoomX,
- dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
- dPriv->h - (dsty), 0, /* XXX: what z value? */
- 0x00ff00ff,
- srcx, srcx + width, srcy, srcy + height);
-
- out:
- intel->vtbl.leave_meta_state(intel);
- intel_batchbuffer_emit_mi_flush(intel->batch);
- }
- UNLOCK_HARDWARE(intel);
-
- DBG("%s: success\n", __FUNCTION__);
- return GL_TRUE;
-}
-#endif /* I915 */
-
/**
* CopyPixels with the blitter. Don't support zooming, pixel transfer, etc.
@@ -272,6 +111,12 @@ do_blit_copypixels(GLcontext * ctx,
drm_clip_rect_t *cliprects;
int x_off, y_off;
+ if (type == GL_DEPTH || type == GL_STENCIL) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n");
+ return GL_FALSE;
+ }
+
/* Update draw buffer bounds */
_mesa_update_state(ctx);
@@ -362,14 +207,16 @@ do_blit_copypixels(GLcontext * ctx,
&clip_x, &clip_y, &clip_w, &clip_h))
continue;
- intelEmitCopyBlit(intel, dst->cpp,
- src->pitch, src->buffer, 0, src->tiling,
- dst->pitch, dst->buffer, 0, dst->tiling,
- clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */
- clip_x, clip_y, /* dstx, dsty */
- clip_w, clip_h,
- ctx->Color.ColorLogicOpEnabled ?
- ctx->Color.LogicOp : GL_COPY);
+ if (!intel_region_copy(intel,
+ dst, 0, clip_x, clip_y,
+ src, 0, clip_x + delta_x, clip_y + delta_y,
+ clip_w, clip_h,
+ ctx->Color.ColorLogicOpEnabled ?
+ ctx->Color.LogicOp : GL_COPY)) {
+ DBG("%s: blit failure\n", __FUNCTION__);
+ UNLOCK_HARDWARE(intel);
+ return GL_FALSE;
+ }
}
}
out:
@@ -392,12 +239,6 @@ intelCopyPixels(GLcontext * ctx,
if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
-#ifdef I915
- if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
- return;
-#endif
-
- DBG("fallback to _swrast_CopyPixels\n");
-
- _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+ /* this will use swrast if needed */
+ _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c
index d80069d..7fbb89f 100644
--- a/shared/intel_pixel_draw.c
+++ b/shared/intel_pixel_draw.c
@@ -29,8 +29,6 @@
#include "main/enums.h"
#include "main/image.h"
#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
#include "main/teximage.h"
#include "main/texenv.h"
#include "main/texobj.h"
@@ -41,169 +39,22 @@
#include "main/enable.h"
#include "main/buffers.h"
#include "main/fbobject.h"
-#include "main/renderbuffer.h"
#include "main/depth.h"
#include "main/hash.h"
#include "main/blend.h"
-#include "glapi/dispatch.h"
#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
-#include "intel_screen.h"
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_regions.h"
#include "intel_pixel.h"
-#include "intel_buffer_objects.h"
#include "intel_fbo.h"
-static GLboolean
-intel_texture_drawpixels(GLcontext * ctx,
- GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format,
- GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct intel_context *intel = intel_context(ctx);
- GLuint texname;
- GLfloat vertices[4][4];
- GLfloat texcoords[4][2];
- GLfloat z;
- GLint old_active_texture;
- GLenum internalFormat;
-
- /* We're going to mess with texturing with no regard to existing texture
- * state, so if there is some set up we have to bail.
- */
- if (ctx->Texture._EnabledUnits != 0) {
- if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n");
- return GL_FALSE;
- }
-
- /* Can't do textured DrawPixels with a fragment program, unless we were
- * to generate a new program that sampled our texture and put the results
- * in the fragment color before the user's program started.
- */
- if (ctx->FragmentProgram.Enabled) {
- if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n");
- return GL_FALSE;
- }
-
- /* We don't have a way to generate fragments with stencil values which
- * will set the resulting stencil value.
- */
- if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL)
- return GL_FALSE;
-
- /* Check that we can load in a texture this big. */
- if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
- height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
- if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "glDrawPixels() fallback: bitmap too large (%dx%d)\n",
- width, height);
- return GL_FALSE;
- }
-
- /* To do DEPTH_COMPONENT, we would need to change our setup to not draw to
- * the color buffer, and sample the texture values into the fragment depth
- * in a program.
- */
- if (format == GL_DEPTH_COMPONENT) {
- if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr,
- "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n");
- return GL_FALSE;
- }
-
- if (!ctx->Extensions.ARB_texture_non_power_of_two &&
- (!is_power_of_two(width) || !is_power_of_two(height))) {
- if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr,
- "glDrawPixels() fallback: NPOT texture\n");
- return GL_FALSE;
- }
-
- _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
- GL_CURRENT_BIT);
- _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
-
- /* XXX: pixel store stuff */
- _mesa_Disable(GL_POLYGON_STIPPLE);
-
- old_active_texture = ctx->Texture.CurrentUnit;
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
- _mesa_Enable(GL_TEXTURE_2D);
- _mesa_GenTextures(1, &texname);
- _mesa_BindTexture(GL_TEXTURE_2D, texname);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
- if (type == GL_ALPHA)
- internalFormat = GL_ALPHA;
- else
- internalFormat = GL_RGBA;
- _mesa_TexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format,
- type, pixels);
-
- intel_meta_set_passthrough_transform(intel);
-
- /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
- z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
-
- /* Create the vertex buffer based on the current raster pos. The x and y
- * we're handed are ctx->Current.RasterPos[0,1] rounded to integers.
- * We also apply the depth. However, the W component is already multiplied
- * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point.
- */
- vertices[0][0] = x;
- vertices[0][1] = y;
- vertices[0][2] = z;
- vertices[0][3] = 1.0;
- vertices[1][0] = x + width * ctx->Pixel.ZoomX;
- vertices[1][1] = y;
- vertices[1][2] = z;
- vertices[1][3] = 1.0;
- vertices[2][0] = x + width * ctx->Pixel.ZoomX;
- vertices[2][1] = y + height * ctx->Pixel.ZoomY;
- vertices[2][2] = z;
- vertices[2][3] = 1.0;
- vertices[3][0] = x;
- vertices[3][1] = y + height * ctx->Pixel.ZoomY;
- vertices[3][2] = z;
- vertices[3][3] = 1.0;
-
- texcoords[0][0] = 0.0;
- texcoords[0][1] = 0.0;
- texcoords[1][0] = 1.0;
- texcoords[1][1] = 0.0;
- texcoords[2][0] = 1.0;
- texcoords[2][1] = 1.0;
- texcoords[3][0] = 0.0;
- texcoords[3][1] = 1.0;
-
- _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
- _mesa_ClientActiveTextureARB(GL_TEXTURE0);
- _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
- _mesa_Enable(GL_VERTEX_ARRAY);
- _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
- _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- intel_meta_restore_transform(intel);
-
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
- _mesa_PopClientAttrib();
- _mesa_PopAttrib();
-
- _mesa_DeleteTextures(1, &texname);
-
- return GL_TRUE;
-}
+/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */
static GLboolean
intel_stencil_drawpixels(GLcontext * ctx,
GLint x, GLint y,
@@ -216,14 +67,14 @@ intel_stencil_drawpixels(GLcontext * ctx,
struct intel_context *intel = intel_context(ctx);
GLuint texname, rb_name, fb_name, old_fb_name;
GLfloat vertices[4][2];
- GLfloat texcoords[4][2];
struct intel_renderbuffer *irb;
struct intel_renderbuffer *depth_irb;
struct gl_renderbuffer *rb;
struct gl_pixelstore_attrib old_unpack;
GLstencil *stencil_pixels;
- int row;
+ int row, y1, y2;
GLint old_active_texture;
+ GLboolean rendering_to_fbo = ctx->DrawBuffer->Name != 0;
if (format != GL_STENCIL_INDEX)
return GL_FALSE;
@@ -358,34 +209,35 @@ intel_stencil_drawpixels(GLcontext * ctx,
ctx->Unpack = old_unpack;
_mesa_free(stencil_pixels);
- intel_meta_set_passthrough_transform(intel);
+ meta_set_passthrough_transform(&intel->meta);
+ /* Since we're rendering to the framebuffer as if it was an FBO,
+ * if it's the window system we have to flip the coordinates.
+ */
+ if (rendering_to_fbo) {
+ y1 = y;
+ y2 = y + height * ctx->Pixel.ZoomY;
+ } else {
+ y1 = irb->Base.Height - (y + height * ctx->Pixel.ZoomY);
+ y2 = irb->Base.Height - y;
+ }
vertices[0][0] = x;
- vertices[0][1] = y;
+ vertices[0][1] = y1;
vertices[1][0] = x + width * ctx->Pixel.ZoomX;
- vertices[1][1] = y;
+ vertices[1][1] = y1;
vertices[2][0] = x + width * ctx->Pixel.ZoomX;
- vertices[2][1] = y + height * ctx->Pixel.ZoomY;
+ vertices[2][1] = y2;
vertices[3][0] = x;
- vertices[3][1] = y + height * ctx->Pixel.ZoomY;
-
- texcoords[0][0] = 0.0;
- texcoords[0][1] = 0.0;
- texcoords[1][0] = 1.0;
- texcoords[1][1] = 0.0;
- texcoords[2][0] = 1.0;
- texcoords[2][1] = 1.0;
- texcoords[3][0] = 0.0;
- texcoords[3][1] = 1.0;
+ vertices[3][1] = y2;
_mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
- _mesa_ClientActiveTextureARB(GL_TEXTURE0);
- _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords);
_mesa_Enable(GL_VERTEX_ARRAY);
- _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+ meta_set_default_texrect(&intel->meta);
+
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
- intel_meta_restore_transform(intel);
+ meta_restore_texcoords(&intel->meta);
+ meta_restore_transform(&intel->meta);
_mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
_mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name);
@@ -409,17 +261,25 @@ intelDrawPixels(GLcontext * ctx,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels)
{
- if (intel_texture_drawpixels(ctx, x, y, width, height, format, type,
- unpack, pixels))
- return;
-
+#if 0
+ /* XXX this function doesn't seem to work reliably even when all
+ * the pre-requisite conditions are met.
+ * Note that this function is never hit with conform.
+ * Fall back to swrast because even the _mesa_meta_draw_pixels() approach
+ * isn't working because of an apparent stencil bug.
+ */
if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
unpack, pixels))
return;
+#else
+ (void) intel_stencil_drawpixels; /* silence warning */
+ if (format == GL_STENCIL_INDEX) {
+ _swrast_DrawPixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
+ return;
+ }
+#endif
- if (INTEL_DEBUG & DEBUG_PIXEL)
- _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
-
- _swrast_DrawPixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
+ _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
}
diff --git a/i915/intel_pixel_read.c b/shared/intel_pixel_read.c
index 56087aa..8713463 100644
--- a/i915/intel_pixel_read.c
+++ b/shared/intel_pixel_read.c
@@ -1,8 +1,8 @@
/**************************************************************************
- *
+ *
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- *
+ *
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
- *
+ *
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ *
**************************************************************************/
#include "main/glheader.h"
@@ -31,6 +31,7 @@
#include "main/macros.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/state.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
@@ -45,10 +46,10 @@
/* For many applications, the new ability to pull the source buffers
* back out of the GTT and then do the packing/conversion operations
* in software will be as much of an improvement as trying to get the
- * blitter and/or texture engine to do the work.
+ * blitter and/or texture engine to do the work.
*
* This step is gated on private backbuffers.
- *
+ *
* Obviously the frontbuffer can't be pulled back, so that is either
* an argument for blit/texture readpixels, or for blitting to a
* temporary and then pulling that back.
@@ -179,7 +180,7 @@ do_blit_readpixels(GLcontext * ctx,
if (!src)
return GL_FALSE;
- if (dst) {
+ if (pack->BufferObj->Name) {
/* XXX This validation should be done by core mesa:
*/
if (!_mesa_validate_pbo_access(2, pack, width, height, 1,
@@ -260,16 +261,19 @@ do_blit_readpixels(GLcontext * ctx,
if (!intel_intersect_cliprects(&rect, &src_rect, &box[i]))
continue;
- intelEmitCopyBlit(intel,
- src->cpp,
- src->pitch, src->buffer, 0, src->tiling,
- rowLength, dst_buffer, dst_offset, GL_FALSE,
- rect.x1,
- rect.y1,
- rect.x1 - src_rect.x1,
- rect.y2 - src_rect.y2,
- rect.x2 - rect.x1, rect.y2 - rect.y1,
- GL_COPY);
+ if (!intelEmitCopyBlit(intel,
+ src->cpp,
+ src->pitch, src->buffer, 0, src->tiling,
+ rowLength, dst_buffer, dst_offset, GL_FALSE,
+ rect.x1,
+ rect.y1,
+ rect.x1 - src_rect.x1,
+ rect.y2 - src_rect.y2,
+ rect.x2 - rect.x1, rect.y2 - rect.y1,
+ GL_COPY)) {
+ UNLOCK_HARDWARE(intel);
+ return GL_FALSE;
+ }
}
}
UNLOCK_HARDWARE(intel);
@@ -291,6 +295,7 @@ intelReadPixels(GLcontext * ctx,
intelFlush(ctx);
+#ifdef I915
if (do_blit_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
@@ -298,9 +303,22 @@ intelReadPixels(GLcontext * ctx,
if (do_texture_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
+#else
+ (void)do_blit_readpixels;
+ (void)do_texture_readpixels;
+#endif
if (INTEL_DEBUG & DEBUG_PIXEL)
_mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+ /* Update Mesa state before calling down into _swrast_ReadPixels, as
+ * the spans code requires the computed buffer states to be up to date,
+ * but _swrast_ReadPixels only updates Mesa state after setting up
+ * the spans code.
+ */
+
+ if (ctx->NewState)
+ _mesa_update_state(ctx);
+
_swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
}
diff --git a/shared/intel_reg.h b/shared/intel_reg.h
index 57ac8f0..d19f1ba 100644
--- a/shared/intel_reg.h
+++ b/shared/intel_reg.h
@@ -189,6 +189,19 @@
#define S7_DEPTH_OFFSET_CONST_MASK ~0
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+#define BUF_3D_PITCH(x) (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT (1<<23)
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index 0aa5b8c..a86c66a 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -52,17 +52,77 @@
#define FILE_DEBUG_FLAG DEBUG_REGION
+/* This should be set to the maximum backtrace size desired.
+ * Set it to 0 to disable backtrace debugging.
+ */
+#define DEBUG_BACKTRACE_SIZE 0
+
+#if DEBUG_BACKTRACE_SIZE == 0
+/* Use the standard debug output */
+#define _DBG(...) DBG(__VA_ARGS__)
+#else
+/* Use backtracing debug output */
+#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);}
+
+/* Backtracing debug support */
+#include <execinfo.h>
+
+static void
+debug_backtrace(void)
+{
+ void *trace[DEBUG_BACKTRACE_SIZE];
+ char **strings = NULL;
+ int traceSize;
+ register int i;
+
+ traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
+ strings = backtrace_symbols(trace, traceSize);
+ if (strings == NULL) {
+ DBG("no backtrace:");
+ return;
+ }
+
+ /* Spit out all the strings with a colon separator. Ignore
+ * the first, since we don't really care about the call
+ * to debug_backtrace() itself. Skip until the final "/" in
+ * the trace to avoid really long lines.
+ */
+ for (i = 1; i < traceSize; i++) {
+ char *p = strings[i], *slash = strings[i];
+ while (*p) {
+ if (*p++ == '/') {
+ slash = p;
+ }
+ }
+
+ DBG("%s:", slash);
+ }
+
+ /* Free up the memory, and we're done */
+ free(strings);
+}
+
+#endif
+
+
+
/* XXX: Thread safety?
*/
GLubyte *
intel_region_map(struct intel_context *intel, struct intel_region *region)
{
- DBG("%s\n", __FUNCTION__);
+ intelFlush(&intel->ctx);
+
+ _DBG("%s %p\n", __FUNCTION__, region);
if (!region->map_refcount++) {
if (region->pbo)
intel_region_cow(intel, region);
- dri_bo_map(region->buffer, GL_TRUE);
+ if (region->tiling != I915_TILING_NONE &&
+ intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_map_gtt(region->buffer);
+ else
+ dri_bo_map(region->buffer, GL_TRUE);
region->map = region->buffer->virtual;
}
@@ -72,9 +132,13 @@ intel_region_map(struct intel_context *intel, struct intel_region *region)
void
intel_region_unmap(struct intel_context *intel, struct intel_region *region)
{
- DBG("%s\n", __FUNCTION__);
+ _DBG("%s %p\n", __FUNCTION__, region);
if (!--region->map_refcount) {
- dri_bo_unmap(region->buffer);
+ if (region->tiling != I915_TILING_NONE &&
+ intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_unmap_gtt(region->buffer);
+ else
+ dri_bo_unmap(region->buffer);
region->map = NULL;
}
}
@@ -87,10 +151,10 @@ intel_region_alloc_internal(struct intel_context *intel,
{
struct intel_region *region;
- DBG("%s\n", __FUNCTION__);
-
- if (buffer == NULL)
+ if (buffer == NULL) {
+ _DBG("%s <-- NULL\n", __FUNCTION__);
return NULL;
+ }
region = calloc(sizeof(*region), 1);
region->cpp = cpp;
@@ -104,15 +168,40 @@ intel_region_alloc_internal(struct intel_context *intel,
region->tiling = I915_TILING_NONE;
region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
+ _DBG("%s <-- %p\n", __FUNCTION__, region);
return region;
}
struct intel_region *
intel_region_alloc(struct intel_context *intel,
+ uint32_t tiling,
GLuint cpp, GLuint width, GLuint height, GLuint pitch,
GLboolean expect_accelerated_upload)
{
dri_bo *buffer;
+ struct intel_region *region;
+
+ /* If we're tiled, our allocations are in 8 or 32-row blocks, so
+ * failure to align our height means that we won't allocate enough pages.
+ *
+ * If we're untiled, we still have to align to 2 rows high because the
+ * data port accesses 2x2 blocks even if the bottom row isn't to be
+ * rendered, so failure to align means we could walk off the end of the
+ * GTT and fault.
+ */
+ if (tiling == I915_TILING_X)
+ height = ALIGN(height, 8);
+ else if (tiling == I915_TILING_Y)
+ height = ALIGN(height, 32);
+ else
+ height = ALIGN(height, 2);
+
+ /* If we're untiled, we have to align to 2 rows high because the
+ * data port accesses 2x2 blocks even if the bottom row isn't to be
+ * rendered, so failure to align means we could walk off the end of the
+ * GTT and fault.
+ */
+ height = ALIGN(height, 2);
if (expect_accelerated_upload) {
buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
@@ -122,7 +211,16 @@ intel_region_alloc(struct intel_context *intel,
pitch * cpp * height, 64);
}
- return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer);
+ region = intel_region_alloc_internal(intel, cpp, width, height,
+ pitch, buffer);
+
+ if (tiling != I915_TILING_NONE) {
+ assert(((pitch * cpp) & 127) == 0);
+ drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
+ drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+ }
+
+ return region;
}
struct intel_region *
@@ -158,7 +256,7 @@ void
intel_region_reference(struct intel_region **dst, struct intel_region *src)
{
if (src)
- DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
+ _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
assert(*dst == NULL);
if (src) {
@@ -172,10 +270,12 @@ intel_region_release(struct intel_region **region_handle)
{
struct intel_region *region = *region_handle;
- if (region == NULL)
+ if (region == NULL) {
+ _DBG("%s NULL\n", __FUNCTION__);
return;
+ }
- DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
+ _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
ASSERT(region->refcount > 0);
region->refcount--;
@@ -249,9 +349,7 @@ intel_region_data(struct intel_context *intel,
const void *src, GLuint src_pitch,
GLuint srcx, GLuint srcy, GLuint width, GLuint height)
{
- GLboolean locked = GL_FALSE;
-
- DBG("%s\n", __FUNCTION__);
+ _DBG("%s\n", __FUNCTION__);
if (intel == NULL)
return;
@@ -264,39 +362,33 @@ intel_region_data(struct intel_context *intel,
intel_region_cow(intel, dst);
}
- if (!intel->locked) {
- LOCK_HARDWARE(intel);
- locked = GL_TRUE;
- }
-
+ LOCK_HARDWARE(intel);
_mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
dst->cpp,
dst->pitch,
dstx, dsty, width, height, src, src_pitch, srcx, srcy);
intel_region_unmap(intel, dst);
-
- if (locked)
- UNLOCK_HARDWARE(intel);
-
+ UNLOCK_HARDWARE(intel);
}
/* Copy rectangular sub-regions. Need better logic about when to
* push buffers into AGP - will currently do so whenever possible.
*/
-void
+GLboolean
intel_region_copy(struct intel_context *intel,
struct intel_region *dst,
GLuint dst_offset,
GLuint dstx, GLuint dsty,
struct intel_region *src,
GLuint src_offset,
- GLuint srcx, GLuint srcy, GLuint width, GLuint height)
+ GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+ GLenum logicop)
{
- DBG("%s\n", __FUNCTION__);
+ _DBG("%s\n", __FUNCTION__);
if (intel == NULL)
- return;
+ return GL_FALSE;
if (dst->pbo) {
if (dstx == 0 &&
@@ -308,41 +400,12 @@ intel_region_copy(struct intel_context *intel,
assert(src->cpp == dst->cpp);
- intelEmitCopyBlit(intel,
- dst->cpp,
- src->pitch, src->buffer, src_offset, src->tiling,
- dst->pitch, dst->buffer, dst_offset, dst->tiling,
- srcx, srcy, dstx, dsty, width, height,
- GL_COPY);
-}
-
-/* Fill a rectangular sub-region. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void
-intel_region_fill(struct intel_context *intel,
- struct intel_region *dst,
- GLuint dst_offset,
- GLuint dstx, GLuint dsty,
- GLuint width, GLuint height, GLuint color)
-{
- DBG("%s\n", __FUNCTION__);
-
- if (intel == NULL)
- return;
-
- if (dst->pbo) {
- if (dstx == 0 &&
- dsty == 0 && width == dst->pitch && height == dst->height)
- intel_region_release_pbo(intel, dst);
- else
- intel_region_cow(intel, dst);
- }
-
- intelEmitFillBlit(intel,
- dst->cpp,
- dst->pitch, dst->buffer, dst_offset, dst->tiling,
- dstx, dsty, width, height, color);
+ return intelEmitCopyBlit(intel,
+ dst->cpp,
+ src->pitch, src->buffer, src_offset, src->tiling,
+ dst->pitch, dst->buffer, dst_offset, dst->tiling,
+ srcx, srcy, dstx, dsty, width, height,
+ logicop);
}
/* Attach to a pbo, discarding our data. Effectively zero-copy upload
@@ -353,9 +416,13 @@ intel_region_attach_pbo(struct intel_context *intel,
struct intel_region *region,
struct intel_buffer_object *pbo)
{
+ dri_bo *buffer;
+
if (region->pbo == pbo)
return;
+ _DBG("%s %p %p\n", __FUNCTION__, region, pbo);
+
/* If there is already a pbo attached, break the cow tie now.
* Don't call intel_region_release_pbo() as that would
* unnecessarily allocate a new buffer we would have to immediately
@@ -371,10 +438,13 @@ intel_region_attach_pbo(struct intel_context *intel,
region->buffer = NULL;
}
+ /* make sure pbo has a buffer of its own */
+ buffer = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL);
+
region->pbo = pbo;
region->pbo->region = region;
- dri_bo_reference(pbo->buffer);
- region->buffer = pbo->buffer;
+ dri_bo_reference(buffer);
+ region->buffer = buffer;
}
@@ -385,6 +455,7 @@ void
intel_region_release_pbo(struct intel_context *intel,
struct intel_region *region)
{
+ _DBG("%s %p\n", __FUNCTION__, region);
assert(region->buffer == region->pbo->buffer);
region->pbo->region = NULL;
region->pbo = NULL;
@@ -403,34 +474,27 @@ void
intel_region_cow(struct intel_context *intel, struct intel_region *region)
{
struct intel_buffer_object *pbo = region->pbo;
- GLboolean was_locked = intel->locked;
-
- if (intel == NULL)
- return;
+ GLboolean ok;
intel_region_release_pbo(intel, region);
assert(region->cpp * region->pitch * region->height == pbo->Base.Size);
- DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size);
+ _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size);
/* Now blit from the texture buffer to the new buffer:
*/
- was_locked = intel->locked;
- if (!was_locked)
- LOCK_HARDWARE(intel);
-
- intelEmitCopyBlit(intel,
- region->cpp,
- region->pitch, region->buffer, 0, region->tiling,
- region->pitch, pbo->buffer, 0, region->tiling,
- 0, 0, 0, 0,
- region->pitch, region->height,
- GL_COPY);
-
- if (!was_locked)
- UNLOCK_HARDWARE(intel);
+ LOCK_HARDWARE(intel);
+ ok = intelEmitCopyBlit(intel,
+ region->cpp,
+ region->pitch, pbo->buffer, 0, region->tiling,
+ region->pitch, region->buffer, 0, region->tiling,
+ 0, 0, 0, 0,
+ region->pitch, region->height,
+ GL_COPY);
+ assert(ok);
+ UNLOCK_HARDWARE(intel);
}
dri_bo *
@@ -459,6 +523,10 @@ intel_recreate_static(struct intel_context *intel,
if (region == NULL) {
region = calloc(sizeof(*region), 1);
region->refcount = 1;
+ _DBG("%s creating new region %p\n", __FUNCTION__, region);
+ }
+ else {
+ _DBG("%s %p\n", __FUNCTION__, region);
}
if (intel->ctx.Visual.rgbBits == 24)
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 45e2bf4..0d379bd 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -73,7 +73,8 @@ struct intel_region
* copied by calling intel_reference_region().
*/
struct intel_region *intel_region_alloc(struct intel_context *intel,
- GLuint cpp, GLuint width,
+ uint32_t tiling,
+ GLuint cpp, GLuint width,
GLuint height, GLuint pitch,
GLboolean expect_accelerated_upload);
@@ -109,21 +110,15 @@ void intel_region_data(struct intel_context *intel,
/* Copy rectangular sub-regions
*/
-void intel_region_copy(struct intel_context *intel,
- struct intel_region *dest,
- GLuint dest_offset,
- GLuint destx, GLuint desty,
- struct intel_region *src,
- GLuint src_offset,
- GLuint srcx, GLuint srcy, GLuint width, GLuint height);
-
-/* Fill a rectangular sub-region
- */
-void intel_region_fill(struct intel_context *intel,
- struct intel_region *dest,
- GLuint dest_offset,
- GLuint destx, GLuint desty,
- GLuint width, GLuint height, GLuint color);
+GLboolean
+intel_region_copy(struct intel_context *intel,
+ struct intel_region *dest,
+ GLuint dest_offset,
+ GLuint destx, GLuint desty,
+ struct intel_region *src,
+ GLuint src_offset,
+ GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+ GLenum logicop);
/* Helpers for zerocopy uploads, particularly texture image uploads:
*/
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
index 0f278b3..1b8c56e 100644
--- a/shared/intel_screen.c
+++ b/shared/intel_screen.c
@@ -49,6 +49,10 @@
#include "i915_drm.h"
#include "i830_dri.h"
+#define DRI_CONF_TEXTURE_TILING(def) \
+ DRI_CONF_OPT_BEGIN(texture_tiling, bool, def) \
+ DRI_CONF_DESC(en, "Enable texture tiling") \
+ DRI_CONF_OPT_END \
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
@@ -64,6 +68,17 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END
+
+#ifdef I915
+ DRI_CONF_TEXTURE_TILING(false)
+#else
+ DRI_CONF_TEXTURE_TILING(true)
+#endif
+
+ DRI_CONF_OPT_BEGIN(early_z, bool, false)
+ DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
+ DRI_CONF_OPT_END
+
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -76,7 +91,7 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_SECTION_END
DRI_CONF_END;
-const GLuint __driNConfigOptions = 8;
+const GLuint __driNConfigOptions = 10;
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
@@ -305,7 +320,7 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)
dri_bufmgr_destroy(intelScreen->bufmgr);
intelUnmapScreenRegions(intelScreen);
- driDestroyOptionCache(&intelScreen->optionCache);
+ driDestroyOptionInfo(&intelScreen->optionCache);
FREE(intelScreen);
sPriv->private = NULL;
@@ -587,7 +602,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
GLboolean gem_supported;
struct drm_i915_getparam gp;
__DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
- int num_fences;
+ int num_fences = 0;
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
@@ -617,10 +632,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
/* Otherwise, use the classic buffer manager. */
if (intelScreen->bufmgr == NULL) {
if (gem_disable) {
- fprintf(stderr, "GEM disabled. Using classic.\n");
+ _mesa_warning(NULL, "GEM disabled. Using classic.");
} else {
- fprintf(stderr, "Failed to initialize GEM. "
- "Falling back to classic.\n");
+ _mesa_warning(NULL,
+ "Failed to initialize GEM. Falling back to classic.");
}
if (intelScreen->tex.size == 0) {
diff --git a/shared/intel_span.c b/shared/intel_span.c
index 34b78eb..8df4990 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -501,7 +501,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
/**
- * Prepare for softare rendering. Map current read/draw framebuffers'
+ * Prepare for software rendering. Map current read/draw framebuffers'
* renderbuffes and all currently bound texture objects.
*
* Old note: Moved locking out to get reasonable span performance.
@@ -526,7 +526,7 @@ intelSpanRenderStart(GLcontext * ctx)
}
/**
- * Called when done softare rendering. Unmap the buffers we mapped in
+ * Called when done software rendering. Unmap the buffers we mapped in
* the above function.
*/
void
diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c
new file mode 100644
index 0000000..1286fe9
--- /dev/null
+++ b/shared/intel_syncobj.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_syncobj.c
+ *
+ * Support for ARB_sync
+ *
+ * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * reference on it. We can then check for completion or wait for compeltion
+ * using the normal buffer object mechanisms. This does mean that if an
+ * application is using many sync objects, it will emit small batchbuffers
+ * which may end up being a significant overhead. In other tests of removing
+ * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
+ * performance bottleneck, though.
+ */
+
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+static struct gl_sync_object *
+intel_new_sync_object(GLcontext *ctx, GLuint id)
+{
+ struct intel_sync_object *sync;
+
+ sync = _mesa_calloc(sizeof(struct intel_sync_object));
+
+ return &sync->Base;
+}
+
+static void
+intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s)
+{
+ struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+ drm_intel_bo_unreference(sync->bo);
+ _mesa_free(sync);
+}
+
+static void
+intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
+ GLenum condition, GLbitfield flags)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+ assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
+ sync->bo = intel->batch->buf;
+ drm_intel_bo_reference(sync->bo);
+
+ intelFlush(ctx);
+}
+
+/* We ignore the user-supplied timeout. This is weaselly -- we're allowed to
+ * round to an implementation-dependent accuracy, and right now our
+ * implementation "rounds" to the wait-forever value.
+ *
+ * The fix would be a new kernel function to do the GTT transition with a
+ * timeout.
+ */
+static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+ GLbitfield flags, GLuint64 timeout)
+{
+ struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+ if (sync->bo) {
+ drm_intel_bo_wait_rendering(sync->bo);
+ s->StatusFlag = 1;
+ drm_intel_bo_unreference(sync->bo);
+ sync->bo = NULL;
+ }
+}
+
+/* We have nothing to do for WaitSync. Our GL command stream is sequential,
+ * so given that the sync object has already flushed the batchbuffer,
+ * any batchbuffers coming after this waitsync will naturally not occur until
+ * the previous one is done.
+ */
+static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+ GLbitfield flags, GLuint64 timeout)
+{
+}
+
+static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s)
+{
+ struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+ if (sync->bo && drm_intel_bo_busy(sync->bo)) {
+ drm_intel_bo_unreference(sync->bo);
+ sync->bo = NULL;
+ s->StatusFlag = 1;
+ }
+}
+
+void intel_init_syncobj_functions(struct dd_function_table *functions)
+{
+ functions->NewSyncObject = intel_new_sync_object;
+ functions->DeleteSyncObject = intel_delete_sync_object;
+ functions->FenceSync = intel_fence_sync;
+ functions->CheckSync = intel_check_sync;
+ functions->ClientWaitSync = intel_client_wait_sync;
+ functions->ServerWaitSync = intel_server_wait_sync;
+}
diff --git a/shared/intel_tex.c b/shared/intel_tex.c
index ae0994b..df63f29 100644
--- a/shared/intel_tex.c
+++ b/shared/intel_tex.c
@@ -158,81 +158,11 @@ timed_memcpy(void *dest, const void *src, size_t n)
}
#endif /* DO_DEBUG */
-/**
- * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
- * level).
- *
- * The texture object's miptree must be mapped.
- *
- * It would be really nice if this was just called by Mesa whenever mipmaps
- * needed to be regenerated, rather than us having to remember to do so in
- * each texture image modification path.
- *
- * This function should also include an accelerated path.
- */
-void
-intel_generate_mipmap(GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
- GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- int face, i;
-
- _mesa_generate_mipmap(ctx, target, texObj);
-
- /* Update the level information in our private data in the new images, since
- * it didn't get set as part of a normal TexImage path.
- */
- for (face = 0; face < nr_faces; face++) {
- for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
- struct intel_texture_image *intelImage;
-
- intelImage = intel_texture_image(texObj->Image[face][i]);
- if (intelImage == NULL)
- break;
-
- intelImage->level = i;
- intelImage->face = face;
- /* Unreference the miptree to signal that the new Data is a bare
- * pointer from mesa.
- */
- intel_miptree_release(intel, &intelImage->mt);
- }
- }
-}
-
-static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
- intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
- intel_generate_mipmap(ctx, target, texObj);
- intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
-}
-
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->ChooseTextureFormat = intelChooseTextureFormat;
- functions->TexImage1D = intelTexImage1D;
- functions->TexImage2D = intelTexImage2D;
- functions->TexImage3D = intelTexImage3D;
- functions->TexSubImage1D = intelTexSubImage1D;
- functions->TexSubImage2D = intelTexSubImage2D;
- functions->TexSubImage3D = intelTexSubImage3D;
- functions->CopyTexImage1D = intelCopyTexImage1D;
- functions->CopyTexImage2D = intelCopyTexImage2D;
- functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
- functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
- functions->GetTexImage = intelGetTexImage;
- functions->GenerateMipmap = intelGenerateMipmap;
-
- /* compressed texture functions */
- functions->CompressedTexImage2D = intelCompressedTexImage2D;
- functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
- functions->GetCompressedTexImage = intelGetCompressedTexImage;
+ functions->GenerateMipmap = intel_generate_mipmap;
functions->NewTextureObject = intelNewTextureObject;
functions->NewTextureImage = intelNewTextureImage;
diff --git a/shared/intel_tex.h b/shared/intel_tex.h
index f5372d8..471aa2a 100644
--- a/shared/intel_tex.h
+++ b/shared/intel_tex.h
@@ -35,116 +35,17 @@
void intelInitTextureFuncs(struct dd_function_table *functions);
+void intelInitTextureImageFuncs(struct dd_function_table *functions);
+
+void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
+
+void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
+
const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx,
GLint internalFormat,
GLenum format,
GLenum type);
-
-void intelTexImage3D(GLcontext * ctx,
- GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint depth,
- GLint border,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelTexSubImage3D(GLcontext * ctx,
- GLenum target,
- GLint level,
- GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLsizei height, GLsizei depth,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelTexImage2D(GLcontext * ctx,
- GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelTexSubImage2D(GLcontext * ctx,
- GLenum target,
- GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelTexImage1D(GLcontext * ctx,
- GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint border,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelTexSubImage1D(GLcontext * ctx,
- GLenum target,
- GLint level,
- GLint xoffset,
- GLsizei width,
- GLenum format, GLenum type,
- const GLvoid * pixels,
- const struct gl_pixelstore_attrib *packing,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLint border);
-
-void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border);
-
-void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
- GLint xoffset, GLint x, GLint y, GLsizei width);
-
-void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
- GLint xoffset, GLint yoffset,
- GLint x, GLint y, GLsizei width, GLsizei height);
-
-void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
- GLenum format, GLenum type, GLvoid * pixels,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage );
-
-void intelCompressedTexSubImage2D(GLcontext * ctx,
- GLenum target,
- GLint level,
- GLint xoffset, GLint yoffset,
- GLsizei width, GLsizei height,
- GLenum format, GLsizei imageSize,
- const GLvoid * pixels,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
-void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
- GLvoid *pixels,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage);
-
void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch);
void intelSetTexBuffer(__DRIcontext *pDRICtx,
diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c
index 90bbb8c..74f7f58 100644
--- a/shared/intel_tex_copy.c
+++ b/shared/intel_tex_copy.c
@@ -73,11 +73,9 @@ get_teximage_source(struct intel_context *intel, GLenum internalFormat)
return NULL;
case GL_RGBA:
case GL_RGBA8:
- return intel_readbuf_region(intel);
case GL_RGB:
- if (intel->ctx.Visual.rgbBits == 16)
- return intel_readbuf_region(intel);
- return NULL;
+ case GL_RGB8:
+ return intel_readbuf_region(intel);
default:
return NULL;
}
@@ -99,14 +97,24 @@ do_copy_texsubimage(struct intel_context *intel,
if (!intelImage->mt || !src) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "%s fail %p %p\n",
- __FUNCTION__, intelImage->mt, src);
+ fprintf(stderr, "%s fail %p %p (0x%08x)\n",
+ __FUNCTION__, intelImage->mt, src, internalFormat);
+ return GL_FALSE;
+ }
+
+ if (intelImage->mt->cpp != src->cpp) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s fail %d vs %d cpp\n",
+ __FUNCTION__, intelImage->mt->cpp, src->cpp);
return GL_FALSE;
}
intelFlush(ctx);
LOCK_HARDWARE(intel);
{
+ drm_intel_bo *dst_bo = intel_region_buffer(intel,
+ intelImage->mt->region,
+ INTEL_WRITE_PART);
GLuint image_offset = intel_miptree_image_offset(intelImage->mt,
intelImage->face,
intelImage->level);
@@ -118,8 +126,12 @@ do_copy_texsubimage(struct intel_context *intel,
dstx += x - orig_x;
dsty += y - orig_y;
- /* image_offset may be non-page-aligned, but that's illegal for tiling. */
- assert(intelImage->mt->region->tiling == I915_TILING_NONE);
+ /* Can't blit to tiled buffers with non-tile-aligned offset. */
+ if (intelImage->mt->region->tiling != I915_TILING_NONE &&
+ (image_offset & 4095) != 0) {
+ UNLOCK_HARDWARE(intel);
+ return GL_FALSE;
+ }
if (ctx->ReadBuffer->Name == 0) {
/* reading from a window, adjust x, y */
@@ -140,35 +152,35 @@ do_copy_texsubimage(struct intel_context *intel,
src_pitch = src->pitch;
}
- intelEmitCopyBlit(intel,
- intelImage->mt->cpp,
- src_pitch,
- src->buffer,
- 0,
- src->tiling,
- intelImage->mt->pitch,
- intelImage->mt->region->buffer,
- image_offset,
- intelImage->mt->region->tiling,
- x, y, dstx, dsty, width, height,
- GL_COPY);
+ if (!intelEmitCopyBlit(intel,
+ intelImage->mt->cpp,
+ src_pitch,
+ src->buffer,
+ 0,
+ src->tiling,
+ intelImage->mt->pitch,
+ dst_bo,
+ image_offset,
+ intelImage->mt->region->tiling,
+ x, y, dstx, dsty, width, height,
+ GL_COPY)) {
+ UNLOCK_HARDWARE(intel);
+ return GL_FALSE;
+ }
}
UNLOCK_HARDWARE(intel);
/* GL_SGIS_generate_mipmap */
if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
- ctx->Driver.GenerateMipmap(ctx, target, texObj);
+ intel_generate_mipmap(ctx, target, texObj);
}
return GL_TRUE;
}
-
-
-
-void
+static void
intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
GLenum internalFormat,
GLint x, GLint y, GLsizei width, GLint border)
@@ -214,7 +226,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
width, border);
}
-void
+
+static void
intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
GLenum internalFormat,
GLint x, GLint y, GLsizei width, GLsizei height,
@@ -262,7 +275,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
}
-void
+static void
intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
GLint xoffset, GLint x, GLint y, GLsizei width)
{
@@ -287,8 +300,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
}
-
-void
+static void
intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y, GLsizei width, GLsizei height)
@@ -301,7 +313,6 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
_mesa_select_tex_image(ctx, texObj, target, level);
GLenum internalFormat = texImage->InternalFormat;
-
/* Need to check texture is compatible with source format.
*/
@@ -316,3 +327,13 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
xoffset, yoffset, x, y, width, height);
}
}
+
+
+void
+intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
+{
+ functions->CopyTexImage1D = intelCopyTexImage1D;
+ functions->CopyTexImage2D = intelCopyTexImage2D;
+ functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
+ functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+}
diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c
index 5e61e9e..c5f5220 100644
--- a/shared/intel_tex_image.c
+++ b/shared/intel_tex_image.c
@@ -131,6 +131,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
intelObj->mt = intel_miptree_create(intel,
intelObj->base.Target,
+ intelImage->base._BaseFormat,
intelImage->base.InternalFormat,
firstLevel,
lastLevel,
@@ -205,7 +206,7 @@ try_pbo_upload(struct intel_context *intel,
GLuint src_offset, src_stride;
GLuint dst_offset, dst_stride;
- if (!pbo ||
+ if (unpack->BufferObj->Name == 0 ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -235,12 +236,15 @@ try_pbo_upload(struct intel_context *intel,
INTEL_WRITE_FULL);
- intelEmitCopyBlit(intel,
- intelImage->mt->cpp,
- src_stride, src_buffer, src_offset, GL_FALSE,
- dst_stride, dst_buffer, dst_offset, GL_FALSE,
- 0, 0, 0, 0, width, height,
- GL_COPY);
+ if (!intelEmitCopyBlit(intel,
+ intelImage->mt->cpp,
+ src_stride, src_buffer, src_offset, GL_FALSE,
+ dst_stride, dst_buffer, dst_offset, GL_FALSE,
+ 0, 0, 0, 0, width, height,
+ GL_COPY)) {
+ UNLOCK_HARDWARE(intel);
+ return GL_FALSE;
+ }
}
UNLOCK_HARDWARE(intel);
@@ -248,7 +252,6 @@ try_pbo_upload(struct intel_context *intel,
}
-
static GLboolean
try_pbo_zcopy(struct intel_context *intel,
struct intel_texture_image *intelImage,
@@ -261,7 +264,7 @@ try_pbo_zcopy(struct intel_context *intel,
GLuint src_offset, src_stride;
GLuint dst_offset, dst_stride;
- if (!pbo ||
+ if (unpack->BufferObj->Name == 0 ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -293,10 +296,6 @@ try_pbo_zcopy(struct intel_context *intel,
}
-
-
-
-
static void
intelTexImage(GLcontext * ctx,
GLint dims,
@@ -307,7 +306,8 @@ intelTexImage(GLcontext * ctx,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *unpack,
struct gl_texture_object *texObj,
- struct gl_texture_image *texImage, GLsizei imageSize, int compressed)
+ struct gl_texture_image *texImage, GLsizei imageSize,
+ GLboolean compressed)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_object *intelObj = intel_texture_object(texObj);
@@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx,
GLint postConvHeight = height;
GLint texelBytes, sizeInBytes;
GLuint dstRowStride = 0, srcRowStride = texImage->RowStride;
- GLboolean needs_map;
DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
@@ -414,7 +413,9 @@ intelTexImage(GLcontext * ctx,
* a miptree, so create one just for our level and store it in the image.
* It'll get moved into the object miptree at validate time.
*/
- intelImage->mt = intel_miptree_create(intel, target, internalFormat,
+ intelImage->mt = intel_miptree_create(intel, target,
+ intelImage->base.TexFormat->BaseFormat,
+ internalFormat,
level, level,
width, height, depth,
intelImage->base.TexFormat->TexelBytes,
@@ -426,7 +427,7 @@ intelTexImage(GLcontext * ctx,
*/
if (dims <= 2 &&
intelImage->mt &&
- intel_buffer_object(unpack->BufferObj) &&
+ unpack->BufferObj->Name != 0 &&
check_pbo_format(internalFormat, format,
type, intelImage->base.TexFormat)) {
@@ -464,8 +465,6 @@ intelTexImage(GLcontext * ctx,
DBG("pbo upload failed\n");
}
-
-
/* intelCopyTexImage calls this function with pixels == NULL, with
* the expectation that the mipmap tree will be set up but nothing
* more will be done. This is where those calls return:
@@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx,
LOCK_HARDWARE(intel);
- /* Two cases where we need a mapping of the miptree: when the user supplied
- * data is mapped as well (non-PBO, memcpy upload) or when we're going to do
- * (software) mipmap generation.
- */
- needs_map = (pixels != NULL) || (level == texObj->BaseLevel &&
- texObj->GenerateMipmap);
-
if (intelImage->mt) {
- if (needs_map)
+ if (pixels != NULL)
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
intelImage->face,
@@ -547,25 +539,26 @@ intelTexImage(GLcontext * ctx,
format, type, pixels, unpack)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
}
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
}
_mesa_unmap_teximage_pbo(ctx, unpack);
if (intelImage->mt) {
- if (needs_map)
+ if (pixels != NULL)
intel_miptree_image_unmap(intel, intelImage->mt);
texImage->Data = NULL;
}
UNLOCK_HARDWARE(intel);
+
+ /* GL_SGIS_generate_mipmap */
+ if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+ intel_generate_mipmap(ctx, target, texObj);
+ }
}
-void
+
+static void
intelTexImage3D(GLcontext * ctx,
GLenum target, GLint level,
GLint internalFormat,
@@ -578,11 +571,11 @@ intelTexImage3D(GLcontext * ctx,
{
intelTexImage(ctx, 3, target, level,
internalFormat, width, height, depth, border,
- format, type, pixels, unpack, texObj, texImage, 0, 0);
+ format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
}
-void
+static void
intelTexImage2D(GLcontext * ctx,
GLenum target, GLint level,
GLint internalFormat,
@@ -594,10 +587,11 @@ intelTexImage2D(GLcontext * ctx,
{
intelTexImage(ctx, 2, target, level,
internalFormat, width, height, 1, border,
- format, type, pixels, unpack, texObj, texImage, 0, 0);
+ format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
}
-void
+
+static void
intelTexImage1D(GLcontext * ctx,
GLenum target, GLint level,
GLint internalFormat,
@@ -609,21 +603,24 @@ intelTexImage1D(GLcontext * ctx,
{
intelTexImage(ctx, 1, target, level,
internalFormat, width, 1, 1, border,
- format, type, pixels, unpack, texObj, texImage, 0, 0);
+ format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
}
-void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint border,
- GLsizei imageSize, const GLvoid *data,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage )
+
+static void
+intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint border,
+ GLsizei imageSize, const GLvoid *data,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage )
{
intelTexImage(ctx, 2, target, level,
internalFormat, width, height, 1, border,
- 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1);
+ 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
}
+
/**
* Need to map texture image into memory before copying image data,
* then unmap it.
@@ -632,7 +629,7 @@ static void
intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
GLenum format, GLenum type, GLvoid * pixels,
struct gl_texture_object *texObj,
- struct gl_texture_image *texImage, int compressed)
+ struct gl_texture_image *texImage, GLboolean compressed)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intelImage = intel_texture_image(texImage);
@@ -686,28 +683,29 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
}
}
-void
+
+static void
intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
GLenum format, GLenum type, GLvoid * pixels,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
intel_get_tex_image(ctx, target, level, format, type, pixels,
- texObj, texImage, 0);
-
-
+ texObj, texImage, GL_FALSE);
}
-void
+
+static void
intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
GLvoid *pixels,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
intel_get_tex_image(ctx, target, level, 0, 0, pixels,
- texObj, texImage, 1);
+ texObj, texImage, GL_TRUE);
}
+
void
intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
@@ -816,3 +814,16 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
*/
intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
}
+
+
+void
+intelInitTextureImageFuncs(struct dd_function_table *functions)
+{
+ functions->TexImage1D = intelTexImage1D;
+ functions->TexImage2D = intelTexImage2D;
+ functions->TexImage3D = intelTexImage3D;
+ functions->GetTexImage = intelGetTexImage;
+
+ functions->CompressedTexImage2D = intelCompressedTexImage2D;
+ functions->GetCompressedTexImage = intelGetCompressedTexImage;
+}
diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c
index e6f9a41..7d69ea4 100644
--- a/shared/intel_tex_layout.c
+++ b/shared/intel_tex_layout.c
@@ -35,26 +35,39 @@
#include "intel_context.h"
#include "main/macros.h"
-GLuint intel_compressed_alignment(GLenum internalFormat)
+void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
{
- GLuint alignment = 4;
-
switch (internalFormat) {
case GL_COMPRESSED_RGB_FXT1_3DFX:
case GL_COMPRESSED_RGBA_FXT1_3DFX:
- alignment = 8;
+ *w = 8;
+ *h = 4;
+ break;
+
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ *w = 4;
+ *h = 4;
break;
default:
+ *w = 4;
+ *h = 2;
break;
}
-
- return alignment;
}
-void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
+void i945_miptree_layout_2d( struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling )
{
- GLint align_h = 2, align_w = 4;
+ GLuint align_h = 2, align_w = 4;
GLuint level;
GLuint x = 0;
GLuint y = 0;
@@ -62,9 +75,9 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
GLuint height = mt->height0;
mt->pitch = mt->width0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
if (mt->compressed) {
- align_w = intel_compressed_alignment(mt->internal_format);
mt->pitch = ALIGN(mt->width0, align_w);
}
@@ -92,7 +105,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
/* Pitch must be a whole number of dwords, even though we
* express it in texels.
*/
- mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
mt->total_height = 0;
for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h
index dbc90e6..c9de9b5 100644
--- a/shared/intel_tex_layout.h
+++ b/shared/intel_tex_layout.h
@@ -38,5 +38,7 @@ static GLuint minify( GLuint d )
return MAX2(1, d>>1);
}
-extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
-extern GLuint intel_compressed_alignment(GLenum);
+extern void i945_miptree_layout_2d(struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling);
+extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c
index f86de56..8903707 100644
--- a/shared/intel_tex_subimage.c
+++ b/shared/intel_tex_subimage.c
@@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx,
GLenum target, GLint level,
GLint xoffset, GLint yoffset, GLint zoffset,
GLint width, GLint height, GLint depth,
+ GLsizei imageSize,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing,
struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
+ struct gl_texture_image *texImage,
+ GLboolean compressed)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intelImage = intel_texture_image(texImage);
@@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx,
intelFlush(ctx);
- pixels =
- _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
- type, pixels, packing, "glTexSubImage2D");
+ if (compressed)
+ pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize,
+ pixels, packing,
+ "glCompressedTexImage");
+ else
+ pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+ format, type, pixels, packing,
+ "glTexSubImage");
if (!pixels)
return;
@@ -90,20 +97,28 @@ intelTexSubimage(GLcontext * ctx,
assert(dstRowStride);
- if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
- texImage->TexFormat,
- texImage->Data,
- xoffset, yoffset, zoffset,
- dstRowStride,
- texImage->ImageOffsets,
- width, height, depth,
- format, type, pixels, packing)) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+ if (compressed) {
+ if (intelImage->mt) {
+ struct intel_region *dst = intelImage->mt->region;
+
+ _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+ xoffset, yoffset / 4,
+ (width + 3) & ~3, (height + 3) / 4,
+ pixels, (width + 3) & ~3, 0, 0);
+ } else
+ memcpy(texImage->Data, pixels, imageSize);
}
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
+ else {
+ if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+ texImage->TexFormat,
+ texImage->Data,
+ xoffset, yoffset, zoffset,
+ dstRowStride,
+ texImage->ImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+ }
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -114,13 +129,15 @@ intelTexSubimage(GLcontext * ctx,
}
UNLOCK_HARDWARE(intel);
-}
-
-
+ /* GL_SGIS_generate_mipmap */
+ if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+ intel_generate_mipmap(ctx, target, texObj);
+ }
+}
-void
+static void
intelTexSubImage3D(GLcontext * ctx,
GLenum target,
GLint level,
@@ -132,18 +149,15 @@ intelTexSubImage3D(GLcontext * ctx,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
-
intelTexSubimage(ctx, 3,
target, level,
xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing, texObj, texImage);
-
+ width, height, depth, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
-
-void
+static void
intelTexSubImage2D(GLcontext * ctx,
GLenum target,
GLint level,
@@ -155,17 +169,15 @@ intelTexSubImage2D(GLcontext * ctx,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
-
intelTexSubimage(ctx, 2,
target, level,
xoffset, yoffset, 0,
- width, height, 1,
- format, type, pixels, packing, texObj, texImage);
-
+ width, height, 1, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
-void
+static void
intelTexSubImage1D(GLcontext * ctx,
GLenum target,
GLint level,
@@ -180,12 +192,11 @@ intelTexSubImage1D(GLcontext * ctx,
intelTexSubimage(ctx, 1,
target, level,
xoffset, 0, 0,
- width, 1, 1,
- format, type, pixels, packing, texObj, texImage);
-
+ width, 1, 1, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
-void
+static void
intelCompressedTexSubImage2D(GLcontext * ctx,
GLenum target,
GLint level,
@@ -196,6 +207,20 @@ intelCompressedTexSubImage2D(GLcontext * ctx,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
- fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n",
- width, height, xoffset, yoffset);
+ intelTexSubimage(ctx, 2,
+ target, level,
+ xoffset, yoffset, 0,
+ width, height, 1, imageSize,
+ format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE);
+}
+
+
+
+void
+intelInitTextureSubImageFuncs(struct dd_function_table *functions)
+{
+ functions->TexSubImage1D = intelTexSubImage1D;
+ functions->TexSubImage2D = intelTexSubImage2D;
+ functions->TexSubImage3D = intelTexSubImage3D;
+ functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
}
diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c
index 05a375e..a284d54 100644
--- a/shared/intel_tex_validate.c
+++ b/shared/intel_tex_validate.c
@@ -199,6 +199,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
if (!intelObj->mt) {
intelObj->mt = intel_miptree_create(intel,
intelObj->base.Target,
+ firstImage->base._BaseFormat,
firstImage->base.InternalFormat,
intelObj->firstLevel,
intelObj->lastLevel,
@@ -241,7 +242,7 @@ intel_tex_map_level_images(struct intel_context *intel,
struct intel_texture_image *intelImage =
intel_texture_image(intelObj->base.Image[face][level]);
- if (intelImage->mt) {
+ if (intelImage && intelImage->mt) {
intelImage->base.Data =
intel_miptree_image_map(intel,
intelImage->mt,
@@ -268,7 +269,7 @@ intel_tex_unmap_level_images(struct intel_context *intel,
struct intel_texture_image *intelImage =
intel_texture_image(intelObj->base.Image[face][level]);
- if (intelImage->mt) {
+ if (intelImage && intelImage->mt) {
intel_miptree_image_unmap(intel, intelImage->mt);
intelImage->base.Data = NULL;
}