summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.ac8
-rw-r--r--i915/Makefile.am3
-rw-r--r--i915/i830_texblend.c1
-rw-r--r--i915/i830_texstate.c23
-rw-r--r--i915/i830_vtbl.c16
-rw-r--r--i915/i915_context.c5
-rw-r--r--i915/i915_context.h16
-rw-r--r--i915/i915_debug.c1
-rw-r--r--i915/i915_fragprog.c235
-rw-r--r--i915/i915_program.c31
-rw-r--r--i915/i915_program.h5
-rw-r--r--i915/i915_reg.h6
-rw-r--r--i915/i915_state.c32
-rw-r--r--i915/i915_texstate.c50
-rw-r--r--i915/i915_vtbl.c16
-rw-r--r--i915/intel_tris.c11
-rw-r--r--i965/Makefile.am3
-rw-r--r--i965/brw_cc.c51
-rw-r--r--i965/brw_clip.c3
-rw-r--r--i965/brw_clip.h9
-rw-r--r--i965/brw_clip_line.c8
-rw-r--r--i965/brw_clip_state.c11
-rw-r--r--i965/brw_clip_tri.c12
-rw-r--r--i965/brw_context.c3
-rw-r--r--i965/brw_context.h41
-rw-r--r--i965/brw_curbe.c2
-rw-r--r--i965/brw_defines.h16
-rw-r--r--i965/brw_draw.c21
-rw-r--r--i965/brw_draw_upload.c27
-rw-r--r--i965/brw_eu.h10
-rw-r--r--i965/brw_eu_emit.c3
-rw-r--r--i965/brw_fallback.c6
-rw-r--r--i965/brw_gs.c10
-rw-r--r--i965/brw_gs.h9
-rw-r--r--i965/brw_gs_emit.c38
-rw-r--r--i965/brw_gs_state.c5
-rw-r--r--i965/brw_misc_state.c10
-rw-r--r--i965/brw_sf.c8
-rw-r--r--i965/brw_sf.h8
-rw-r--r--i965/brw_sf_emit.c22
-rw-r--r--i965/brw_sf_state.c10
-rw-r--r--i965/brw_state.h10
-rw-r--r--i965/brw_state_cache.c95
-rw-r--r--i965/brw_state_upload.c7
-rw-r--r--i965/brw_tex.c32
-rw-r--r--i965/brw_tex_layout.c7
-rw-r--r--i965/brw_util.c2
-rw-r--r--i965/brw_util.h2
-rw-r--r--i965/brw_vs.c2
-rw-r--r--i965/brw_vs_emit.c154
-rw-r--r--i965/brw_vs_state.c37
-rw-r--r--i965/brw_vs_surface_state.c16
-rw-r--r--i965/brw_vtbl.c29
-rw-r--r--i965/brw_wm.c39
-rw-r--r--i965/brw_wm.h167
-rw-r--r--i965/brw_wm_emit.c766
-rw-r--r--i965/brw_wm_fp.c68
-rw-r--r--i965/brw_wm_glsl.c1276
-rw-r--r--i965/brw_wm_pass0.c7
-rw-r--r--i965/brw_wm_pass1.c6
-rw-r--r--i965/brw_wm_pass2.c4
-rw-r--r--i965/brw_wm_sampler_state.c21
-rw-r--r--i965/brw_wm_state.c9
-rw-r--r--i965/brw_wm_surface_state.c81
-rw-r--r--shared/intel_batchbuffer.c39
-rw-r--r--shared/intel_batchbuffer.h14
-rw-r--r--shared/intel_blit.c30
-rw-r--r--shared/intel_buffer_objects.c30
-rw-r--r--shared/intel_buffers.c38
-rw-r--r--shared/intel_buffers.h2
-rw-r--r--shared/intel_clear.c6
-rw-r--r--shared/intel_context.c127
-rw-r--r--shared/intel_context.h57
-rw-r--r--shared/intel_depthtmp.h10
-rw-r--r--shared/intel_extensions.c43
-rw-r--r--shared/intel_extensions.h2
-rw-r--r--shared/intel_fbo.c249
-rw-r--r--shared/intel_fbo.h5
-rw-r--r--shared/intel_generatemipmap.c304
-rw-r--r--shared/intel_mipmap_tree.c213
-rw-r--r--shared/intel_mipmap_tree.h32
-rw-r--r--shared/intel_pixel.c14
-rw-r--r--shared/intel_pixel.h2
-rw-r--r--shared/intel_pixel_bitmap.c18
-rw-r--r--shared/intel_pixel_copy.c4
-rw-r--r--shared/intel_pixel_draw.c10
-rw-r--r--shared/intel_pixel_read.c22
-rw-r--r--shared/intel_regions.c3
-rw-r--r--shared/intel_regions.h2
-rw-r--r--shared/intel_screen.c48
-rw-r--r--shared/intel_span.c272
-rw-r--r--shared/intel_span.h2
-rw-r--r--shared/intel_spantmp.h10
-rw-r--r--shared/intel_syncobj.c2
-rw-r--r--shared/intel_tex.c50
-rw-r--r--shared/intel_tex.h10
-rw-r--r--shared/intel_tex_copy.c76
-rw-r--r--shared/intel_tex_format.c85
-rw-r--r--shared/intel_tex_image.c176
-rw-r--r--shared/intel_tex_layout.h2
-rw-r--r--shared/intel_tex_obj.h1
-rw-r--r--shared/intel_tex_subimage.c31
-rw-r--r--shared/intel_tex_validate.c67
103 files changed, 2697 insertions, 3053 deletions
diff --git a/configure.ac b/configure.ac
index af47813..d339ca1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# Process this file with autoconf to produce a configure script
AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.6.0, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.7.0, [], mesa-dri-i9xx)
AM_INIT_AUTOMAKE([dist-bzip2])
@@ -15,9 +15,9 @@ AC_PROG_CC
# Checks for header files.
AC_HEADER_STDC
-PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3])
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0
- libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0])
+PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.15 libdrm_intel])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0
+ libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0])
AC_OUTPUT([
Makefile
diff --git a/i915/Makefile.am b/i915/Makefile.am
index 451e9fd..c994f96 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -5,7 +5,7 @@ I915_CFLAGS = -I../shared -I../shared/server -DI915
i915_dri_la_LTLIBRARIES = i915_dri.la
i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I915_CFLAGS)
i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
- $(DRM_LIBS) -ldrm_intel $(DRI_LIBS)
+ $(DRM_LIBS) $(DRI_LIBS)
i915_dri_ladir = @libdir@/dri
i915_dri_la_SOURCES = \
i830_context.c \
@@ -20,7 +20,6 @@ i915_dri_la_SOURCES = \
../shared/intel_batchbuffer.c \
../shared/intel_clear.c \
../shared/intel_extensions.c \
- ../shared/intel_generatemipmap.c \
../shared/intel_mipmap_tree.c \
../shared/intel_tex_layout.c \
../shared/intel_tex_image.c \
diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c
index 09f7f37..3f64be8 100644
--- a/i915/i830_texblend.c
+++ b/i915/i830_texblend.c
@@ -30,7 +30,6 @@
#include "main/mtypes.h"
#include "main/simple_list.h"
#include "main/enums.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/mm.h"
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
index 6f998fa..ce409b3 100644
--- a/i915/i830_texstate.c
+++ b/i915/i830_texstate.c
@@ -27,7 +27,6 @@
#include "main/mtypes.h"
#include "main/enums.h"
-#include "main/texformat.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -56,10 +55,9 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format)
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_ARGB8888:
- if (internal_format == GL_RGB)
- return MAPSURF_32BIT | MT_32BIT_XRGB8888;
- else
- return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ case MESA_FORMAT_XRGB8888:
+ return MAPSURF_32BIT | MT_32BIT_XRGB8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
@@ -160,13 +158,20 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
pitch = intelObj->pitchOverride;
} else {
+ GLuint dst_x, dst_y;
+
+ intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
+ &dst_x, &dst_y);
+
dri_bo_reference(intelObj->mt->region->buffer);
i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
- i830->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt,
- 0, intelObj->
- firstLevel);
+ /* XXX: This calculation is probably broken for tiled images with
+ * a non-page-aligned offset.
+ */
+ i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) *
+ intelObj->mt->cpp;
- format = translate_texture_format(firstImage->TexFormat->MesaFormat,
+ format = translate_texture_format(firstImage->TexFormat,
firstImage->InternalFormat);
pitch = intelObj->mt->pitch * intelObj->mt->cpp;
}
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
index 983f672..e8c8d5a 100644
--- a/i915/i830_vtbl.c
+++ b/i915/i830_vtbl.c
@@ -26,7 +26,6 @@
**************************************************************************/
#include "glapi/glapi.h"
-#include "main/texformat.h"
#include "i830_context.h"
#include "i830_reg.h"
@@ -646,8 +645,9 @@ i830_state_draw_region(struct intel_context *intel,
DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */
if (irb != NULL) {
- switch (irb->texformat->MesaFormat) {
+ switch (irb->Base.Format) {
case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
value |= DV_PF_8888;
break;
case MESA_FORMAT_RGB565:
@@ -661,7 +661,7 @@ i830_state_draw_region(struct intel_context *intel,
break;
default:
_mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ irb->Base.Format);
}
}
@@ -719,15 +719,6 @@ i830_new_batch(struct intel_context *intel)
assert(!intel->no_batch_wrap);
}
-
-
-static GLuint
-i830_flush_cmd(void)
-{
- return MI_FLUSH | FLUSH_MAP_CACHE;
-}
-
-
static void
i830_assert_not_dirty( struct intel_context *intel )
{
@@ -753,7 +744,6 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state;
i830->intel.vtbl.set_draw_region = i830_set_draw_region;
i830->intel.vtbl.update_texture_state = i830UpdateTextureState;
- i830->intel.vtbl.flush_cmd = i830_flush_cmd;
i830->intel.vtbl.render_start = i830_render_start;
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
diff --git a/i915/i915_context.c b/i915/i915_context.c
index 3ab7d68..7d4c7cf 100644
--- a/i915/i915_context.c
+++ b/i915/i915_context.c
@@ -40,6 +40,7 @@
#include "utils.h"
#include "i915_reg.h"
+#include "i915_program.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
@@ -80,6 +81,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
i915_update_stencil(ctx);
if (new_state & (_NEW_LIGHT))
i915_update_provoking_vertex(ctx);
+ if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
+ i915_update_program(ctx);
}
@@ -139,7 +142,7 @@ i915CreateContext(const __GLcontextModes * mesaVis,
ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
-
+ ctx->Const.MaxVarying = I915_TEX_UNITS;
/* Advertise the full hardware capabilities. The new memory
* manager should cope much better with overload situations:
diff --git a/i915/i915_context.h b/i915/i915_context.h
index 8de4a9d..25418d5 100644
--- a/i915/i915_context.h
+++ b/i915/i915_context.h
@@ -39,6 +39,7 @@
#define I915_FALLBACK_LOGICOP 0x20000
#define I915_FALLBACK_POLYGON_SMOOTH 0x40000
#define I915_FALLBACK_POINT_SMOOTH 0x80000
+#define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000
#define I915_UPLOAD_CTX 0x1
#define I915_UPLOAD_BUFFERS 0x2
@@ -121,10 +122,14 @@ enum {
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
+#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
+ I915_MAX_TEX_INSN + \
+ I915_MAX_ALU_INSN)
-#define I915_PROGRAM_SIZE 192
-
-#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN)
+/* Maximum size of the program packet, which matches the limits on
+ * decl, tex, and ALU instructions.
+ */
+#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1)
/* Hardware version of a parsed fragment program. "Derived" from the
* mesa fragment_program struct.
@@ -154,8 +159,9 @@ struct i915_fragment_program
*/
GLcontext *ctx;
- GLuint declarations[I915_PROGRAM_SIZE];
- GLuint program[I915_PROGRAM_SIZE];
+ /* declarations contains the packet header. */
+ GLuint declarations[I915_MAX_DECL_INSN * 3 + 1];
+ GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3];
GLfloat constant[I915_MAX_CONSTANT][4];
GLuint constant_flags[I915_MAX_CONSTANT];
diff --git a/i915/i915_debug.c b/i915/i915_debug.c
index f7bb7ea..fecfac3 100644
--- a/i915/i915_debug.c
+++ b/i915/i915_debug.c
@@ -806,6 +806,7 @@ static GLboolean i915_debug_packet( struct debug_stream *stream )
default:
return debug(stream, "", 0);
}
+ break;
default:
assert(0);
return 0;
diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c
index 2db10c6..d9c6144 100644
--- a/i915/i915_fragprog.c
+++ b/i915/i915_fragprog.c
@@ -89,7 +89,8 @@ src_vector(struct i915_fragment_program *p,
*/
case PROGRAM_TEMPORARY:
if (source->Index >= I915_MAX_TEMPORARY) {
- i915_program_error(p, "Exceeded max temporary reg");
+ i915_program_error(p, "Exceeded max temporary reg: %d/%d",
+ source->Index, I915_MAX_TEMPORARY);
return 0;
}
src = UREG(REG_TYPE_R, source->Index);
@@ -121,10 +122,23 @@ src_vector(struct i915_fragment_program *p,
src = i915_emit_decl(p, REG_TYPE_T,
T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
D0_CHANNEL_ALL);
+ break;
+
+ case FRAG_ATTRIB_VAR0:
+ case FRAG_ATTRIB_VAR0 + 1:
+ case FRAG_ATTRIB_VAR0 + 2:
+ case FRAG_ATTRIB_VAR0 + 3:
+ case FRAG_ATTRIB_VAR0 + 4:
+ case FRAG_ATTRIB_VAR0 + 5:
+ case FRAG_ATTRIB_VAR0 + 6:
+ case FRAG_ATTRIB_VAR0 + 7:
+ src = i915_emit_decl(p, REG_TYPE_T,
+ T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0),
+ D0_CHANNEL_ALL);
break;
default:
- i915_program_error(p, "Bad source->Index");
+ i915_program_error(p, "Bad source->Index: %d", source->Index);
return 0;
}
break;
@@ -146,6 +160,7 @@ src_vector(struct i915_fragment_program *p,
case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
+ case PROGRAM_UNIFORM:
src =
i915_emit_param4fv(p,
program->Base.Parameters->ParameterValues[source->
@@ -153,7 +168,7 @@ src_vector(struct i915_fragment_program *p,
break;
default:
- i915_program_error(p, "Bad source->File");
+ i915_program_error(p, "Bad source->File: %d", source->File);
return 0;
}
@@ -186,13 +201,14 @@ get_result_vector(struct i915_fragment_program *p,
p->depth_written = 1;
return UREG(REG_TYPE_OD, 0);
default:
- i915_program_error(p, "Bad inst->DstReg.Index");
+ i915_program_error(p, "Bad inst->DstReg.Index: %d",
+ inst->DstReg.Index);
return 0;
}
case PROGRAM_TEMPORARY:
return UREG(REG_TYPE_R, inst->DstReg.Index);
default:
- i915_program_error(p, "Bad inst->DstReg.File");
+ i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
return 0;
}
}
@@ -231,7 +247,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
case TEXTURE_CUBE_INDEX:
return D0_SAMPLE_TYPE_CUBE;
default:
- i915_program_error(p, "TexSrcBit");
+ i915_program_error(p, "TexSrcBit: %d", bit);
return 0;
}
}
@@ -351,7 +367,7 @@ upload_program(struct i915_fragment_program *p)
while (1) {
GLuint src0, src1, src2, flags;
- GLuint tmp = 0, consts0 = 0, consts1 = 0;
+ GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
switch (inst->Opcode) {
case OPCODE_ABS:
@@ -503,6 +519,10 @@ upload_program(struct i915_fragment_program *p)
EMIT_1ARG_ARITH(A0_FLR);
break;
+ case OPCODE_TRUNC:
+ EMIT_1ARG_ARITH(A0_TRC);
+ break;
+
case OPCODE_FRC:
EMIT_1ARG_ARITH(A0_FRC);
break;
@@ -516,6 +536,22 @@ upload_program(struct i915_fragment_program *p)
0, src0, T0_TEXKILL);
break;
+ case OPCODE_KIL_NV:
+ if (inst->DstReg.CondMask == COND_TR) {
+ tmp = i915_get_utemp(p);
+
+ i915_emit_texld(p, get_live_regs(p, inst),
+ tmp, A0_DEST_CHANNEL_ALL,
+ 0, /* use a dummy dest reg */
+ swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+ T0_TEXKILL);
+ } else {
+ p->error = 1;
+ i915_program_error(p, "Unsupported KIL_NV condition code: %d",
+ inst->DstReg.CondMask);
+ }
+ break;
+
case OPCODE_LG2:
src0 = src_vector(p, &inst->SrcReg[0], program);
@@ -615,6 +651,20 @@ upload_program(struct i915_fragment_program *p)
EMIT_2ARG_ARITH(A0_MUL);
break;
+ case OPCODE_NOISE1:
+ case OPCODE_NOISE2:
+ case OPCODE_NOISE3:
+ case OPCODE_NOISE4:
+ /* Don't implement noise because we just don't have the instructions
+ * to spare. We aren't the first vendor to do so.
+ */
+ i915_program_error(p, "Stubbed-out noise functions");
+ i915_emit_arith(p,
+ A0_MOV,
+ get_result_vector(p, inst),
+ get_result_flags(inst), 0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
+
case OPCODE_POW:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -721,9 +771,38 @@ upload_program(struct i915_fragment_program *p)
}
break;
- case OPCODE_SGE:
- EMIT_2ARG_ARITH(A0_SGE);
- break;
+ case OPCODE_SEQ:
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+ dst = get_result_vector(p, inst);
+
+ /* dst = src1 >= src2 */
+ i915_emit_arith(p,
+ A0_SGE,
+ dst,
+ flags, 0,
+ src_vector(p, &inst->SrcReg[0], program),
+ src_vector(p, &inst->SrcReg[1], program),
+ 0);
+ /* tmp = src1 <= src2 */
+ i915_emit_arith(p,
+ A0_SGE,
+ tmp,
+ flags, 0,
+ negate(src_vector(p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector(p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ /* dst = tmp && dst */
+ i915_emit_arith(p,
+ A0_MUL,
+ dst,
+ flags, 0,
+ dst,
+ tmp,
+ 0);
+ break;
case OPCODE_SIN:
src0 = src_vector(p, &inst->SrcReg[0], program);
@@ -809,10 +888,71 @@ upload_program(struct i915_fragment_program *p)
break;
+ case OPCODE_SGE:
+ EMIT_2ARG_ARITH(A0_SGE);
+ break;
+
+ case OPCODE_SGT:
+ i915_emit_arith(p,
+ A0_SLT,
+ get_result_vector( p, inst ),
+ get_result_flags( inst ), 0,
+ negate(src_vector( p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector( p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ break;
+
+ case OPCODE_SLE:
+ i915_emit_arith(p,
+ A0_SGE,
+ get_result_vector( p, inst ),
+ get_result_flags( inst ), 0,
+ negate(src_vector( p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector( p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ break;
+
case OPCODE_SLT:
EMIT_2ARG_ARITH(A0_SLT);
break;
+ case OPCODE_SNE:
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+ dst = get_result_vector(p, inst);
+
+ /* dst = src1 < src2 */
+ i915_emit_arith(p,
+ A0_SLT,
+ dst,
+ flags, 0,
+ src_vector(p, &inst->SrcReg[0], program),
+ src_vector(p, &inst->SrcReg[1], program),
+ 0);
+ /* tmp = src1 > src2 */
+ i915_emit_arith(p,
+ A0_SLT,
+ tmp,
+ flags, 0,
+ negate(src_vector(p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector(p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ /* dst = tmp || dst */
+ i915_emit_arith(p,
+ A0_ADD,
+ dst,
+ flags | A0_DEST_SATURATE, 0,
+ dst,
+ tmp,
+ 0);
+ break;
+
case OPCODE_SUB:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -869,8 +1009,39 @@ upload_program(struct i915_fragment_program *p)
case OPCODE_END:
return;
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_BRK:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_IF:
+ case OPCODE_RET:
+ p->error = 1;
+ i915_program_error(p, "Unsupported opcode: %s",
+ _mesa_opcode_string(inst->Opcode));
+ return;
+
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
+ * prog_instruction.h, but apparently GLSL doesn't ever emit them.
+ * Instead, it translates to EX2 or LG2.
+ */
+ case OPCODE_TXD:
+ case OPCODE_TXL:
+ /* These opcodes are claimed by GLSL in prog_instruction.h, but
+ * only NV_vp/fp appears to emit them.
+ */
default:
- i915_program_error(p, "bad opcode");
+ i915_program_error(p, "bad opcode: %s",
+ _mesa_opcode_string(inst->Opcode));
return;
}
@@ -906,7 +1077,7 @@ check_wpos(struct i915_fragment_program *p)
p->wpos_tex = -1;
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
- if (inputs & FRAG_BIT_TEX(i))
+ if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i)))
continue;
else if (inputs & FRAG_BIT_WPOS) {
p->wpos_tex = i;
@@ -1055,6 +1226,28 @@ i915ProgramStringNotify(GLcontext * ctx,
_tnl_program_string(ctx, target, prog);
}
+void
+i915_update_program(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct i915_context *i915 = i915_context(&intel->ctx);
+ struct i915_fragment_program *fp =
+ (struct i915_fragment_program *) ctx->FragmentProgram._Current;
+
+ if (i915->current_program != fp) {
+ if (i915->current_program) {
+ i915->current_program->on_hardware = 0;
+ i915->current_program->params_uptodate = 0;
+ }
+
+ i915->current_program = fp;
+ }
+
+ if (!fp->translated)
+ translate_program(fp);
+
+ FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
+}
void
i915ValidateFragmentProgram(struct i915_context *i915)
@@ -1072,16 +1265,6 @@ i915ValidateFragmentProgram(struct i915_context *i915)
GLuint s2 = S2_TEXCOORD_NONE;
int i, offset = 0;
- if (i915->current_program != p) {
- if (i915->current_program) {
- i915->current_program->on_hardware = 0;
- i915->current_program->params_uptodate = 0;
- }
-
- i915->current_program = p;
- }
-
-
/* Important:
*/
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
@@ -1125,6 +1308,14 @@ i915ValidateFragmentProgram(struct i915_context *i915)
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
}
+ else if (inputsRead & FRAG_BIT_VAR(i)) {
+ int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
+
+ s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+ s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+ EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
+ }
else if (i == p->wpos_tex) {
/* If WPOS is required, duplicate the XYZ position data in an
diff --git a/i915/i915_program.c b/i915/i915_program.c
index e87700f..e7908bd 100644
--- a/i915/i915_program.c
+++ b/i915/i915_program.c
@@ -130,6 +130,7 @@ i915_emit_decl(struct i915_fragment_program *p,
*(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
*(p->decl++) = D1_MBZ;
*(p->decl++) = D2_MBZ;
+ assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations));
p->nr_decl_insn++;
return reg;
@@ -186,6 +187,11 @@ i915_emit_arith(struct i915_fragment_program * p,
p->utemp_flag = old_utemp_flag; /* restore */
}
+ if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+ i915_program_error(p, "Program contains too many instructions");
+ return UREG_BAD;
+ }
+
*(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
*(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
*(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
@@ -270,6 +276,11 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
+ if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+ i915_program_error(p, "Program contains too many instructions");
+ return UREG_BAD;
+ }
+
*(p->csr++) = (op |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
@@ -424,12 +435,21 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
return 0;
}
-
-
+/* Warning the user about program errors seems to be quite valuable, from
+ * our bug reports. It unfortunately means piglit reporting errors
+ * when we fall back to software due to an unsupportable program, though.
+ */
void
-i915_program_error(struct i915_fragment_program *p, const char *msg)
+i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{
- _mesa_problem(NULL, "i915_program_error: %s", msg);
+ va_list args;
+
+ fprintf(stderr, "i915_program_error: ");
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "\n");
p->error = 1;
}
@@ -511,7 +531,8 @@ i915_upload_program(struct i915_context *i915,
GLuint program_size = p->csr - p->program;
GLuint decl_size = p->decl - p->declarations;
- FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error);
+ if (p->error)
+ return;
/* Could just go straight to the batchbuffer from here:
*/
diff --git a/i915/i915_program.h b/i915/i915_program.h
index 14a3f08..0d17d04 100644
--- a/i915/i915_program.h
+++ b/i915/i915_program.h
@@ -145,7 +145,7 @@ extern GLuint i915_emit_param4fv(struct i915_fragment_program *p,
const GLfloat * values);
extern void i915_program_error(struct i915_fragment_program *p,
- const char *msg);
+ const char *fmt, ...);
extern void i915_init_program(struct i915_context *i915,
struct i915_fragment_program *p);
@@ -155,7 +155,6 @@ extern void i915_upload_program(struct i915_context *i915,
extern void i915_fini_program(struct i915_fragment_program *p);
-
-
+extern void i915_update_program(GLcontext *ctx);
#endif
diff --git a/i915/i915_reg.h b/i915/i915_reg.h
index b5fa7fd..7f31ff6 100644
--- a/i915/i915_reg.h
+++ b/i915/i915_reg.h
@@ -626,9 +626,9 @@
#define MT_32BIT_AWVU2101010 (0xA<<3)
#define MT_32BIT_GR1616 (0xB<<3)
#define MT_32BIT_VU1616 (0xC<<3)
-#define MT_32BIT_xI824 (0xD<<3)
-#define MT_32BIT_xA824 (0xE<<3)
-#define MT_32BIT_xL824 (0xF<<3)
+#define MT_32BIT_x8I24 (0xD<<3)
+#define MT_32BIT_x8L24 (0xE<<3)
+#define MT_32BIT_x8A24 (0xF<<3)
#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
#define MT_422_YCRCB_NORMAL (1<<3)
#define MT_422_YCRCB_SWAPUV (2<<3)
diff --git a/i915/i915_state.c b/i915/i915_state.c
index b60efea..cc98d12 100644
--- a/i915/i915_state.c
+++ b/i915/i915_state.c
@@ -585,7 +585,7 @@ i915PointSize(GLcontext * ctx, GLfloat size)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK;
- GLint point_size = (int) size;
+ GLint point_size = (int) round(size);
DBG("%s\n", __FUNCTION__);
@@ -599,6 +599,24 @@ i915PointSize(GLcontext * ctx, GLfloat size)
}
+static void
+i915PointParameterfv(GLcontext * ctx, GLenum pname, const GLfloat *params)
+{
+ struct i915_context *i915 = I915_CONTEXT(ctx);
+
+ switch (pname) {
+ case GL_POINT_SPRITE_COORD_ORIGIN:
+ /* This could be supported, but it would require modifying the fragment
+ * program to invert the y component of the texture coordinate by
+ * inserting a 'SUB tc.y, {1.0}.xxxx, tc' instruction.
+ */
+ FALLBACK(&i915->intel, I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN,
+ (params[0] != GL_UPPER_LEFT));
+ break;
+ }
+}
+
+
/* =============================================================
* Color masks
*/
@@ -939,6 +957,17 @@ i915Enable(GLcontext * ctx, GLenum cap, GLboolean state)
case GL_POLYGON_SMOOTH:
break;
+ case GL_POINT_SPRITE:
+ /* This state change is handled in i915_reduced_primitive_state because
+ * the hardware bit should only be set when rendering points.
+ */
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ if (state)
+ i915->state.Ctx[I915_CTXREG_LIS4] |= S4_SPRITE_POINT_ENABLE;
+ else
+ i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_SPRITE_POINT_ENABLE;
+ break;
+
case GL_POINT_SMOOTH:
break;
@@ -1108,6 +1137,7 @@ i915InitStateFunctions(struct dd_function_table *functions)
functions->LineWidth = i915LineWidth;
functions->LogicOpcode = i915LogicOp;
functions->PointSize = i915PointSize;
+ functions->PointParameterfv = i915PointParameterfv;
functions->PolygonStipple = i915PolygonStipple;
functions->Scissor = i915Scissor;
functions->ShadeModel = i915ShadeModel;
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
index 32d4b30..de25848 100644
--- a/i915/i915_texstate.c
+++ b/i915/i915_texstate.c
@@ -27,7 +27,7 @@
#include "main/mtypes.h"
#include "main/enums.h"
-#include "main/texformat.h"
+#include "main/macros.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -37,7 +37,7 @@
static GLuint
-translate_texture_format(GLuint mesa_format, GLuint internal_format,
+translate_texture_format(gl_format mesa_format, GLuint internal_format,
GLenum DepthMode)
{
switch (mesa_format) {
@@ -56,10 +56,9 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format,
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_ARGB8888:
- if (internal_format == GL_RGB)
- return MAPSURF_32BIT | MT_32BIT_XRGB8888;
- else
- return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ case MESA_FORMAT_XRGB8888:
+ return MAPSURF_32BIT | MT_32BIT_XRGB8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
@@ -82,7 +81,12 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format,
case MESA_FORMAT_RGBA_DXT5:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
case MESA_FORMAT_S8_Z24:
- return (MAPSURF_32BIT | MT_32BIT_xI824);
+ if (DepthMode == GL_ALPHA)
+ return (MAPSURF_32BIT | MT_32BIT_x8A24);
+ else if (DepthMode == GL_INTENSITY)
+ return (MAPSURF_32BIT | MT_32BIT_x8I24);
+ else
+ return (MAPSURF_32BIT | MT_32BIT_x8L24);
default:
fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format);
abort();
@@ -134,6 +138,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
GLuint *state = i915->state.Tex[unit], format, pitch;
GLint lodbias, aniso = 0;
GLubyte border[4];
+ GLfloat maxlod;
memset(state, 0, sizeof(state));
@@ -173,11 +178,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
} else {
dri_bo_reference(intelObj->mt->region->buffer);
i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
- i915->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt,
- 0, intelObj->
- firstLevel);
+ i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */
- format = translate_texture_format(firstImage->TexFormat->MesaFormat,
+ format = translate_texture_format(firstImage->TexFormat,
firstImage->InternalFormat,
tObj->DepthMode);
pitch = intelObj->mt->pitch * intelObj->mt->cpp;
@@ -193,11 +196,15 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
}
+ /* We get one field with fraction bits to cover the maximum addressable (smallest
+ * resolution) LOD. Use it to cover both MAX_LEVEL and MAX_LOD.
+ */
+ maxlod = MIN2(tObj->MaxLod, tObj->MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_MS4] =
- ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK |
- ((((intelObj->lastLevel - intelObj->firstLevel) * 4)) <<
- MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) <<
- MS4_VOLUME_DEPTH_SHIFT));
+ ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
+ MS4_CUBE_FACE_ENA_MASK |
+ (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
+ ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
{
@@ -263,8 +270,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
/* YUV conversion:
*/
- if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
- firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
+ if (firstImage->TexFormat == MESA_FORMAT_YCBCR ||
+ firstImage->TexFormat == MESA_FORMAT_YCBCR_REV)
state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;
/* Shadow:
@@ -293,6 +300,12 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
GLenum wt = tObj->WrapT;
GLenum wr = tObj->WrapR;
+ /* We program 1D textures as 2D textures, so the 2D texcoord could
+ * result in sampling border values if we don't set the T wrap to
+ * repeat.
+ */
+ if (tObj->Target == GL_TEXTURE_1D)
+ wt = GL_REPEAT;
/* 3D textures don't seem to respect the border color.
* Fallback if there's ever a danger that they might refer to
@@ -327,6 +340,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
(translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
+ state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(tObj->MinLod, 0.0, 11.0), 4) <<
+ SS3_MIN_LOD_SHIFT);
+
}
/* convert border color from float to ubyte */
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
index 9a723d3..ff97e5a 100644
--- a/i915/i915_vtbl.c
+++ b/i915/i915_vtbl.c
@@ -32,7 +32,6 @@
#include "main/imports.h"
#include "main/macros.h"
#include "main/colormac.h"
-#include "main/texformat.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
@@ -54,8 +53,7 @@ i915_render_prevalidate(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
- if (!intel->Fallback)
- i915ValidateFragmentProgram(i915);
+ i915ValidateFragmentProgram(i915);
}
static void
@@ -589,8 +587,9 @@ i915_state_draw_region(struct intel_context *intel,
DSTORG_VERT_BIAS(0x8) | /* .5 */
LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL);
if (irb != NULL) {
- switch (irb->texformat->MesaFormat) {
+ switch (irb->Base.Format) {
case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
value |= DV_PF_8888;
break;
case MESA_FORMAT_RGB565:
@@ -604,7 +603,7 @@ i915_state_draw_region(struct intel_context *intel,
break;
default:
_mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ irb->Base.Format);
}
}
@@ -673,12 +672,6 @@ i915_new_batch(struct intel_context *intel)
assert(!intel->no_batch_wrap);
}
-static GLuint
-i915_flush_cmd(void)
-{
- return MI_FLUSH | FLUSH_MAP_CACHE;
-}
-
static void
i915_assert_not_dirty( struct intel_context *intel )
{
@@ -700,7 +693,6 @@ i915InitVtbl(struct i915_context *i915)
i915->intel.vtbl.render_prevalidate = i915_render_prevalidate;
i915->intel.vtbl.set_draw_region = i915_set_draw_region;
i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
- i915->intel.vtbl.flush_cmd = i915_flush_cmd;
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
i915->intel.vtbl.finish_batch = intel_finish_vb;
}
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index a905455..bc527aa 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -1076,7 +1076,9 @@ intelRunPipeline(GLcontext * ctx)
intel->NewGLState = 0;
}
+ intel_map_vertex_shader_textures(ctx);
_tnl_run_pipeline(ctx);
+ intel_unmap_vertex_shader_textures(ctx);
_mesa_unlock_context_textures(ctx);
}
@@ -1086,6 +1088,7 @@ intelRenderStart(GLcontext * ctx)
{
struct intel_context *intel = intel_context(ctx);
+ intel_check_front_buffer_rendering(intel);
intel->vtbl.render_start(intel_context(ctx));
intel->vtbl.emit_state(intel);
}
@@ -1194,12 +1197,16 @@ getFallbackString(GLuint bit)
+/**
+ * Enable/disable a fallback flag.
+ * \param bit one of INTEL_FALLBACK_x flags.
+ */
void
-intelFallback(struct intel_context *intel, GLuint bit, GLboolean mode)
+intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode)
{
GLcontext *ctx = &intel->ctx;
TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLuint oldfallback = intel->Fallback;
+ const GLbitfield oldfallback = intel->Fallback;
if (mode) {
intel->Fallback |= bit;
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 8ada8b5..8a61dab 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -5,7 +5,7 @@ I965_CFLAGS = -I../shared -I../shared/server
i965_dri_la_LTLIBRARIES = i965_dri.la
i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I965_CFLAGS)
i965_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl\
- $(DRM_LIBS) -ldrm_intel $(DRI_LIBS)
+ $(DRM_LIBS) $(DRI_LIBS)
i965_dri_ladir = @libdir@/dri
i965_dri_la_SOURCES = \
../shared/intel_batchbuffer.c \
@@ -17,7 +17,6 @@ i965_dri_la_SOURCES = \
../shared/intel_decode.c \
../shared/intel_extensions.c \
../shared/intel_fbo.c \
- ../shared/intel_generatemipmap.c \
../shared/intel_mipmap_tree.c \
../shared/intel_regions.c \
../shared/intel_screen.c \
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index c724218..bac1c3a 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -34,25 +34,35 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
+#include "intel_fbo.h"
#include "main/macros.h"
#include "main/enums.h"
static void prepare_cc_vp( struct brw_context *brw )
{
+ GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_viewport ccv;
memset(&ccv, 0, sizeof(ccv));
- ccv.min_depth = 0.0;
- ccv.max_depth = 1.0;
+ /* _NEW_TRANSOFORM */
+ if (ctx->Transform.DepthClamp) {
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ } else {
+ ccv.min_depth = 0.0;
+ ccv.max_depth = 1.0;
+ }
dri_bo_unreference(brw->cc.vp_bo);
- brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+ brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
+ NULL, 0);
}
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
@@ -80,6 +90,28 @@ struct brw_cc_unit_key {
GLenum depth_func;
};
+/**
+ * Modify blend function to force destination alpha to 1.0
+ *
+ * If \c function specifies a blend function that uses destination alpha,
+ * replace it with a function that hard-wires destination alpha to 1.0. This
+ * is used when rendering to xRGB targets.
+ */
+static GLenum
+fix_xRGB_alpha(GLenum function)
+{
+ switch (function) {
+ case GL_DST_ALPHA:
+ return GL_ONE;
+
+ case GL_ONE_MINUS_DST_ALPHA:
+ case GL_SRC_ALPHA_SATURATE:
+ return GL_ZERO;
+ }
+
+ return function;
+}
+
static void
cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
{
@@ -123,6 +155,17 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
key->blend_dst_rgb = ctx->Color.BlendDstRGB;
key->blend_src_a = ctx->Color.BlendSrcA;
key->blend_dst_a = ctx->Color.BlendDstA;
+
+ /* If the renderbuffer is XRGB, we have to frob the blend function to
+ * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA
+ * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0) {
+ key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb);
+ key->blend_src_a = fix_xRGB_alpha(key->blend_src_a);
+ key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb);
+ key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a);
+ }
}
key->alpha_enabled = ctx->Color.AlphaEnabled;
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index 20a927c..dbd10a5 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw,
delta = REG_SIZE;
for (i = 0; i < VERT_RESULT_MAX; i++)
- if (c.key.attrs & (1<<i)) {
+ if (c.key.attrs & BITFIELD64_BIT(i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
}
@@ -156,6 +156,7 @@ static void upload_clip_prog(struct brw_context *brw)
key.attrs = brw->vs.prog_data->outputs_written;
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+ key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
/* _NEW_TRANSFORM */
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
index 957df44..1c68255 100644
--- a/i965/brw_clip.h
+++ b/i965/brw_clip.h
@@ -42,22 +42,21 @@
* up polygon offset and flatshading at this point:
*/
struct brw_clip_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:4;
GLuint nr_userclip:3;
GLuint do_flat_shading:1;
+ GLuint pv_first:1;
GLuint do_unfilled:1;
GLuint fill_cw:2; /* includes cull information */
GLuint fill_ccw:2; /* includes cull information */
GLuint offset_cw:1;
GLuint offset_ccw:1;
- GLuint pad0:17;
-
GLuint copy_bfc_cw:1;
GLuint copy_bfc_ccw:1;
GLuint clip_mode:3;
- GLuint pad1:27;
-
+ GLuint pad0:11;
+
GLfloat offset_factor;
GLfloat offset_units;
};
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index 048ca62..fa9648f 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -269,8 +269,12 @@ void brw_emit_line_clip( struct brw_clip_compile *c )
brw_clip_line_alloc_regs(c);
brw_clip_init_ff_sync(c);
- if (c->key.do_flat_shading)
- brw_clip_copy_colors(c, 0, 1);
+ if (c->key.do_flat_shading) {
+ if (c->key.pv_first)
+ brw_clip_copy_colors(c, 1, 0);
+ else
+ brw_clip_copy_colors(c, 0, 1);
+ }
clip_and_emit_line(c);
}
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 5762c95..234b374 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -43,11 +43,14 @@ struct brw_clip_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ GLboolean depth_clamp;
};
static void
clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
{
+ GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_CLIP_PROG */
@@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_clip_entries;
key->urb_size = brw->urb.vsize;
+
+ /* _NEW_TRANSOFORM */
+ key->depth_clamp = ctx->Transform.DepthClamp;
}
static dri_bo *
@@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.userclip_enable_flags = 0x7f;
clip.clip5.userclip_must_clip = 1;
clip.clip5.guard_band_enable = 0;
- clip.clip5.viewport_z_clip_enable = 1;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1;
clip.clip5.viewport_xy_clip_enable = 1;
clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
clip.clip5.api_mode = BRW_CLIP_API_OGL;
@@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw )
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index 0efd772..cf79224 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -188,14 +188,20 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
brw_imm_ud(_3DPRIM_POLYGON));
is_poly = brw_IF(p, BRW_EXECUTE_1);
- {
+ {
brw_clip_copy_colors(c, 1, 0);
brw_clip_copy_colors(c, 2, 0);
}
is_poly = brw_ELSE(p, is_poly);
{
- brw_clip_copy_colors(c, 0, 2);
- brw_clip_copy_colors(c, 1, 2);
+ if (c->key.pv_first) {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ else {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
}
brw_ENDIF(p, is_poly);
}
diff --git a/i965/brw_context.c b/i965/brw_context.c
index c300c33..aaa2d80 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -33,7 +33,7 @@
#include "main/imports.h"
#include "main/api_noop.h"
#include "main/macros.h"
-#include "main/vtxfmt.h"
+/* #include "main/vtxfmt.h" */
#include "main/simple_list.h"
#include "shader/shader_api.h"
@@ -105,6 +105,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+ ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
diff --git a/i965/brw_context.h b/i965/brw_context.h
index a5209ac..fded47a 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -115,7 +115,7 @@
* Handles blending and (presumably) depth and stencil testing.
*/
-#define BRW_FALLBACK_TEXTURE 0x1
+
#define BRW_MAX_CURBE (32*16)
struct brw_context;
@@ -231,7 +231,7 @@ struct brw_vs_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
- GLuint outputs_written;
+ GLbitfield64 outputs_written;
GLuint nr_params; /**< number of float params/constants */
GLuint inputs_read;
@@ -252,20 +252,23 @@ struct brw_vs_ouput_sizes {
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
/**
* Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
-#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
* to surface binding table indexes, for WM.
*/
#define SURF_INDEX_DRAW(d) (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
-#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
/**
* Size of surface binding table for the VS.
@@ -317,7 +320,6 @@ struct brw_cache_item {
GLuint nr_reloc_bufs;
dri_bo *bo;
- GLuint data_size;
struct brw_cache_item *next;
};
@@ -330,7 +332,6 @@ struct brw_cache {
struct brw_cache_item **items;
GLuint size, n_items;
- GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
GLuint aux_size[BRW_MAX_CACHE];
char *name[BRW_MAX_CACHE];
@@ -410,23 +411,6 @@ struct brw_vertex_info {
GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
};
-
-
-
-/* Cache for TNL programs.
- */
-struct brw_tnl_cache_item {
- GLuint hash;
- void *key;
- void *data;
- struct brw_tnl_cache_item *next;
-};
-
-struct brw_tnl_cache {
- struct brw_tnl_cache_item **items;
- GLuint size, n_items;
-};
-
struct brw_query_object {
struct gl_query_object Base;
@@ -454,7 +438,6 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean tmp_fallback;
GLboolean no_batch_wrap;
struct {
@@ -705,10 +688,6 @@ void brw_debug_batch(struct intel_context *intel);
/*======================================================================
* brw_tex.c
*/
-void brwUpdateTextureState( struct intel_context *intel );
-void brw_FrameBufferTexInit( struct brw_context *brw,
- struct intel_region *region );
-void brw_FrameBufferTexDestroy( struct brw_context *brw );
void brw_validate_textures( struct brw_context *brw );
@@ -763,9 +742,5 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
return (const struct brw_fragment_program *) p;
}
-
-
-#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
-
#endif
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index 4be6c77..aadcfbe 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -130,7 +130,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = BRW_NEW_VERTEX_PROGRAM,
+ .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT,
.cache = CACHE_NEW_WM_PROG
},
.prepare = calculate_curbe_offsets
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index 78d457a..c19510b 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -673,18 +673,10 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
-#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG 3
/* for IGDNG only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index c53bd47..8bcb608 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -25,13 +25,15 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/state.h"
-#include "main/api_validate.h"
#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
#include "brw_draw.h"
#include "brw_defines.h"
@@ -42,11 +44,6 @@
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo_context.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-
#define FILE_DEBUG_FLAG DEBUG_BATCH
static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
@@ -145,7 +142,7 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
- prim_packet.base_vert_location = 0;
+ prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
@@ -156,18 +153,14 @@ static void brw_emit_prim(struct brw_context *brw,
* the besides the draw code.
*/
if (intel->always_flush_cache) {
- BEGIN_BATCH(1, IGNORE_CLIPRECTS);
- OUT_BATCH(intel->vtbl.flush_cmd());
- ADVANCE_BATCH();
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
}
if (intel->always_flush_cache) {
- BEGIN_BATCH(1, IGNORE_CLIPRECTS);
- OUT_BATCH(intel->vtbl.flush_cmd());
- ADVANCE_BATCH();
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
brw->no_batch_wrap = GL_FALSE;
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index 4aa17fa..ee684f6 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -25,12 +25,12 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
+#include "main/bufferobj.h"
#include "main/context.h"
#include "main/state.h"
-#include "main/api_validate.h"
+/* #include "main/api_validate.h" */
#include "main/enums.h"
#include "brw_draw.h"
@@ -375,7 +375,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
* isn't an issue at this point.
*/
if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
- intel->Fallback = 1;
+ intel->Fallback = GL_TRUE; /* boolean, not bitfield */
return;
}
@@ -384,7 +384,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- if (input->glarray->BufferObj->Name != 0) {
+ if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(input->glarray->BufferObj);
@@ -427,7 +427,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Position array not properly enabled:
*/
if (input->glarray->StrideB == 0) {
- intel->Fallback = 1;
+ intel->Fallback = GL_TRUE; /* boolean, not bitfield */
return;
}
@@ -536,16 +536,9 @@ static void brw_emit_vertices(struct brw_context *brw)
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
if (BRW_IS_IGDNG(brw)) {
- if (input->stride) {
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->offset + input->stride * input->count);
- } else {
- assert(input->count == 1);
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->offset + input->element_size);
- }
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->bo->size - 1);
} else
OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
@@ -623,7 +616,7 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Turn into a proper VBO:
*/
- if (!bufferobj->Name) {
+ if (!_mesa_is_bufferobj(bufferobj)) {
brw->ib.start_vertex_offset = 0;
/* Get new bufferobj, offset:
@@ -726,7 +719,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
brw->ib.offset);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
- brw->ib.offset + brw->ib.size);
+ brw->ib.offset + brw->ib.size - 1);
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
index 30603bd..39eb88d 100644
--- a/i965/brw_eu.h
+++ b/i965/brw_eu.h
@@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file,
GLuint writemask )
{
struct brw_reg reg;
- if (type == BRW_GENERAL_REGISTER_FILE)
+ if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
- else if (type == BRW_MESSAGE_REGISTER_FILE)
- assert(nr < BRW_MAX_MRF);
- else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ else if (file == BRW_MESSAGE_REGISTER_FILE)
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+ else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
reg.type = type;
@@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
- assert(nr < BRW_MAX_MRF);
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 241cdc3..7ceabba 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn,
static void brw_set_dest( struct brw_instruction *insn,
struct brw_reg dest )
{
- if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
insn->bits1.da1.dest_reg_file = dest.file;
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index d27c6c2..562a178 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -133,7 +133,11 @@ const struct brw_tracked_state brw_check_fallback = {
-/* Not used:
+/**
+ * Called by the INTEL_FALLBACK() macro.
+ * NOTE: this is a no-op for the i965 driver. The brw->intel.Fallback
+ * field is treated as a boolean, not a bitmask. It's only set in a
+ * couple of places.
*/
void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode )
{
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index 48c2b9a..610b6c3 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -85,10 +85,10 @@ static void compile_gs_prog( struct brw_context *brw,
*/
switch (key->primitive) {
case GL_QUADS:
- brw_gs_quads( &c );
+ brw_gs_quads( &c, key );
break;
case GL_QUAD_STRIP:
- brw_gs_quad_strip( &c );
+ brw_gs_quad_strip( &c, key );
break;
case GL_LINE_LOOP:
brw_gs_lines( &c );
@@ -149,6 +149,7 @@ static const GLenum gs_prim[GL_POLYGON+1] = {
static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
+ GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
@@ -158,6 +159,9 @@ static void populate_key( struct brw_context *brw,
key->primitive = gs_prim[brw->primitive];
key->hint_gs_always = 0; /* debug code? */
+
+ /* _NEW_LIGHT */
+ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
key->need_gs_prog = (key->hint_gs_always ||
brw->primitive == GL_QUADS ||
@@ -193,7 +197,7 @@ static void prepare_gs_prog(struct brw_context *brw)
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_LIGHT,
.brw = BRW_NEW_PRIMITIVE,
.cache = CACHE_NEW_VS_PROG
},
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
index bbb991e..010c1c2 100644
--- a/i965/brw_gs.h
+++ b/i965/brw_gs.h
@@ -40,11 +40,12 @@
#define MAX_GS_VERTS (4)
struct brw_gs_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:4;
GLuint hint_gs_always:1;
+ GLuint pv_first:1;
GLuint need_gs_prog:1;
- GLuint pad:26;
+ GLuint pad:25;
};
struct brw_gs_compile {
@@ -67,8 +68,8 @@ struct brw_gs_compile {
#define ATTR_SIZE (4*4)
-void brw_gs_quads( struct brw_gs_compile *c );
-void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_tris( struct brw_gs_compile *c );
void brw_gs_lines( struct brw_gs_compile *c );
void brw_gs_points( struct brw_gs_compile *c );
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
index a9b2aa2..0fc5b02 100644
--- a/i965/brw_gs_emit.c
+++ b/i965/brw_gs_emit.c
@@ -120,7 +120,7 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
}
-void brw_gs_quads( struct brw_gs_compile *c )
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
brw_gs_alloc_regs(c, 4);
@@ -128,23 +128,39 @@ void brw_gs_quads( struct brw_gs_compile *c )
* is the PV for quads, but vertex 0 for polygons:
*/
if (c->need_ff_sync)
- brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
- brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ brw_gs_ff_sync(c, 1);
+ if (key->pv_first) {
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
+ else {
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
}
-void brw_gs_quad_strip( struct brw_gs_compile *c )
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
brw_gs_alloc_regs(c, 4);
if (c->need_ff_sync)
brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
- brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
- brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ if (key->pv_first) {
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
+ else {
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+ }
}
void brw_gs_tris( struct brw_gs_compile *c )
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index a761c03..ed9d2ff 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
gs.thread4.nr_urb_entries = key->nr_urb_entries;
gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- gs.thread4.max_threads = 0; /* Hardware requirement */
+ if (key->nr_urb_entries >= 8)
+ gs.thread4.max_threads = 1;
+ else
+ gs.thread4.max_threads = 0;
if (BRW_IS_IGDNG(brw))
gs.thread4.rendering_enable = 1;
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index ea71857..4b0d598 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -66,7 +66,7 @@ static void upload_blend_constant_color(struct brw_context *brw)
const struct brw_tracked_state brw_blend_constant_color = {
.dirty = {
.mesa = _NEW_COLOR,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_blend_constant_color
@@ -93,7 +93,7 @@ static void upload_drawing_rect(struct brw_context *brw)
const struct brw_tracked_state brw_drawing_rect = {
.dirty = {
.mesa = _NEW_BUFFERS,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_drawing_rect
@@ -317,7 +317,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
.mesa = _NEW_POLYGONSTIPPLE,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple
@@ -362,7 +362,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
.mesa = _NEW_WINDOW_POS,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple_offset
@@ -425,7 +425,7 @@ static void upload_line_stipple(struct brw_context *brw)
const struct brw_tracked_state brw_line_stipple = {
.dirty = {
.mesa = _NEW_LINE,
- .brw = 0,
+ .brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_line_stipple
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index e1c2c77..968890f 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -61,7 +61,7 @@ static void compile_sf_prog( struct brw_context *brw,
c.key = *key;
c.nr_attrs = brw_count_bits(c.key.attrs);
c.nr_attr_regs = (c.nr_attrs+1)/2;
- c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+ c.nr_setup_attrs = brw_count_bits(c.key.attrs);
c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
c.prog_data.urb_read_length = c.nr_attr_regs;
@@ -70,7 +70,7 @@ static void compile_sf_prog( struct brw_context *brw,
/* Construct map from attribute number to position in the vertex.
*/
for (i = idx = 0; i < VERT_RESULT_MAX; i++)
- if (c.key.attrs & (1<<i)) {
+ if (c.key.attrs & BITFIELD64_BIT(i)) {
c.attr_to_idx[i] = idx;
c.idx_to_attr[idx] = i;
if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
@@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw)
* edgeflag testing here, it is already done in the clip
* program.
*/
- if (key.attrs & (1<<VERT_RESULT_EDGE))
+ if (key.attrs & BITFIELD64_BIT(VERT_RESULT_EDGE))
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
@@ -161,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw)
}
key.do_point_sprite = ctx->Point.PointSprite;
- key.SpriteOrigin = ctx->Point.SpriteOrigin;
+ key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
index 6426b6d..0ba731f 100644
--- a/i965/brw_sf.h
+++ b/i965/brw_sf.h
@@ -45,19 +45,19 @@
#define SF_UNFILLED_TRIS 3
struct brw_sf_prog_key {
- GLuint attrs:32;
+ GLbitfield64 attrs;
GLuint primitive:2;
GLuint do_twoside_color:1;
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
GLuint linear_color:1; /**< linear interp vs. perspective interp */
- GLuint pad:25;
- GLenum SpriteOrigin;
+ GLuint sprite_origin_lower_left:1;
+ GLuint pad:24;
};
struct brw_sf_point_tex {
- GLboolean CoordReplace;
+ GLboolean CoordReplace;
};
struct brw_sf_compile {
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index ca8f97f..3eae41e 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -56,7 +56,7 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
static GLboolean have_attr(struct brw_sf_compile *c,
GLuint attr)
{
- return (c->key.attrs & (1<<attr)) ? 1 : 0;
+ return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
}
/***********************************************************************
@@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c )
* Flat shading
*/
-#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
- (1<<VERT_RESULT_COL1))
+#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
+ BITFIELD64_BIT(VERT_RESULT_COL1))
static void copy_colors( struct brw_sf_compile *c,
struct brw_reg dst,
@@ -312,8 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
GLushort *pc_linear)
{
GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
- GLuint persp_mask;
- GLuint linear_mask;
+ GLbitfield64 persp_mask;
+ GLbitfield64 linear_mask;
if (c->key.do_flat_shading || c->key.linear_color)
persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
@@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
*pc_linear = 0;
*pc = 0xf;
- if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+ if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
*pc_persp = 0xf;
- if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+ if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
*pc_linear = 0xf;
/* Maybe only processs one attribute on the final round:
@@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
if (reg*2+1 < c->nr_setup_attrs) {
*pc |= 0xf0;
- if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+ if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
*pc_persp |= 0xf0;
- if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+ if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
*pc_linear |= 0xf0;
}
@@ -551,7 +551,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
- if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ if (c->key.sprite_origin_lower_left) {
brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
@@ -570,7 +570,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
{
brw_set_predicate_control_flag_value(p, pc);
if (tex->CoordReplace) {
- if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ if (c->key.sprite_origin_lower_left) {
brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
}
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index bc0f076..bb69435 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -93,7 +93,8 @@ static void upload_sf_vp(struct brw_context *brw)
}
dri_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+ brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv),
+ NULL, 0);
}
const struct brw_tracked_state brw_sf_vp = {
@@ -113,7 +114,8 @@ struct brw_sf_unit_key {
unsigned int nr_urb_entries, urb_size, sfsize;
- GLenum front_face, cull_face, provoking_vertex;
+ GLenum front_face, cull_face;
+ unsigned pv_first:1;
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
@@ -154,7 +156,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_attenuated = ctx->Point._Attenuated;
/* _NEW_LIGHT */
- key->provoking_vertex = ctx->Light.ProvokingVertex;
+ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
@@ -287,7 +289,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
- if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+ if (!key->pv_first) {
sf.sf7.trifan_pv = 2;
sf.sf7.linestrip_pv = 1;
sf.sf7.tristrip_pv = 2;
diff --git a/i965/brw_state.h b/i965/brw_state.h
index 5335eac..b129b1f 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -112,6 +112,7 @@ void brw_validate_state(struct brw_context *brw);
void brw_upload_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
+void brw_clear_validated_bos(struct brw_context *brw);
/***********************************************************************
* brw_state_cache.c
@@ -119,16 +120,10 @@ void brw_destroy_state(struct brw_context *brw);
dri_bo *brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
+ GLuint size,
dri_bo **reloc_bufs,
GLuint nr_reloc_bufs);
-dri_bo *brw_cache_data_sz(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint data_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs);
-
dri_bo *brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
@@ -151,6 +146,7 @@ void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
/***********************************************************************
* brw_state_batch.c
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index e40d7a0..e4c9ba7 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -245,7 +245,6 @@ brw_upload_cache( struct brw_cache *cache,
item->bo = bo;
dri_bo_reference(bo);
- item->data_size = data_size;
if (cache->n_items > cache->size * 1.5)
rehash(cache);
@@ -275,15 +274,22 @@ brw_upload_cache( struct brw_cache *cache,
/**
- * This doesn't really work with aux data. Use search/upload instead
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ *
+ * If aux data is involved, use search/upload instead.
+
*/
dri_bo *
-brw_cache_data_sz(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint data_size,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs)
+brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs)
{
dri_bo *bo;
struct brw_cache_item *item;
@@ -306,25 +312,6 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
-
-/**
- * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
- *
- * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
- * better to use, as the potentially changing offsets in the data-used-as-key
- * will result in excessive cache misses.
- */
-dri_bo *
-brw_cache_data(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- dri_bo **reloc_bufs,
- GLuint nr_reloc_bufs)
-{
- return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
- reloc_bufs, nr_reloc_bufs);
-}
-
enum pool_type {
DW_SURFACE_STATE,
DW_GENERAL_STATE
@@ -335,11 +322,9 @@ static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
enum brw_cache_id id,
- GLuint key_size,
GLuint aux_size)
{
cache->name[id] = strdup(name);
- cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
@@ -359,91 +344,76 @@ brw_init_non_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
- sizeof(struct brw_cc_viewport),
0);
brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
- sizeof(struct brw_cc_unit_state),
0);
brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
- sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
- sizeof(struct brw_sampler_default_color),
0);
brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
- 0, /* variable key/data size */
0);
brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
- sizeof(struct brw_wm_unit_state),
0);
brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
- sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
- sizeof(struct brw_sf_viewport),
0);
brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
- sizeof(struct brw_sf_unit_state),
0);
brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
- sizeof(struct brw_vs_unit_state),
0);
brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
- sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
- sizeof(struct brw_clip_unit_state),
0);
brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
- sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
- sizeof(struct brw_gs_unit_state),
0);
brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
- sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
}
@@ -463,13 +433,11 @@ brw_init_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
- sizeof(struct brw_surface_state),
0);
brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
- 0,
0);
}
@@ -517,6 +485,41 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
brw->state.dirty.cache |= ~0;
}
+/* Clear all entries from the cache that point to the given bo.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
+{
+ struct brw_cache_item **prev;
+ GLuint i;
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < cache->size; i++) {
+ for (prev = &cache->items[i]; *prev;) {
+ struct brw_cache_item *c = *prev;
+
+ if (drm_intel_bo_references(c->bo, bo)) {
+ int j;
+
+ *prev = c->next;
+
+ for (j = 0; j < c->nr_reloc_bufs; j++)
+ dri_bo_unreference(c->reloc_bufs[j]);
+ dri_bo_unreference(c->bo);
+ free((void *)c->key);
+ free(c);
+ cache->n_items--;
+ } else {
+ prev = &c->next;
+ }
+ }
+ }
+}
void
brw_state_cache_check_size(struct brw_context *brw)
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index b817b74..af8dfb4 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -34,6 +34,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
/* This is used to initialize brw->state.atoms[]. We could use this
* list directly except for a single atom, brw_constant_buffer, which
@@ -142,7 +143,7 @@ static void xor_states( struct brw_state_flags *result,
result->cache = a->cache ^ b->cache;
}
-static void
+void
brw_clear_validated_bos(struct brw_context *brw)
{
int i;
@@ -308,7 +309,7 @@ void brw_validate_state( struct brw_context *brw )
if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
brw_clear_batch_cache(brw);
- brw->intel.Fallback = 0;
+ brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
@@ -324,6 +325,8 @@ void brw_validate_state( struct brw_context *brw )
}
}
+ intel_check_front_buffer_rendering(intel);
+
/* Make sure that the textures which are referenced by the current
* brw fragment program are actually present/valid.
* If this fails, we can experience GPU lock-ups.
diff --git a/i965/brw_tex.c b/i965/brw_tex.c
index 71bff16..e911b10 100644
--- a/i965/brw_tex.c
+++ b/i965/brw_tex.c
@@ -39,38 +39,6 @@
#include "intel_tex.h"
#include "brw_context.h"
-
-void brw_FrameBufferTexInit( struct brw_context *brw,
- struct intel_region *region )
-{
- struct intel_context *intel = &brw->intel;
- GLcontext *ctx = &intel->ctx;
- struct gl_texture_object *obj;
- struct gl_texture_image *img;
-
- intel->frame_buffer_texobj = obj =
- ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D );
-
- obj->MinFilter = GL_NEAREST;
- obj->MagFilter = GL_NEAREST;
-
- img = ctx->Driver.NewTextureImage( ctx );
-
- _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img,
- region->pitch, region->height, 1, 0,
- region->cpp == 4 ? GL_RGBA : GL_RGB );
-
- _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img );
-}
-
-void brw_FrameBufferTexDestroy( struct brw_context *brw )
-{
- if (brw->intel.frame_buffer_texobj != NULL)
- brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx,
- brw->intel.frame_buffer_texobj );
- brw->intel.frame_buffer_texobj = NULL;
-}
-
/**
* Finalizes all textures, completing any rendering that needs to be done
* to prepare them.
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index 5986cbf..e59e52e 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -86,10 +86,10 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
if (mt->compressed) {
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4;
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
} else {
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h);
mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
}
@@ -102,7 +102,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
height, 1);
for (q = 0; q < nr_images; q++)
- intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+ intel_miptree_set_image_offset(mt, level, q,
+ x, y + q * qpitch);
if (mt->compressed)
img_height = MAX2(1, height/4);
diff --git a/i965/brw_util.c b/i965/brw_util.c
index ce21aa4..bba9249 100644
--- a/i965/brw_util.c
+++ b/i965/brw_util.c
@@ -35,7 +35,7 @@
#include "brw_util.h"
#include "brw_defines.h"
-GLuint brw_count_bits( GLuint val )
+GLuint brw_count_bits(uint64_t val)
{
GLuint i;
for (i = 0; val ; val >>= 1)
diff --git a/i965/brw_util.h b/i965/brw_util.h
index 33e7cd8..04f3175 100644
--- a/i965/brw_util.h
+++ b/i965/brw_util.h
@@ -35,7 +35,7 @@
#include "main/mtypes.h"
-extern GLuint brw_count_bits( GLuint val );
+extern GLuint brw_count_bits(uint64_t val);
extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index f0c79ef..fd055e2 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw,
c.prog_data.inputs_read = vp->program.Base.InputsRead;
if (c.key.copy_edgeflag) {
- c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+ c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
}
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index 108e19c..27aac8b 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -147,7 +147,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
for (i = 0; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
if (i == VERT_RESULT_HPOS) {
@@ -331,63 +331,76 @@ static void unalias3( struct brw_vs_compile *c,
}
}
-static void emit_sop( struct brw_compile *p,
+static void emit_sop( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1,
GLuint cond)
{
+ struct brw_compile *p = &c->func;
+
brw_MOV(p, dst, brw_imm_f(0.0f));
brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
brw_MOV(p, dst, brw_imm_f(1.0f));
brw_set_predicate_control_flag_value(p, 0xff);
}
-static void emit_seq( struct brw_compile *p,
+static void emit_seq( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
}
-static void emit_sne( struct brw_compile *p,
+static void emit_sne( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
}
-static void emit_slt( struct brw_compile *p,
+static void emit_slt( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_L);
}
-static void emit_sle( struct brw_compile *p,
+static void emit_sle( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_LE);
}
-static void emit_sgt( struct brw_compile *p,
+static void emit_sgt( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_G);
}
-static void emit_sge( struct brw_compile *p,
+static void emit_sge( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+ emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_cmp( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, brw_imm_f(0));
+ brw_SEL(p, dst, arg1, arg2);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
static void emit_max( struct brw_compile *p,
@@ -912,6 +925,7 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
case PROGRAM_ENV_PARAM:
+ case PROGRAM_LOCAL_PARAM:
if (c->vp->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
@@ -930,7 +944,6 @@ get_src_reg( struct brw_vs_compile *c,
/* this is a normal case since we loop over all three src args */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
case PROGRAM_WRITE_ONLY:
default:
assert(0);
@@ -1122,7 +1135,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
- if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
c->key.nr_userclip || BRW_IS_965(p->brw))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
@@ -1132,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_16);
- if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
@@ -1208,7 +1221,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
eot, /* eot */
- 1, /* writes complete */
+ eot, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
@@ -1222,7 +1235,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
GLuint i, mrf = 0;
for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
- if (c->prog_data.outputs_written & (1 << i)) {
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
/* move from GRF to MRF */
brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
mrf++;
@@ -1269,10 +1282,60 @@ post_vs_emit( struct brw_vs_compile *c,
}
}
+static GLboolean
+accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1];
+
+ if (p->nr_insn == 0)
+ return GL_FALSE;
+
+ if (val.address_mode != BRW_ADDRESS_DIRECT)
+ return GL_FALSE;
+
+ switch (prev_insn->header.opcode) {
+ case BRW_OPCODE_MOV:
+ case BRW_OPCODE_MAC:
+ case BRW_OPCODE_MUL:
+ if (prev_insn->header.access_mode == BRW_ALIGN_16 &&
+ prev_insn->header.execution_size == val.width &&
+ prev_insn->bits1.da1.dest_reg_file == val.file &&
+ prev_insn->bits1.da1.dest_reg_type == val.type &&
+ prev_insn->bits1.da1.dest_address_mode == val.address_mode &&
+ prev_insn->bits1.da1.dest_reg_nr == val.nr &&
+ prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 &&
+ prev_insn->bits1.da16.dest_writemask == 0xf)
+ return GL_TRUE;
+ else
+ return GL_FALSE;
+ default:
+ return GL_FALSE;
+ }
+}
+
static uint32_t
-get_predicate(uint32_t swizzle)
+get_predicate(const struct prog_instruction *inst)
{
- switch (swizzle) {
+ if (inst->DstReg.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+ assert(inst->DstReg.CondMask == COND_NE);
+
+ switch (inst->DstReg.CondSwizzle) {
case SWIZZLE_XXXX:
return BRW_PREDICATE_ALIGN16_REPLICATE_X;
case SWIZZLE_YYYY:
@@ -1282,7 +1345,8 @@ get_predicate(uint32_t swizzle)
case SWIZZLE_WWWW:
return BRW_PREDICATE_ALIGN16_REPLICATE_W;
default:
- _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
+ inst->DstReg.CondMask);
return BRW_PREDICATE_NORMAL;
}
}
@@ -1294,6 +1358,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
@@ -1427,9 +1492,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
break;
case OPCODE_MAD:
- brw_MOV(p, brw_acc_reg(), args[2]);
+ if (!accumulator_contains(c, args[2]))
+ brw_MOV(p, brw_acc_reg(), args[2]);
brw_MAC(p, dst, args[0], args[1]);
break;
+ case OPCODE_CMP:
+ emit_cmp(p, dst, args[0], args[1], args[2]);
+ break;
case OPCODE_MAX:
emit_max(p, dst, args[0], args[1]);
break;
@@ -1453,25 +1522,25 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_SEQ:
- emit_seq(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_seq);
break;
case OPCODE_SIN:
emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_SNE:
- emit_sne(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sne);
break;
case OPCODE_SGE:
- emit_sge(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sge);
break;
case OPCODE_SGT:
- emit_sgt(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sgt);
break;
case OPCODE_SLT:
- emit_slt(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_slt);
break;
case OPCODE_SLE:
- emit_sle(p, dst, args[0], args[1]);
+ unalias2(c, dst, args[0], args[1], emit_sle);
break;
case OPCODE_SUB:
brw_ADD(p, dst, args[0], negate(args[1]));
@@ -1492,8 +1561,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
- if_inst[if_depth]->header.predicate_control =
- get_predicate(inst->DstReg.CondSwizzle);
+ /* Note that brw_IF smashes the predicate_control field. */
+ if_inst[if_depth]->header.predicate_control = get_predicate(inst);
if_depth++;
break;
case OPCODE_ELSE:
@@ -1503,45 +1572,48 @@ void brw_vs_emit(struct brw_vs_compile *c )
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
break;
-#if 0
case OPCODE_BGNLOOP:
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
{
struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
loop_depth--;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
}
}
break;
-#else
- (void) loop_inst;
- (void) loop_depth;
-#endif
case OPCODE_BRA:
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index d790ab6..7285466 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -109,10 +109,39 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
vs.thread3.urb_entry_read_offset = 0;
vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
- vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
- else
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ if (BRW_IS_IGDNG(brw)) {
+ switch (key->nr_urb_entries) {
+ case 8:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 96:
+ case 128:
+ case 168:
+ case 192:
+ case 224:
+ case 256:
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (key->nr_urb_entries) {
+ case 8:
+ case 12:
+ case 16:
+ case 32:
+ break;
+ case 64:
+ assert(BRW_IS_G4X(brw));
+ break;
+ default:
+ assert(0);
+ }
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ }
vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c
index 89f4752..3bc9840 100644
--- a/i965/brw_vs_surface_state.c
+++ b/i965/brw_vs_surface_state.c
@@ -30,7 +30,6 @@
*/
#include "main/mtypes.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
@@ -53,6 +52,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
drm_intel_bo *const_buffer;
+ int i;
/* BRW_NEW_VERTEX_PROGRAM */
if (!vp->use_const_buffer)
@@ -62,7 +62,19 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
size, 64);
/* _NEW_PROGRAM_CONSTANTS */
- dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
+
+ intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE);
+ for (i = 0; i < params->NumParameters; i++) {
+ memcpy(const_buffer->virtual + i * 4 * sizeof(float),
+ params->ParameterValues[i],
+ 4 * sizeof(float));
+ }
+ intel_bo_unmap_gtt_preferred(intel, const_buffer);
return const_buffer;
}
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index ac11790..34aaea3 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -46,7 +46,7 @@
#include "brw_state.h"
#include "brw_fallback.h"
#include "brw_vs.h"
-
+#include "brw_wm.h"
static void
dri_bo_release(dri_bo **bo)
@@ -66,10 +66,14 @@ static void brw_destroy_context( struct intel_context *intel )
brw_destroy_state(brw);
brw_draw_destroy( brw );
-
- _mesa_free(brw->wm.compile_data);
-
- brw_FrameBufferTexDestroy( brw );
+ brw_clear_validated_bos(brw);
+ if (brw->wm.compile_data) {
+ _mesa_free(brw->wm.compile_data->instruction);
+ _mesa_free(brw->wm.compile_data->vreg);
+ _mesa_free(brw->wm.compile_data->refs);
+ _mesa_free(brw->wm.compile_data->prog_instructions);
+ _mesa_free(brw->wm.compile_data);
+ }
for (i = 0; i < brw->state.nr_color_regions; i++)
intel_region_release(&brw->state.color_regions[i]);
@@ -177,20 +181,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence )
brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
}
-/* called from intelWaitForIdle() and intelFlush()
- *
- * For now, just flush everything. Could be smarter later.
- */
-static GLuint brw_flush_cmd( void )
-{
- struct brw_mi_flush flush;
- flush.opcode = CMD_MI_FLUSH;
- flush.pad = 0;
- flush.flags = BRW_FLUSH_STATE_CACHE;
- return *(GLuint *)&flush;
-}
-
-
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
{
/* nothing */
@@ -211,6 +201,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.finish_batch = brw_finish_batch;
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
- brw->intel.vtbl.flush_cmd = brw_flush_cmd;
brw->intel.vtbl.debug_batch = brw_debug_batch;
}
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index 2292de9..6895f64 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -29,7 +29,6 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-#include "main/texformat.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_wm.h"
@@ -153,8 +152,21 @@ static void do_wm_prog( struct brw_context *brw,
*/
return;
}
+ c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
+ c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
+ sizeof(*c->prog_instructions));
+ c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
+ c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
} else {
+ void *instruction = c->instruction;
+ void *prog_instructions = c->prog_instructions;
+ void *vreg = c->vreg;
+ void *refs = c->refs;
memset(c, 0, sizeof(*brw->wm.compile_data));
+ c->instruction = instruction;
+ c->prog_instructions = prog_instructions;
+ c->vreg = vreg;
+ c->refs = refs;
}
memcpy(&c->key, key, sizeof(*key));
@@ -218,7 +230,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
- if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
+ if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
/* _NEW_DEPTH */
@@ -288,7 +300,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
if (img->InternalFormat == GL_YCBCR_MESA) {
key->yuvtex_mask |= 1 << i;
- if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
+ if (img->TexFormat == MESA_FORMAT_YCBCR)
key->yuvtex_swap_mask |= 1 << i;
}
@@ -309,6 +321,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
* from the incoming screen origin relative position we get as part of our
* payload.
*
+ * This is only needed for the WM_WPOSXY opcode when the fragment program
+ * uses the gl_FragCoord input.
+ *
* We could avoid recompiling by including this as a constant referenced by
* our program, but if we were to do that it would also be nice to handle
* getting that constant updated at batchbuffer submit time (when we
@@ -317,17 +332,21 @@ static void brw_wm_populate_key( struct brw_context *brw,
* just avoid using this as key data if the program doesn't use
* fragment.position.
*
- * This pretty much becomes moot with DRI2 and redirected buffers anyway,
- * as our origins will always be zero then.
+ * For DRI2 the origin_x/y will always be (0,0) but we still need the
+ * drawable height in order to invert the Y axis.
*/
- if (brw->intel.driDrawable != NULL) {
- key->origin_x = brw->intel.driDrawable->x;
- key->origin_y = brw->intel.driDrawable->y;
- key->drawable_height = brw->intel.driDrawable->h;
+ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
+ if (brw->intel.driDrawable != NULL) {
+ key->origin_x = brw->intel.driDrawable->x;
+ key->origin_y = brw->intel.driDrawable->y;
+ key->drawable_height = brw->intel.driDrawable->h;
+ }
}
+ key->nr_color_regions = brw->state.nr_color_regions;
+
/* CACHE_NEW_VS_PROG */
- key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+ key->vp_outputs_written = brw->vs.prog_data->outputs_written;
/* The unique fragment program ID */
key->program_string_id = fp->id;
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index ae98b54..9dcb6e1 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -38,6 +38,8 @@
#include "brw_context.h"
#include "brw_eu.h"
+#define SATURATE (1<<5)
+
/* A big lookup table is used to figure out which and how many
* additional regs will inserted before the main payload in the WM
* program execution. These mainly relate to depth and stencil
@@ -65,18 +67,19 @@ struct brw_wm_prog_key {
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint runtime_check_aads_emit:1;
+ GLuint nr_color_regions:2;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swaped */
- GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
+ GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
GLuint program_string_id:32;
- GLuint origin_x, origin_y;
- GLuint drawable_height;
- GLuint vp_outputs_written;
+ GLushort origin_x, origin_y;
+ GLushort drawable_height;
+ GLbitfield64 vp_outputs_written;
};
@@ -151,15 +154,16 @@ struct brw_wm_instruction {
};
-#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
#define BRW_WM_MAX_GRF 128 /* hardware limit */
#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
-#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
#define BRW_WM_MAX_SUBROUTINE 16
+/* used in masks next to WRITEMASK_*. */
+#define SATURATE (1<<5)
/* New opcodes to track internal operations required for WM unit.
@@ -198,19 +202,18 @@ struct brw_wm_compile {
* simplifying and adding instructions for interpolation and
* framebuffer writes.
*/
- struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+ struct prog_instruction *prog_instructions;
GLuint nr_fp_insns;
GLuint fp_temp;
GLuint fp_interp_emitted;
GLuint fp_fragcolor_emitted;
- GLuint fp_deriv_emitted;
struct prog_src_register pixel_xy;
struct prog_src_register delta_xy;
struct prog_src_register pixel_w;
- struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ struct brw_wm_value *vreg;
GLuint nr_vreg;
struct brw_wm_value creg[BRW_WM_MAX_PARAM];
@@ -227,10 +230,10 @@ struct brw_wm_compile {
struct brw_wm_ref undef_ref;
struct brw_wm_value undef_value;
- struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ struct brw_wm_ref *refs;
GLuint nr_refs;
- struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ struct brw_wm_instruction *instruction;
GLuint nr_insns;
struct brw_wm_constref constref[BRW_WM_MAX_CONST];
@@ -270,6 +273,12 @@ struct brw_wm_compile {
};
+/** Bits for prog_instruction::Aux field */
+#define INST_AUX_EOT 0x1
+#define INST_AUX_TARGET(T) (T << 1)
+#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1)
+
+
GLuint brw_wm_nr_args( GLuint opcode );
GLuint brw_wm_is_scalar_result( GLuint opcode );
@@ -299,5 +308,141 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+/* brw_wm_emit.c */
+void emit_alu1(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_alu2(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_dp3(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_dp4(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_dph(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_fb_write(struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot);
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
+void emit_lrp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
+void emit_mad(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
+void emit_math1(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_math2(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_min(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_max(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w);
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
+void emit_sop(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
+void emit_tex(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler,
+ GLboolean shadow);
+void emit_txb(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler);
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
+void emit_xpd(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
#endif
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 268f796..5390fd2 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
#include "brw_context.h"
#include "brw_wm.h"
-#define SATURATE (1<<5)
-
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -46,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
return reg;
}
+
/* Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
@@ -62,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.8 -- ?
*/
-
-static void emit_pixel_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask)
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask)
{
+ struct brw_compile *p = &c->func;
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+ struct brw_reg dst0_uw, dst1_uw;
+ brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ if (c->dispatch_width == 16) {
+ dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ } else {
+ dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ }
+
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
if (mask & WRITEMASK_X) {
brw_ADD(p,
- vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ dst0_uw,
stride(suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
}
if (mask & WRITEMASK_Y) {
brw_ADD(p,
- vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ dst1_uw,
stride(suboffset(r1_uw,5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ brw_pop_insn_state(p);
}
-
-static void emit_delta_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -120,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p,
}
}
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
@@ -148,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
-static void emit_pixel_w( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas)
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
+ struct brw_compile *p = &c->func;
+
/* Don't need this if all you are doing is interpolating color, for
* instance.
*/
@@ -167,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p,
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
/* Calc w */
- brw_math_16( p, dst[3],
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
+ if (c->dispatch_width == 16) {
+ brw_math_16(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ } else {
+ brw_math(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
}
-
-static void emit_linterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas )
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -201,12 +218,12 @@ static void emit_linterp( struct brw_compile *p,
}
-static void emit_pinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas,
- const struct brw_reg *w)
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -231,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p,
}
-static void emit_cinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -253,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p,
}
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask )
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
{
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -281,13 +298,86 @@ static void emit_frontfacing( struct brw_compile *p,
brw_set_predicate_control_flag_value(p, 0xff);
}
-static void emit_alu1( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+void emit_alu1(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
GLuint i;
@@ -305,15 +395,15 @@ static void emit_alu1( struct brw_compile *p,
}
-static void emit_alu2( struct brw_compile *p,
- struct brw_instruction *(*func)(struct brw_compile *,
- struct brw_reg,
- struct brw_reg,
- struct brw_reg),
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_alu2(struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -331,12 +421,12 @@ static void emit_alu2( struct brw_compile *p,
}
-static void emit_mad( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_mad(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -351,26 +441,12 @@ static void emit_mad( struct brw_compile *p,
}
}
-static void emit_trunc( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
-{
- GLuint i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- brw_RNDZ(p, dst[i], arg0[i]);
- }
- }
-}
-
-static void emit_lrp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_lrp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -390,21 +466,24 @@ static void emit_lrp( struct brw_compile *p,
}
}
-static void emit_sop( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- GLuint cond,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_sop(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_MOV(p, dst[i], brw_imm_f(1.0));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_pop_insn_state(p);
}
}
}
@@ -488,11 +567,11 @@ static void emit_cmp( struct brw_compile *p,
}
}
-static void emit_max( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_max(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -512,11 +591,11 @@ static void emit_max( struct brw_compile *p,
}
}
-static void emit_min( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_min(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -537,11 +616,11 @@ static void emit_min( struct brw_compile *p,
}
-static void emit_dp3( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp3(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -559,11 +638,11 @@ static void emit_dp3( struct brw_compile *p,
}
-static void emit_dp4( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dp4(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -582,11 +661,11 @@ static void emit_dp4( struct brw_compile *p,
}
-static void emit_dph( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_dph(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
@@ -605,11 +684,11 @@ static void emit_dph( struct brw_compile *p,
}
-static void emit_xpd( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1 )
+void emit_xpd(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
GLuint i;
@@ -630,41 +709,68 @@ static void emit_xpd( struct brw_compile *p,
}
-static void emit_math1( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_math1(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
assert(is_power_of_two(mask & WRITEMASK_XYZW));
+ /* If compressed, this will write message reg 2,3 from arg0.x's 16
+ * channels.
+ */
brw_MOV(p, brw_message_reg(2), arg0[0]);
/* Send two messages to perform all 16 operations:
*/
- brw_math_16(p,
- dst[dst_chan],
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ saturate,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
+ saturate,
+ 3,
brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ }
+ brw_pop_insn_state(p);
}
-static void emit_math2( struct brw_compile *p,
- GLuint function,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
+void emit_math2(struct brw_wm_compile *c,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
+ struct brw_compile *p = &c->func;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
@@ -675,173 +781,231 @@ static void emit_math2( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(2), arg0[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+ }
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ }
-
- /* Send two messages to perform all 16 operations:
- */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
dst[dst_chan],
function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ saturate,
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p,
- offset(dst[dst_chan],1),
- function,
- (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 4,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
+ /* Send two messages to perform all 16 operations:
+ */
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ saturate,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
brw_pop_insn_state(p);
}
-
-static void emit_tex( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_tex(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler,
+ GLboolean shadow)
{
struct brw_compile *p = &c->func;
- GLuint msgLength, responseLength;
- GLuint i, nr;
+ struct brw_reg dst_retyped;
+ GLuint cur_mrf = 2, response_length;
+ GLuint i, nr_texcoords;
GLuint emit;
GLuint msg_type;
+ GLuint mrf_per_channel;
+ GLuint simd_mode;
+
+ if (c->dispatch_width == 16) {
+ mrf_per_channel = 2;
+ response_length = 8;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ } else {
+ mrf_per_channel = 1;
+ response_length = 4;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ }
/* How many input regs are there?
*/
- switch (inst->tex_idx) {
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
emit = WRITEMASK_X;
- nr = 1;
+ nr_texcoords = 1;
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
emit = WRITEMASK_XY;
- nr = 2;
+ nr_texcoords = 2;
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
- nr = 3;
+ nr_texcoords = 3;
break;
default:
/* unexpected target */
abort();
}
- if (inst->tex_shadow) {
- nr = 4;
- emit |= WRITEMASK_W;
- }
+ /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
+ if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
+ nr_texcoords = 3;
- msgLength = 1;
+ /* For shadow comparisons, we have to supply u,v,r. */
+ if (shadow)
+ nr_texcoords = 3;
- for (i = 0; i < nr; i++) {
- static const GLuint swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+ /* Emit the texcoords. */
+ for (i = 0; i < nr_texcoords; i++) {
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
- brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
- msgLength += 2;
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
}
- responseLength = 8; /* always */
+ /* Fill in the shadow comparison reference value. */
+ if (shadow) {
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* Fill in the cube map array index value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ } else if (c->dispatch_width == 8) {
+ /* Fill in the LOD bias value. */
+ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+ cur_mrf += mrf_per_channel;
+ }
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
if (BRW_IS_IGDNG(p->brw)) {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
} else {
- if (inst->tex_shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ /* Note that G45 and older determines shadow compare and dispatch width
+ * from message length for most messages.
+ */
+ if (c->dispatch_width == 16 && shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
}
- brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ brw_SAMPLE(p,
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
- msg_type,
- responseLength,
- msgLength,
- 0,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
+ msg_type,
+ response_length,
+ cur_mrf - 1,
+ 0,
1,
- BRW_SAMPLER_SIMD_MODE_SIMD16);
+ simd_mode);
}
-static void emit_txb( struct brw_wm_compile *c,
- const struct brw_wm_instruction *inst,
- struct brw_reg *dst,
- GLuint dst_flags,
- struct brw_reg *arg )
+void emit_txb(struct brw_wm_compile *c,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg,
+ struct brw_reg depth_payload,
+ GLuint tex_idx,
+ GLuint sampler)
{
struct brw_compile *p = &c->func;
GLuint msgLength;
GLuint msg_type;
- /* Shadow ignored for txb.
+ GLuint mrf_per_channel;
+ GLuint response_length;
+ struct brw_reg dst_retyped;
+
+ /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
+ * samples, so we'll use the 16-wide instruction, leave the second halves
+ * undefined, and trust the execution mask to keep the undefined pixels
+ * from mattering.
*/
- switch (inst->tex_idx) {
+ if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ mrf_per_channel = 2;
+ dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 8;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
+ mrf_per_channel = 1;
+ dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+ response_length = 4;
+ }
+
+ /* Shadow ignored for txb. */
+ switch (tex_idx) {
case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
break;
case TEXTURE_3D_INDEX:
case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), arg[0]);
- brw_MOV(p, brw_message_reg(4), arg[1]);
- brw_MOV(p, brw_message_reg(6), arg[2]);
+ brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
+ brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
+ brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
break;
default:
/* unexpected target */
abort();
}
- brw_MOV(p, brw_message_reg(8), arg[3]);
- msgLength = 9;
-
- if (BRW_IS_IGDNG(p->brw))
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
+ msgLength = 2 + 4 * mrf_per_channel - 1;
brw_SAMPLE(p,
- retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ dst_retyped,
1,
- retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
- SURF_INDEX_TEXTURE(inst->tex_unit),
- inst->tex_unit, /* sampler */
- inst->writemask,
+ retype(depth_payload, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(sampler),
+ sampler,
+ dst_flags & WRITEMASK_XYZW,
msg_type,
- 8, /* responseLength */
+ response_length,
msgLength,
0,
1,
@@ -849,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c,
}
-static void emit_lit( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+static void emit_lit(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
+ struct brw_compile *p = &c->func;
+
assert((mask & WRITEMASK_XW) == 0);
if (mask & WRITEMASK_Y) {
@@ -863,7 +1029,7 @@ static void emit_lit( struct brw_compile *p,
}
if (mask & WRITEMASK_Z) {
- emit_math2(p, BRW_MATH_FUNCTION_POW,
+ emit_math2(c, BRW_MATH_FUNCTION_POW,
&dst[2],
WRITEMASK_X | (mask & SATURATE),
&arg0[1],
@@ -908,6 +1074,20 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
@@ -916,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c,
GLuint eot )
{
struct brw_compile *p = &c->func;
-
+ struct brw_reg dst;
+
+ if (c->dispatch_width == 16)
+ dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+
/* Pass through control information:
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
@@ -933,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Send framebuffer write message: */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
- retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
target,
@@ -965,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c,
* \param arg1 the pass-through depth value
* \param arg2 the shader-computed depth value
*/
-static void emit_fb_write( struct brw_wm_compile *c,
- struct brw_reg *arg0,
- struct brw_reg *arg1,
- struct brw_reg *arg2,
- GLuint target,
- GLuint eot)
+void emit_fb_write(struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
GLuint nr = 2;
GLuint channel;
@@ -984,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c,
/* I don't really understand how this achieves the color interleave
* (ie RGBARGBA) in the result: [Do the saturation here]
*/
- {
- brw_push_insn_state(p);
-
- for (channel = 0; channel < 4; channel++) {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+ /* By setting the high bit of the MRF register number, we indicate
+ * that we want COMPR4 mode - instead of doing the usual destination
+ * + 1 for the second half we get destination + 4.
+ */
+ brw_MOV(p,
+ brw_message_reg(nr + channel + (1 << 7)),
+ arg0[channel]);
+ } else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
/* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
-
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(nr + channel),
arg0[channel]);
-
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p,
- brw_message_reg(nr + channel + 4),
- sechalf(arg0[channel]));
- }
- /* skip over the regs populated above:
- */
- nr += 8;
-
- brw_pop_insn_state(p);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+ }
}
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+ brw_pop_insn_state(p);
if (c->key.source_depth_to_render_target)
{
@@ -1057,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
get_element_ud(brw_vec8_grf(1,0), 6),
brw_imm_ud(1<<26));
- jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
{
emit_aa(c, arg1, 2);
fire_fb_write(c, 0, nr, target, eot);
@@ -1071,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c,
}
}
-
/**
* Move a GPR to scratch memory.
*/
@@ -1209,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
@@ -1221,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case WM_PIXELW:
- emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
@@ -1258,6 +1451,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1271,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_TRUNC:
- emit_trunc(p, dst, dst_flags, args[0]);
+ emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
@@ -1298,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Higher math functions:
*/
case OPCODE_RCP:
- emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
break;
case OPCODE_RSQ:
- emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
break;
case OPCODE_SIN:
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
break;
case OPCODE_COS:
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
break;
case OPCODE_EX2:
- emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
break;
case OPCODE_LG2:
- emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_SCS:
@@ -1326,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c )
* fixup for 16-element execution.
*/
if (dst_flags & WRITEMASK_X)
- emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
if (dst_flags & WRITEMASK_Y)
- emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
break;
case OPCODE_POW:
- emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
break;
/* Comparisons:
@@ -1370,23 +1571,30 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_LIT:
- emit_lit(p, dst, dst_flags, args[0]);
+ emit_lit(c, dst, dst_flags, args[0]);
break;
/* Texturing operations:
*/
case OPCODE_TEX:
- emit_tex(c, inst, dst, dst_flags, args[0]);
+ emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit,
+ inst->tex_shadow);
break;
case OPCODE_TXB:
- emit_txb(c, inst, dst, dst_flags, args[0]);
+ emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+ inst->tex_idx, inst->tex_unit);
break;
case OPCODE_KIL:
emit_kil(c, args[0]);
break;
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
default:
_mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 123fe84..7d03179 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -181,6 +181,9 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem
static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
{
+ assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
+ memset(&c->prog_instructions[c->nr_fp_insns], 0,
+ sizeof(*c->prog_instructions));
return &c->prog_instructions[c->nr_fp_insns++];
}
@@ -447,7 +450,6 @@ static void emit_interp( struct brw_wm_compile *c,
break;
case FRAG_ATTRIB_FACE:
- /* XXX review/test this case */
emit_op(c,
WM_FRONTFACING,
dst_mask(dst, WRITEMASK_X),
@@ -494,38 +496,6 @@ static void emit_interp( struct brw_wm_compile *c,
c->fp_interp_emitted |= 1<<idx;
}
-static void emit_ddx( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDX,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
-static void emit_ddy( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDY,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
/***********************************************************************
* Hacks to extend the program parameter and constant lists.
*/
@@ -988,7 +958,7 @@ static void precalc_txp( struct brw_wm_compile *c,
-static void emit_fb_write( struct brw_wm_compile *c )
+static void emit_render_target_writes( struct brw_wm_compile *c )
{
struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
@@ -996,36 +966,34 @@ static void emit_fb_write( struct brw_wm_compile *c )
GLuint i;
struct prog_instruction *inst, *last_inst;
- struct brw_context *brw = c->func.brw;
/* The inst->Aux field is used for FB write target and the EOT marker */
- if (brw->state.nr_color_regions > 1) {
- for (i = 0 ; i < brw->state.nr_color_regions; i++) {
+ if (c->key.nr_color_regions > 1) {
+ for (i = 0 ; i < c->key.nr_color_regions; i++) {
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
- last_inst = inst = emit_op(c,
- WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
- outcolor, payload_r0_depth, outdepth);
- inst->Aux = (i<<1);
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = INST_AUX_TARGET(i);
if (c->fp_fragcolor_emitted) {
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = (i<<1);
+ inst->Aux = INST_AUX_TARGET(i);
}
}
- last_inst->Aux |= 1; //eot
+ last_inst->Aux |= INST_AUX_EOT;
}
else {
/* if gl_FragData[0] is written, use it, else use gl_FragColor */
- if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
+ if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
else
outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = 1|(0<<1);
+ inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
}
}
@@ -1186,14 +1154,8 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
*/
out->DstReg.WriteMask = 0;
break;
- case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
- case OPCODE_DDY:
- emit_ddy(c, inst);
- break;
case OPCODE_END:
- emit_fb_write(c);
+ emit_render_target_writes(c);
break;
case OPCODE_PRINT:
break;
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index 7c210ab..e8c2cb6 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
+
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
@@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
- case OPCODE_DDX:
- case OPCODE_DDY:
case OPCODE_NOISE1:
case OPCODE_NOISE2:
case OPCODE_NOISE3:
@@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
GLuint reg_index = 0;
memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
@@ -372,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
}
- if (c->key.vp_outputs_written & (1 << i)) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
reg_index += 2;
}
}
@@ -551,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
}
}
-
-/**
- * Same as \sa get_src_reg() but if the register is a literal, emit
- * a brw_reg encoding the literal.
- * Note that a brw instruction only allows one src operand to be a literal.
- * For instructions with more than one operand, only the second can be a
- * literal. This means that we treat some literals as constants/uniforms
- * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
- *
- */
-static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint channel)
-{
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- if (src->File == PROGRAM_CONSTANT) {
- /* a literal */
- const int component = GET_SWZ(src->Swizzle, channel);
- const GLfloat *param =
- c->fp->program.Base.Parameters->ParameterValues[src->Index];
- GLfloat value = param[component];
- if (src->Negate & (1 << channel))
- value = -value;
- if (src->Abs)
- value = FABSF(value);
-#if 0
- printf(" form immed value %f for chan %d\n", value, channel);
-#endif
- return brw_imm_f(value);
- }
- else {
- return get_src_reg(c, inst, srcRegIndex, channel);
- }
-}
-
-
/**
* Subroutines are minimal support for resusable instruction sequences.
* They are implemented as simply as possible to minimise overhead: there
@@ -651,542 +614,110 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-static void emit_trunc( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- src = get_src_reg(c, inst, 0, i);
- brw_RNDZ(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_mov( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- /* XXX some moves from immediate value don't work reliably!!! */
- /*src = get_src_reg_imm(c, inst, 0, i);*/
- src = get_src_reg(c, inst, 0, i);
- brw_MOV(p, dst, src);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_pixel_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
-
- struct brw_reg dst0, dst1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- /* Calculate pixel centers by adding 1 or 0 to each of the
- * micro-tile coordinates passed in r1.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 4), 2, 4, 0),
- brw_imm_v(0x10101010));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 5), 2, 4, 0),
- brw_imm_v(0x11001100));
- }
-}
-
-static void emit_delta_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg dst0, dst1, src0, src1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- src0 = get_src_reg(c, inst, 0, 0);
- src1 = get_src_reg(c, inst, 0, 1);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst0,
- retype(src0, BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst1,
- retype(src1, BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
-
- }
-}
-
-static void fire_fb_write( struct brw_wm_compile *c,
- GLuint base_reg,
- GLuint nr,
- GLuint target,
- GLuint eot)
-{
- struct brw_compile *p = &c->func;
- /* Pass through control information:
- */
- /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- {
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
- brw_MOV(p,
- brw_message_reg(base_reg + 1),
- brw_vec8_grf(1, 0));
- brw_pop_insn_state(p);
- }
- /* Send framebuffer write message: */
- brw_fb_WRITE(p,
- retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
- base_reg,
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
- target,
- nr,
- 0,
- eot);
-}
-
-static void emit_fb_write(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- int nr = 2;
- int channel;
- GLuint target, eot;
- struct brw_reg src0;
-
- /* Reserve a space for AA - may not be needed:
- */
- if (c->key.aa_dest_stencil_reg)
- nr += 1;
-
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, inst, 0, channel);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
-
- if (c->key.source_depth_to_render_target) {
- if (c->key.computes_depth) {
- src0 = get_src_reg(c, inst, 2, 2);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
- else {
- src0 = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
-
- nr += 2;
- }
-
- if (c->key.dest_depth_reg) {
- const GLuint comp = c->key.dest_depth_reg / 2;
- const GLuint off = c->key.dest_depth_reg % 2;
-
- if (off != 0) {
- /* XXX this code needs review/testing */
- struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
- struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
-
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
- /* 2nd half? */
- brw_MOV(p, brw_message_reg(nr+1), arg1_1);
- brw_pop_insn_state(p);
- }
- else
- {
- struct brw_reg src = get_src_reg(c, inst, 1, 1);
- brw_MOV(p, brw_message_reg(nr), src);
- }
- nr += 2;
- }
-
- target = inst->Aux >> 1;
- eot = inst->Aux & 1;
- fire_fb_write(c, 0, nr, target, eot);
-}
-
-static void emit_pixel_w( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
+/* Workaround for using brw_wm_emit.c's emit functions, which expect
+ * destination regs to be uniquely written. Moves arguments out to
+ * temporaries as necessary for instructions which use their destination as
+ * a temporary.
+ */
+static void
+unalias3(struct brw_wm_compile *c,
+ void (*func)(struct brw_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- if (mask & WRITEMASK_W) {
- struct brw_reg dst, src0, delta0, delta1;
- struct brw_reg interp3;
-
- dst = get_dst_reg(c, inst, 3);
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
-
- interp3 = brw_vec1_grf(src0.nr+1, 4);
- /* Calc 1/w - just linterp wpos[3] optimized by putting the
- * result straight into a message reg.
- */
- brw_LINE(p, brw_null_reg(), interp3, delta0);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
-
- /* Calc w */
- brw_math_16( p, dst,
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
- }
-}
+ struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
+ int i, j;
+ int mark = mark_tmps(c);
-static void emit_linterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1), delta1);
- }
+ for (j = 0; j < 4; j++) {
+ tmp_arg0[j] = arg0[j];
+ tmp_arg1[j] = arg1[j];
+ tmp_arg2[j] = arg2[j];
}
-}
-
-static void emit_cinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
+ for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i],3));
+ for (j = 0; j < 4; j++) {
+ if (arg0[j].file == dst[i].file &&
+ dst[i].nr == arg0[j].nr) {
+ tmp_arg0[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg0[j], arg0[j]);
+ }
+ if (arg1[j].file == dst[i].file &&
+ dst[i].nr == arg1[j].nr) {
+ tmp_arg1[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg1[j], arg1[j]);
+ }
+ if (arg2[j].file == dst[i].file &&
+ dst[i].nr == arg2[j].nr) {
+ tmp_arg2[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg2[j], arg2[j]);
+ }
+ }
}
}
-}
-static void emit_pinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- w = get_src_reg(c, inst, 2, 3);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
+ func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1),
- delta1);
- brw_MUL(p, dst, dst, w);
- }
- }
+ release_tmps(c, mark);
}
-/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
+/* Workaround for using brw_wm_emit.c's emit functions, which expect
+ * destination regs to be uniquely written. Moves arguments out to
+ * temporaries as necessary for instructions which use their destination as
+ * a temporary.
+ */
+static void
+unalias2(struct brw_wm_compile *c,
+ void (*func)(struct brw_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
{
struct brw_compile *p = &c->func;
- struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
- struct brw_reg dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(0.0));
- }
- }
+ struct brw_reg tmp_arg0[4], tmp_arg1[4];
+ int i, j;
+ int mark = mark_tmps(c);
- /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
- * us front face
- */
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(1.0));
- }
+ for (j = 0; j < 4; j++) {
+ tmp_arg0[j] = arg0[j];
+ tmp_arg1[j] = arg1[j];
}
- brw_set_predicate_control_flag_value(p, 0xff);
-}
-static void emit_xpd(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
for (i = 0; i < 4; i++) {
- GLuint i2 = (i+2)%3;
- GLuint i1 = (i+1)%3;
if (mask & (1<<i)) {
- struct brw_reg src0, src1, dst;
- dst = get_dst_reg(c, inst, i);
- src0 = negate(get_src_reg(c, inst, 0, i2));
- src1 = get_src_reg_imm(c, inst, 1, i1);
- brw_MUL(p, brw_null_reg(), src0, src1);
- src0 = get_src_reg(c, inst, 0, i1);
- src1 = get_src_reg_imm(c, inst, 1, i2);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- brw_MAC(p, dst, src0, src1);
- brw_set_saturate(p, 0);
+ for (j = 0; j < 4; j++) {
+ if (arg0[j].file == dst[i].file &&
+ dst[i].nr == arg0[j].nr) {
+ tmp_arg0[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg0[j], arg0[j]);
+ }
+ if (arg1[j].file == dst[i].file &&
+ dst[i].nr == arg1[j].nr) {
+ tmp_arg1[j] = alloc_tmp(c);
+ brw_MOV(p, tmp_arg1[j], arg1[j]);
+ }
+ }
}
}
- brw_set_saturate(p, 0);
-}
-
-static void emit_dp3(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[3], src1[3], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 3; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
-
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, 0);
-}
+ func(p, dst, mask, tmp_arg0, tmp_arg1);
-static void emit_dp4(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0[3], src1[3]);
- brw_set_saturate(p, 0);
-}
-
-static void emit_dph(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg src0[4], src1[4], dst;
- int i;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, inst, 0, i);
- src1[i] = get_src_reg_imm(c, inst, 1, i);
- }
- dst = get_dst_reg(c, inst, dst_chan);
- brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
- brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
- brw_MAC(p, dst, src0[2], src1[2]);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, dst, src1[3]);
- brw_set_saturate(p, 0);
-}
-
-/**
- * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
- * Note that the result of the function is smeared across the dest
- * register's X, Y, Z and W channels (subject to writemasking of course).
- */
-static void emit_math1(struct brw_wm_compile *c,
- const struct prog_instruction *inst, GLuint func)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- /* Get first component of source register */
- dst = get_dst_reg(c, inst, dst_chan);
- src0 = get_src_reg(c, inst, 0, 0);
-
- brw_MOV(p, brw_message_reg(2), src0);
- brw_math(p,
- dst,
- func,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-}
-
-static void emit_rcp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
-}
-
-static void emit_rsq(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
-}
-
-static void emit_sin(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
-}
-
-static void emit_cos(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
-}
-
-static void emit_ex2(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
-}
-
-static void emit_lg2(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
-}
-
-static void emit_add(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_ADD(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
+ release_tmps(c, mark);
}
static void emit_arl(struct brw_wm_compile *c,
@@ -1202,180 +733,6 @@ static void emit_arl(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-
-static void emit_mul(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_MUL(p, dst, src0, src1);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_frc(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg_imm(c, inst, 0, i);
- brw_FRC(p, dst, src0);
- }
- }
- if (inst->SaturateMode != SATURATE_OFF)
- brw_set_saturate(p, 0);
-}
-
-static void emit_flr(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg_imm(c, inst, 0, i);
- brw_RNDD(p, dst, src0);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-
-static void emit_min_max(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- const GLuint mask = inst->DstReg.WriteMask;
- const int mark = mark_tmps(c);
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- struct brw_reg real_dst = get_dst_reg(c, inst, i);
- struct brw_reg src0 = get_src_reg(c, inst, 0, i);
- struct brw_reg src1 = get_src_reg(c, inst, 1, i);
- struct brw_reg dst;
- /* if dst==src0 or dst==src1 we need to use a temp reg */
- GLboolean use_temp = brw_same_reg(dst, src0) ||
- brw_same_reg(dst, src1);
- if (use_temp)
- dst = alloc_tmp(c);
- else
- dst = real_dst;
-
- /*
- printf(" Min/max: dst %d src0 %d src1 %d\n",
- dst.nr, src0.nr, src1.nr);
- */
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- if (inst->Opcode == OPCODE_MIN)
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
- else
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
-
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- if (use_temp)
- brw_MOV(p, real_dst, dst);
- }
- }
- brw_pop_insn_state(p);
- release_tmps(c, mark);
-}
-
-static void emit_pow(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst, src0, src1;
- GLuint mask = inst->DstReg.WriteMask;
- int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-
- if (!(mask & WRITEMASK_XYZW))
- return;
-
- assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
- dst = get_dst_reg(c, inst, dst_chan);
- src0 = get_src_reg_imm(c, inst, 0, 0);
- src1 = get_src_reg_imm(c, inst, 1, 0);
-
- brw_MOV(p, brw_message_reg(2), src0);
- brw_MOV(p, brw_message_reg(3), src1);
-
- brw_math(p,
- dst,
- BRW_MATH_FUNCTION_POW,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-}
-
-static void emit_lrp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
- int i;
- int mark = mark_tmps(c);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
-
- src1 = get_src_reg_imm(c, inst, 1, i);
-
- if (src1.nr == dst.nr) {
- tmp1 = alloc_tmp(c);
- brw_MOV(p, tmp1, src1);
- } else
- tmp1 = src1;
-
- src2 = get_src_reg(c, inst, 2, i);
- if (src2.nr == dst.nr) {
- tmp2 = alloc_tmp(c);
- brw_MOV(p, tmp2, src2);
- } else
- tmp2 = src2;
-
- brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
- brw_MUL(p, brw_null_reg(), dst, tmp2);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MAC(p, dst, src0, tmp1);
- brw_set_saturate(p, 0);
- }
- release_tmps(c, mark);
- }
-}
-
/**
* For GLSL shaders, this KIL will be unconditional.
* It may be contained inside an IF/ENDIF structure of course.
@@ -1391,144 +748,6 @@ static void emit_kil(struct brw_wm_compile *c)
brw_pop_insn_state(p);
}
-static void emit_mad(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1, src2;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- src2 = get_src_reg_imm(c, inst, 2, i);
- brw_MUL(p, dst, src0, src1);
-
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_ADD(p, dst, dst, src2);
- brw_set_saturate(p, 0);
- }
- }
-}
-
-static void emit_sop(struct brw_wm_compile *c,
- const struct prog_instruction *inst, GLuint cond)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg dst, src0, src1;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_push_insn_state(p);
- brw_CMP(p, brw_null_reg(), cond, src0, src1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_MOV(p, dst, brw_imm_f(0.0));
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, brw_imm_f(1.0));
- brw_pop_insn_state(p);
- }
- }
-}
-
-static void emit_slt(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_L);
-}
-
-static void emit_sle(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_LE);
-}
-
-static void emit_sgt(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_G);
-}
-
-static void emit_sge(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_GE);
-}
-
-static void emit_seq(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_EQ);
-}
-
-static void emit_sne(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
-}
-
-static void emit_ddx(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
- src0 = get_src_reg(c, inst, 0, 0);
- w = get_src_reg(c, inst, 1, 3);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, interp[i]);
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_ddy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
- w = get_src_reg(c, inst, 1, 3);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i], 1));
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
@@ -2581,196 +1800,6 @@ static void emit_noise4( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0[2], dst[2];
-
- dst[0] = get_dst_reg(c, inst, 0);
- dst[1] = get_dst_reg(c, inst, 1);
-
- src0[0] = get_src_reg(c, inst, 0, 0);
- src0[1] = get_src_reg(c, inst, 0, 1);
-
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
- if (mask & WRITEMASK_X) {
- /* X' = X - origin_x */
- brw_ADD(p,
- dst[0],
- retype(src0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
- }
-
- if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
- brw_ADD(p,
- dst[1],
- negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
- }
-}
-
-/* TODO
- BIAS on SIMD8 not working yet...
- */
-static void emit_txb(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint i;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), src[1]);
- brw_MOV(p, brw_message_reg(4), src[2]);
- break;
- default:
- /* invalid target */
- abort();
- }
- brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
-
- if (BRW_IS_IGDNG(p->brw)) {
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
- } else {
- /* Does it work well on SIMD8? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-}
-
-
-static void emit_tex(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg dst[4], src[4], payload_reg;
- /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
- const GLuint unit = inst->TexSrcUnit;
- GLuint msg_len;
- GLuint i, nr;
- GLuint emit;
- GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
- GLuint msg_type;
-
- assert(unit < BRW_MAX_TEX_UNIT);
-
- payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-
- for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i);
- for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, inst, 0, i);
-
- switch (inst->TexSrcTarget) {
- case TEXTURE_1D_INDEX:
- emit = WRITEMASK_X;
- nr = 1;
- break;
- case TEXTURE_2D_INDEX:
- case TEXTURE_RECT_INDEX:
- emit = WRITEMASK_XY;
- nr = 2;
- break;
- case TEXTURE_3D_INDEX:
- case TEXTURE_CUBE_INDEX:
- emit = WRITEMASK_XYZ;
- nr = 3;
- break;
- default:
- /* invalid target */
- abort();
- }
- msg_len = 1;
-
- /* move/load S, T, R coords */
- for (i = 0; i < nr; i++) {
- static const GLuint swz[4] = {0,1,2,2};
- if (emit & (1<<i))
- brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
- else
- brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
- msg_len += 1;
- }
-
- if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
- brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
- }
-
- if (BRW_IS_IGDNG(p->brw)) {
- if (shadow)
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
- else
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
- } else {
- /* Does it work for shadow on SIMD8 ? */
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
- }
-
- brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
- 1, /* msg_reg_nr */
- retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
- SURF_INDEX_TEXTURE(unit),
- unit, /* sampler */
- inst->DstReg.WriteMask, /* writemask */
- msg_type, /* msg_type */
- 4, /* response_length */
- shadow ? 6 : 4, /* msg_length */
- 0, /* eot */
- 1,
- BRW_SAMPLER_SIMD_MODE_SIMD8);
-
- if (shadow)
- brw_MOV(p, dst[3], brw_imm_f(1.0));
-}
-
/**
* Resolve subroutine calls after code emit is done.
@@ -2780,6 +1809,21 @@ static void post_wm_emit( struct brw_wm_compile *c )
brw_resolve_cals(&c->func);
}
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
#define MAX_IF_DEPTH 32
@@ -2797,6 +1841,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
c->cur_inst = i;
@@ -2809,127 +1856,157 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (c->fp->use_const_buffer)
fetch_constants(c, inst);
+ if (inst->Opcode != OPCODE_ARL) {
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ }
+ for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
+ get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
if (inst->CondUpdate)
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
switch (inst->Opcode) {
case WM_PIXELXY:
- emit_pixel_xy(c, inst);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
- emit_delta_xy(c, inst);
+ emit_delta_xy(p, dst, dst_flags, args[0]);
break;
case WM_PIXELW:
- emit_pixel_w(c, inst);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
- emit_linterp(c, inst);
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
break;
case WM_PINTERP:
- emit_pinterp(c, inst);
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case WM_CINTERP:
- emit_cinterp(c, inst);
+ emit_cinterp(p, dst, dst_flags, args[0]);
break;
case WM_WPOSXY:
- emit_wpos_xy(c, inst);
+ emit_wpos_xy(c, dst, dst_flags, args[0]);
break;
case WM_FB_WRITE:
- emit_fb_write(c, inst);
+ emit_fb_write(c, args[0], args[1], args[2],
+ INST_AUX_GET_TARGET(inst->Aux),
+ inst->Aux & INST_AUX_EOT);
break;
case WM_FRONTFACING:
- emit_frontfacing(c, inst);
+ emit_frontfacing(p, dst, dst_flags);
break;
case OPCODE_ADD:
- emit_add(c, inst);
+ emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_ARL:
emit_arl(c, inst);
break;
case OPCODE_FRC:
- emit_frc(c, inst);
+ emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
break;
case OPCODE_FLR:
- emit_flr(c, inst);
+ emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
case OPCODE_LRP:
- emit_lrp(c, inst);
+ unalias3(c, emit_lrp,
+ dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_TRUNC:
- emit_trunc(c, inst);
+ emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
break;
case OPCODE_MOV:
case OPCODE_SWZ:
- emit_mov(c, inst);
+ emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
break;
case OPCODE_DP3:
- emit_dp3(c, inst);
+ emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DP4:
- emit_dp4(c, inst);
+ emit_dp4(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_XPD:
- emit_xpd(c, inst);
+ emit_xpd(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DPH:
- emit_dph(c, inst);
+ emit_dph(p, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_RCP:
- emit_rcp(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
break;
case OPCODE_RSQ:
- emit_rsq(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
break;
case OPCODE_SIN:
- emit_sin(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
break;
case OPCODE_COS:
- emit_cos(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
break;
case OPCODE_EX2:
- emit_ex2(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
break;
case OPCODE_LG2:
- emit_lg2(c, inst);
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
case OPCODE_MIN:
+ unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+ break;
case OPCODE_MAX:
- emit_min_max(c, inst);
+ unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
case OPCODE_DDY:
- emit_ddy(c, inst);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
break;
case OPCODE_SLT:
- emit_slt(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_L, args[0], args[1]);
break;
case OPCODE_SLE:
- emit_sle(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_LE, args[0], args[1]);
break;
case OPCODE_SGT:
- emit_sgt(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_G, args[0], args[1]);
break;
case OPCODE_SGE:
- emit_sge(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_GE, args[0], args[1]);
break;
case OPCODE_SEQ:
- emit_seq(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_EQ, args[0], args[1]);
break;
case OPCODE_SNE:
- emit_sne(c, inst);
+ emit_sop(p, dst, dst_flags,
+ BRW_CONDITIONAL_NEQ, args[0], args[1]);
break;
case OPCODE_MUL:
- emit_mul(c, inst);
+ emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
break;
case OPCODE_POW:
- emit_pow(c, inst);
+ emit_math2(c, BRW_MATH_FUNCTION_POW,
+ dst, dst_flags, args[0], args[1]);
break;
case OPCODE_MAD:
- emit_mad(c, inst);
+ emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case OPCODE_NOISE1:
emit_noise1(c, inst);
@@ -2944,10 +2021,19 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_noise4(c, inst);
break;
case OPCODE_TEX:
- emit_tex(c, inst);
+ emit_tex(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ inst->TexSrcUnit,
+ (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
break;
case OPCODE_TXB:
- emit_txb(c, inst);
+ emit_txb(c, dst, dst_flags, args[0],
+ get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+ 0, 1, 0, 0),
+ inst->TexSrcTarget,
+ c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
break;
case OPCODE_KIL_NV:
emit_kil(c);
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index 6279258..ff4c082 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -42,12 +42,14 @@
static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
{
assert(c->nr_refs < BRW_WM_MAX_REF);
+ memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs));
return &c->refs[c->nr_refs++];
}
static struct brw_wm_value *get_value( struct brw_wm_compile *c)
{
assert(c->nr_refs < BRW_WM_MAX_VREG);
+ memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg));
return &c->vreg[c->nr_vreg++];
}
@@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c)
static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
{
assert(c->nr_insns < BRW_WM_MAX_INSN);
+ memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction));
return &c->instruction[c->nr_insns++];
}
@@ -322,8 +325,8 @@ translate_insn(struct brw_wm_compile *c,
out->tex_unit = inst->TexSrcUnit;
out->tex_idx = inst->TexSrcTarget;
out->tex_shadow = inst->TexShadow;
- out->eot = inst->Aux & 1;
- out->target = inst->Aux >> 1;
+ out->eot = inst->Aux & INST_AUX_EOT;
+ out->target = INST_AUX_GET_TARGET(inst->Aux);
/* Args:
*/
diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c
index 3436a24..b449394 100644
--- a/i965/brw_wm_pass1.c
+++ b/i965/brw_wm_pass1.c
@@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
read1 = writemask;
break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ read0 = writemask;
+ break;
+
case OPCODE_MAD:
case OPCODE_CMP:
case OPCODE_LRP:
@@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
+ case OPCODE_KIL_NV:
default:
break;
}
diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c
index 6faea01..31303fe 100644
--- a/i965/brw_wm_pass2.c
+++ b/i965/brw_wm_pass2.c
@@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
- if (c->key.vp_outputs_written & (1<<j)) {
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
int fp_index;
if (j >= VERT_RESULT_VAR0)
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index dff4665..aa2e519 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -66,19 +66,6 @@ static GLuint translate_wrap_mode( GLenum wrap )
}
}
-
-static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
-{
- value *= (1<<frac_bits);
- return value < 0 ? 0 : value;
-}
-
-static GLint S_FIXED(GLfloat value, GLuint frac_bits)
-{
- return value * (1<<frac_bits);
-}
-
-
static dri_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
@@ -86,8 +73,8 @@ static dri_bo *upload_default_color( struct brw_context *brw,
COPY_4V(sdc.color, color);
- return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
- NULL, 0 );
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc), NULL, 0);
}
@@ -228,8 +215,8 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
*/
sampler->ss0.base_level = U_FIXED(0, 1);
- sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
- sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
+ sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6);
+ sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6);
sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
}
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index 39f8c6d..f89ed9b 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -106,7 +106,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
- (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+ (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
+ /* BRW_NEW_DEPTH_BUFFER
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->state.depth_region == NULL)
+ key->computes_depth = 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
@@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_DEPTH_BUFFER |
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 3dcc592..8335e5a 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -31,7 +31,6 @@
#include "main/mtypes.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "shader/prog_parameter.h"
@@ -70,7 +69,8 @@ static GLuint translate_tex_target( GLenum target )
}
-static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+static GLuint translate_tex_format( gl_format mesa_format,
+ GLenum internal_format,
GLenum depth_mode )
{
switch( mesa_format ) {
@@ -86,21 +86,22 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
case MESA_FORMAT_AL88:
return BRW_SURFACEFORMAT_L8A8_UNORM;
+ case MESA_FORMAT_AL1616:
+ return BRW_SURFACEFORMAT_L16A16_UNORM;
+
case MESA_FORMAT_RGB888:
assert(0); /* not supported for sampling */
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
case MESA_FORMAT_ARGB8888:
- if (internal_format == GL_RGB)
- return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
- else
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ case MESA_FORMAT_XRGB8888:
+ return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
case MESA_FORMAT_RGBA8888_REV:
- if (internal_format == GL_RGB)
- return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
- else
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+ _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
case MESA_FORMAT_RGB565:
return BRW_SURFACEFORMAT_B5G6R5_UNORM;
@@ -287,7 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.bo = NULL;
key.offset = intelObj->textureOffset;
} else {
- key.format = firstImage->TexFormat->MesaFormat;
+ key.format = firstImage->TexFormat;
key.internal_format = firstImage->InternalFormat;
key.pitch = intelObj->mt->pitch;
key.depth = firstImage->Depth;
@@ -354,7 +355,10 @@ brw_create_constant_surface( struct brw_context *brw,
NULL, NULL);
if (key->bo) {
- /* Emit relocation to surface contents */
+ /* Emit relocation to surface contents. Section 5.1.1 of the gen4
+ * bspec ("Data Cache") says that the data cache does not exist as
+ * a separate cache and is just the sampler cache.
+ */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_SAMPLER, 0,
0,
@@ -527,8 +531,15 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
region_bo = region->buffer;
key.surface_type = BRW_SURFACE_2D;
- switch (irb->texformat->MesaFormat) {
+ switch (irb->Base.Format) {
+ /* XRGB and ARGB are treated the same here because the chips in this
+ * family cannot render to XRGB targets. This means that we have to
+ * mask writes to alpha (ala glColorMask) and reconfigure the alpha
+ * blending hardware to use GL_ONE (or GL_ZERO) for cases where
+ * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used.
+ */
case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
case MESA_FORMAT_RGB565:
@@ -541,26 +552,38 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
break;
default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format);
}
key.tiling = region->tiling;
- key.width = region->width;
- key.height = region->height;
+ if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+ key.width = rb->Width;
+ key.height = rb->Height;
+ } else {
+ key.width = region->width;
+ key.height = region->height;
+ }
key.pitch = region->pitch;
key.cpp = region->cpp;
key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiling = 0;
+ key.tiling = I915_TILING_X;
key.width = 1;
key.height = 1;
key.cpp = 4;
key.draw_offset = 0;
}
+ /* _NEW_COLOR */
memcpy(key.color_mask, ctx->Color.ColorMask,
sizeof(key.color_mask));
+
+ /* As mentioned above, disable writes to the alpha component when the
+ * renderbuffer is XRGB.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0)
+ key.color_mask[3] = GL_FALSE;
+
key.color_blend = (!ctx->Color._LogicOpEnabled &&
ctx->Color.BlendEnabled);
@@ -655,7 +678,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
if (bind_bo == NULL) {
GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
- uint32_t *data = malloc(data_size);
+ uint32_t data[BRW_WM_MAX_SURF];
int i;
for (i = 0; i < brw->wm.nr_surfaces; i++)
@@ -680,8 +703,6 @@ brw_wm_get_binding_table(struct brw_context *brw)
brw->wm.surf_bo[i]);
}
}
-
- free(data);
}
return bind_bo;
@@ -690,11 +711,10 @@ brw_wm_get_binding_table(struct brw_context *brw)
static void prepare_wm_surfaces(struct brw_context *brw )
{
GLcontext *ctx = &brw->intel.ctx;
- struct intel_context *intel = &brw->intel;
GLuint i;
int old_nr_surfaces;
- /* _NEW_BUFFERS */
+ /* _NEW_BUFFERS | _NEW_COLOR */
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
@@ -707,7 +727,7 @@ static void prepare_wm_surfaces(struct brw_context *brw )
}
old_nr_surfaces = brw->wm.nr_surfaces;
- brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+ brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS;
if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
@@ -719,17 +739,8 @@ static void prepare_wm_surfaces(struct brw_context *brw )
/* _NEW_TEXTURE, BRW_NEW_TEXDATA */
if (texUnit->_ReallyEnabled) {
- if (texUnit->_Current == intel->frame_buffer_texobj) {
- /* render to texture */
- dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw->wm.surf_bo[0];
- dri_bo_reference(brw->wm.surf_bo[surf]);
- brw->wm.nr_surfaces = surf + 1;
- } else {
- /* regular texture */
- brw_update_texture_surface(ctx, i);
- brw->wm.nr_surfaces = surf + 1;
- }
+ brw_update_texture_surface(ctx, i);
+ brw->wm.nr_surfaces = surf + 1;
} else {
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
index 6aa36d1..ca6e2fa 100644
--- a/shared/intel_batchbuffer.c
+++ b/shared/intel_batchbuffer.c
@@ -201,11 +201,6 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
}
- if (intel->first_post_swapbuffers_batch == NULL) {
- intel->first_post_swapbuffers_batch = intel->batch->buf;
- drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
- }
-
if (used == 0) {
batch->cliprect_mode = IGNORE_CLIPRECTS;
return;
@@ -215,10 +210,10 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
used);
+ batch->reserved_space = 0;
/* Emit a flush if the bufmgr doesn't do it for us. */
if (intel->always_flush_cache || !intel->ttm) {
- *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
- batch->ptr += 4;
+ intel_batchbuffer_emit_mi_flush(batch);
used = batch->ptr - batch->map;
}
@@ -249,6 +244,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
if (intel->vtbl.finish_batch)
intel->vtbl.finish_batch(intel);
+ batch->reserved_space = BATCH_RESERVED;
+
/* TODO: Just pass the relocation list and dma buffer up to the
* kernel.
*/
@@ -304,3 +301,31 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch,
__memcpy(batch->ptr, data, bytes);
batch->ptr += bytes;
}
+
+/* Emit a pipelined flush to either flush render and texture cache for
+ * reading from a FBO-drawn texture, or flush so that frontbuffer
+ * render appears on the screen in DRI1.
+ *
+ * This is also used for the always_flush_cache driconf debug option.
+ */
+void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+ struct intel_context *intel = batch->intel;
+
+ if (intel->gen >= 4) {
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_NO_WRITE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(MI_FLUSH);
+ ADVANCE_BATCH();
+ }
+}
diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h
index 51579df..d4a9445 100644
--- a/shared/intel_batchbuffer.h
+++ b/shared/intel_batchbuffer.h
@@ -62,6 +62,7 @@ struct intel_batchbuffer
} emit;
GLuint dirty_state;
+ GLuint reserved_space;
};
struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
@@ -95,6 +96,7 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
+void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
@@ -104,7 +106,7 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
static INLINE GLint
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
- return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+ return (batch->size - batch->reserved_space) - (batch->ptr - batch->map);
}
@@ -157,7 +159,7 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
- assert((delta) >= 0); \
+ assert((unsigned) (delta) < buf->size); \
intel_batchbuffer_emit_reloc(intel->batch, buf, \
read_domains, write_domain, delta); \
} while (0)
@@ -173,12 +175,4 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
intel->batch->emit.start_ptr = NULL; \
} while(0)
-
-static INLINE void
-intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
-{
- intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
- intel_batchbuffer_emit_dword(batch, MI_FLUSH);
-}
-
#endif
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index 0c5be4c..9f638b0 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -26,13 +26,9 @@
**************************************************************************/
-#include <stdio.h>
-#include <errno.h>
-
#include "main/mtypes.h"
#include "main/context.h"
#include "main/enums.h"
-#include "main/texformat.h"
#include "main/colormac.h"
#include "intel_blit.h"
@@ -374,8 +370,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
skipBuffers = BUFFER_BIT_STENCIL;
}
- /* XXX Move this flush/lock into the following conditional? */
- intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
@@ -441,6 +435,10 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
intel_region_buffer(intel, irb->region,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
+ int x1 = b.x1 + irb->region->draw_x;
+ int y1 = b.y1 + irb->region->draw_y;
+ int x2 = b.x2 + irb->region->draw_x;
+ int y2 = b.y2 + irb->region->draw_y;
GLuint clearVal;
GLint pitch, cpp;
@@ -449,11 +447,10 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
pitch = irb->region->pitch;
cpp = irb->region->cpp;
- DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
__FUNCTION__,
irb->region->buffer, (pitch * cpp),
- irb->region->draw_offset,
- b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1);
+ x1, y1, x2 - x1, y2 - y1);
BR13 = 0xf0 << 16;
CMD = XY_COLOR_BLT_CMD;
@@ -499,8 +496,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
- switch (irb->texformat->MesaFormat) {
+ switch (irb->Base.Format) {
case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
clearVal = intel->ClearColor8888;
break;
case MESA_FORMAT_RGB565:
@@ -516,7 +514,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
break;
default:
_mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ irb->Base.Format);
clearVal = 0;
}
}
@@ -526,17 +524,17 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
buf, irb->Base.Name);
*/
- assert(b.x1 < b.x2);
- assert(b.y1 < b.y2);
+ assert(x1 < x2);
+ assert(y1 < y2);
BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
OUT_BATCH(CMD);
OUT_BATCH(BR13);
- OUT_BATCH((b.y1 << 16) | b.x1);
- OUT_BATCH((b.y2 << 16) | b.x2);
+ OUT_BATCH((y1 << 16) | x1);
+ OUT_BATCH((y2 << 16) | x2);
OUT_RELOC(write_buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- irb->region->draw_offset);
+ 0);
OUT_BATCH(clearVal);
ADVANCE_BATCH();
clearMask &= ~bufBit; /* turn off bit, for faster loop exit */
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
index a022593..3b7015b 100644
--- a/shared/intel_buffer_objects.c
+++ b/shared/intel_buffer_objects.c
@@ -209,9 +209,23 @@ intel_bufferobj_subdata(GLcontext * ctx,
memcpy((char *)intel_obj->sys_buffer + offset, data, size);
else {
/* Flush any existing batchbuffer that might reference this data. */
- intelFlush(ctx);
+ if (drm_intel_bo_busy(intel_obj->buffer) ||
+ drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) {
+ drm_intel_bo *temp_bo;
+
+ temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64);
+
+ drm_intel_bo_subdata(temp_bo, 0, size, data);
- dri_bo_subdata(intel_obj->buffer, offset, size, data);
+ intel_emit_linear_blit(intel,
+ intel_obj->buffer, offset,
+ temp_bo, 0,
+ size);
+
+ drm_intel_bo_unreference(temp_bo);
+ } else {
+ dri_bo_subdata(intel_obj->buffer, offset, size, data);
+ }
}
}
@@ -254,13 +268,14 @@ intel_bufferobj_map(GLcontext * ctx,
if (intel_obj->sys_buffer) {
obj->Pointer = intel_obj->sys_buffer;
+ obj->Length = obj->Size;
+ obj->Offset = 0;
return obj->Pointer;
}
- /* Flush any existing batchbuffer that might have written to this
- * buffer.
- */
- intelFlush(ctx);
+ /* Flush any existing batchbuffer that might reference this data. */
+ if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
+ intelFlush(ctx);
if (intel_obj->region)
intel_bufferobj_cow(intel, intel_obj);
@@ -330,7 +345,8 @@ intel_bufferobj_map_range(GLcontext * ctx,
* the batchbuffer so that GEM knows about the buffer access for later
* syncing.
*/
- if (!(access & GL_MAP_UNSYNCHRONIZED_BIT))
+ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+ drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
intelFlush(ctx);
if (intel_obj->buffer == NULL) {
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index e7357e7..0564318 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -133,6 +133,25 @@ intel_get_cliprects(struct intel_context *intel,
/**
+ * Check if we're about to draw into the front color buffer.
+ * If so, set the intel->front_buffer_dirty field to true.
+ */
+void
+intel_check_front_buffer_rendering(struct intel_context *intel)
+{
+ const struct gl_framebuffer *fb = intel->ctx.DrawBuffer;
+ if (fb->Name == 0) {
+ /* drawing to window system buffer */
+ if (fb->_NumColorDrawBuffers > 0) {
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ intel->front_buffer_dirty = GL_TRUE;
+ }
+ }
+ }
+}
+
+
+/**
* Update the hardware state for drawing into a window or framebuffer object.
*
* Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other
@@ -172,10 +191,15 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
return;
}
- /*
- * How many color buffers are we drawing into?
+ /* How many color buffers are we drawing into?
+ *
+ * If there are zero buffers or the buffer is too big, don't configure any
+ * regions for hardware drawing. We'll fallback to software below. Not
+ * having regions set makes some of the software fallback paths faster.
*/
- if (fb->_NumColorDrawBuffers == 0) {
+ if ((fb->Width > ctx->Const.MaxRenderbufferSize)
+ || (fb->Height > ctx->Const.MaxRenderbufferSize)
+ || (fb->_NumColorDrawBuffers == 0)) {
/* writing to 0 */
colorRegions[0] = NULL;
intel->constant_cliprect = GL_TRUE;
@@ -192,7 +216,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
}
else {
/* Get the intel_renderbuffer for the single colorbuffer we're drawing
- * into, and set up cliprects if it's .
+ * into, and set up cliprects if it's a DRI1 window front buffer.
*/
if (fb->Name == 0) {
intel->constant_cliprect = intel->driScreen->dri2.enabled;
@@ -202,14 +226,12 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
intel_batchbuffer_flush(intel->batch);
intel->front_cliprects = GL_TRUE;
colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
-
- intel->front_buffer_dirty = GL_TRUE;
}
else {
if (!intel->constant_cliprect && intel->front_cliprects)
intel_batchbuffer_flush(intel->batch);
intel->front_cliprects = GL_FALSE;
- colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+ colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
}
}
else {
@@ -257,7 +279,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped);
if (irbStencil && irbStencil->region) {
- ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+ ASSERT(irbStencil->Base.Format == MESA_FORMAT_S8_Z24);
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
}
else {
diff --git a/shared/intel_buffers.h b/shared/intel_buffers.h
index 6069d38..d7800f2 100644
--- a/shared/intel_buffers.h
+++ b/shared/intel_buffers.h
@@ -45,6 +45,8 @@ extern struct intel_region *intel_readbuf_region(struct intel_context *intel);
extern struct intel_region *intel_drawbuf_region(struct intel_context *intel);
+extern void intel_check_front_buffer_rendering(struct intel_context *intel);
+
extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb);
extern void intelInitBufferFuncs(struct dd_function_table *functions);
diff --git a/shared/intel_clear.c b/shared/intel_clear.c
index 1b0e221..f682ee3 100644
--- a/shared/intel_clear.c
+++ b/shared/intel_clear.c
@@ -75,6 +75,10 @@ intelClear(GLcontext *ctx, GLbitfield mask)
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLuint i;
+ if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
+ intel->front_buffer_dirty = GL_TRUE;
+ }
+
if (0)
fprintf(stderr, "%s\n", __FUNCTION__);
@@ -172,7 +176,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
DBG("\n");
}
- _mesa_meta_clear(&intel->ctx, tri_mask);
+ _mesa_meta_Clear(&intel->ctx, tri_mask);
}
if (swrast_mask) {
diff --git a/shared/intel_context.c b/shared/intel_context.c
index fce42e9..dd24b71 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -28,7 +28,7 @@
#include "main/glheader.h"
#include "main/context.h"
-#include "main/arrayobj.h"
+/* #include "main/arrayobj.h" */
#include "main/extensions.h"
#include "main/framebuffer.h"
#include "main/imports.h"
@@ -68,7 +68,7 @@ int INTEL_DEBUG = (0);
#endif
-#define DRIVER_DATE "20090712 2009Q2 RC3"
+#define DRIVER_DATE "20091221 2009Q4"
#define DRIVER_DATE_GEM "GEM " DRIVER_DATE
@@ -189,19 +189,7 @@ intelGetString(GLcontext * ctx, GLenum name)
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
- switch (rb->Base._ActualFormat) {
- case GL_RGB5:
- case GL_DEPTH_COMPONENT16:
- return 16;
- case GL_RGB8:
- case GL_RGBA8:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH24_STENCIL8_EXT:
- case GL_STENCIL_INDEX8_EXT:
- return 32;
- default:
- return 0;
- }
+ return _mesa_get_format_bytes(rb->Base.Format) * 8;
}
void
@@ -489,14 +477,14 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
if (intel->Fallback)
_swrast_flush(ctx);
- if (!IS_965(intel->intelScreen->deviceID))
+ if (intel->gen < 4)
INTEL_FIREVERTICES(intel);
/* Emit a flush so that any frontbuffer rendering that might have occurred
* lands onscreen in a timely manner, even if the X Server doesn't trigger
* a flush for us.
*/
- if (needs_mi_flush)
+ if (!intel->driScreen->dri2.enabled && needs_mi_flush)
intel_batchbuffer_emit_mi_flush(intel->batch);
if (intel->batch->map != intel->batch->ptr)
@@ -588,11 +576,6 @@ intelInitDriverFunctions(struct dd_function_table *functions)
functions->GetString = intelGetString;
functions->UpdateState = intelInvalidateState;
- functions->CopyColorTable = _swrast_CopyColorTable;
- functions->CopyColorSubTable = _swrast_CopyColorSubTable;
- functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
- functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
-
intelInitTextureFuncs(functions);
intelInitTextureImageFuncs(functions);
intelInitTextureSubImageFuncs(functions);
@@ -631,6 +614,13 @@ intelInitContext(struct intel_context *intel,
intel->sarea = intelScreen->sarea;
intel->driContext = driContextPriv;
+ if (IS_965(intel->intelScreen->deviceID))
+ intel->gen = 4;
+ else if (IS_9XX(intel->intelScreen->deviceID))
+ intel->gen = 3;
+ else
+ intel->gen = 2;
+
/* Dri stuff */
intel->hHWContext = driContextPriv->hHWContext;
intel->driFd = sPriv->fd;
@@ -638,17 +628,13 @@ intelInitContext(struct intel_context *intel,
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
intel->driScreen->myNum,
- IS_965(intelScreen->deviceID) ? "i965" : "i915");
+ (intel->gen >= 4) ? "i965" : "i915");
if (intelScreen->deviceID == PCI_CHIP_I865_G)
intel->maxBatchSize = 4096;
else
intel->maxBatchSize = BATCH_SZ;
intel->bufmgr = intelScreen->bufmgr;
-
- if (0) /* for debug */
- drm_intel_bufmgr_set_debug(intel->bufmgr, 1);
-
intel->ttm = intelScreen->ttm;
if (intel->ttm) {
int bo_reuse_mode;
@@ -704,7 +690,7 @@ intelInitContext(struct intel_context *intel,
meta_init_metaops(ctx, &intel->meta);
ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */
- if (IS_965(intelScreen->deviceID)) {
+ if (intel->gen >= 4) {
if (MAX_WIDTH > 8192)
ctx->Const.MaxRenderbufferSize = 8192;
} else {
@@ -741,7 +727,7 @@ intelInitContext(struct intel_context *intel,
break;
}
- if (IS_965(intelScreen->deviceID))
+ if (intel->gen >= 4)
intel->polygon_offset_scale /= 0xffff;
intel->RenderIndex = ~0;
@@ -754,12 +740,12 @@ intelInitContext(struct intel_context *intel,
intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
- if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) {
+ if (intel->gen >= 4 && !intel->intelScreen->irq_active) {
_mesa_printf("IRQs not active. Exiting\n");
exit(1);
}
- intelInitExtensions(ctx, GL_FALSE);
+ intelInitExtensions(ctx);
INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
if (INTEL_DEBUG & DEBUG_BUFMGR)
@@ -839,7 +825,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
_vbo_DestroyContext(&intel->ctx);
_swrast_DestroyContext(&intel->ctx);
- intel->Fallback = 0; /* don't call _swrast_Flush later */
+ intel->Fallback = 0x0; /* don't call _swrast_Flush later */
intel_batchbuffer_free(intel->batch);
intel->batch = NULL;
@@ -944,10 +930,23 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
__DRIdrawablePrivate * driReadPriv)
{
__DRIscreenPrivate *psp = driDrawPriv->driScreenPriv;
+ struct intel_context *intel;
+ GET_CURRENT_CONTEXT(curCtx);
+
+ if (driContextPriv)
+ intel = (struct intel_context *) driContextPriv->driverPrivate;
+ else
+ intel = NULL;
+
+ /* According to the glXMakeCurrent() man page: "Pending commands to
+ * the previous context, if any, are flushed before it is released."
+ * But only flush if we're actually changing contexts.
+ */
+ if (intel_context(curCtx) && intel_context(curCtx) != intel) {
+ _mesa_flush(curCtx);
+ }
if (driContextPriv) {
- struct intel_context *intel =
- (struct intel_context *) driContextPriv->driverPrivate;
struct intel_framebuffer *intel_fb =
(struct intel_framebuffer *) driDrawPriv->driverPrivate;
GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate;
@@ -993,41 +992,35 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
_mesa_make_current(&intel->ctx, &intel_fb->Base, readFb);
- /* The drawbuffer won't always be updated by _mesa_make_current:
- */
- if (intel->ctx.DrawBuffer == &intel_fb->Base) {
-
- if (intel->driReadDrawable != driReadPriv)
- intel->driReadDrawable = driReadPriv;
-
- if (intel->driDrawable != driDrawPriv) {
- if (driDrawPriv->swap_interval == (unsigned)-1) {
- int i;
-
- driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0)
- ? driGetDefaultVBlankFlags(&intel->optionCache)
- : VBLANK_FLAG_NO_IRQ;
-
- /* Prevent error printf if one crtc is disabled, this will
- * be properly calculated in intelWindowMoved() next.
- */
- driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv);
-
- (*psp->systemTime->getUST) (&intel_fb->swap_ust);
- driDrawableInitVBlank(driDrawPriv);
- intel_fb->vbl_waited = driDrawPriv->vblSeq;
-
- for (i = 0; i < 2; i++) {
- if (intel_fb->color_rb[i])
- intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
- }
- }
- intel->driDrawable = driDrawPriv;
- intelWindowMoved(intel);
- }
+ intel->driReadDrawable = driReadPriv;
+
+ if (intel->driDrawable != driDrawPriv) {
+ if (driDrawPriv->swap_interval == (unsigned)-1) {
+ int i;
- intel_draw_buffer(&intel->ctx, &intel_fb->Base);
+ driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0)
+ ? driGetDefaultVBlankFlags(&intel->optionCache)
+ : VBLANK_FLAG_NO_IRQ;
+
+ /* Prevent error printf if one crtc is disabled, this will
+ * be properly calculated in intelWindowMoved() next.
+ */
+ driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv);
+
+ (*psp->systemTime->getUST) (&intel_fb->swap_ust);
+ driDrawableInitVBlank(driDrawPriv);
+ intel_fb->vbl_waited = driDrawPriv->vblSeq;
+
+ for (i = 0; i < 2; i++) {
+ if (intel_fb->color_rb[i])
+ intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
+ }
+ }
+ intel->driDrawable = driDrawPriv;
+ intelWindowMoved(intel);
}
+
+ intel_draw_buffer(&intel->ctx, &intel_fb->Base);
}
else {
_mesa_make_current(NULL, NULL, NULL);
diff --git a/shared/intel_context.h b/shared/intel_context.h
index 03e7cf3..eb7be7d 100644
--- a/shared/intel_context.h
+++ b/shared/intel_context.h
@@ -61,6 +61,10 @@ typedef void (*intel_line_func) (struct intel_context *, intelVertex *,
intelVertex *);
typedef void (*intel_point_func) (struct intel_context *, intelVertex *);
+/**
+ * Bits for intel->Fallback field
+ */
+/*@{*/
#define INTEL_FALLBACK_DRAW_BUFFER 0x1
#define INTEL_FALLBACK_READ_BUFFER 0x2
#define INTEL_FALLBACK_DEPTH_BUFFER 0x4
@@ -68,8 +72,10 @@ typedef void (*intel_point_func) (struct intel_context *, intelVertex *);
#define INTEL_FALLBACK_USER 0x10
#define INTEL_FALLBACK_RENDERMODE 0x20
#define INTEL_FALLBACK_TEXTURE 0x40
+#define INTEL_FALLBACK_DRIVER 0x1000 /**< first for drivers */
+/*@}*/
-extern void intelFallback(struct intel_context *intel, GLuint bit,
+extern void intelFallback(struct intel_context *intel, GLbitfield bit,
GLboolean mode);
#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
@@ -111,8 +117,6 @@ struct intel_context
struct intel_region * depth_region,
GLuint num_regions);
- GLuint (*flush_cmd) (void);
-
void (*reduced_primitive_state) (struct intel_context * intel,
GLenum rprim);
@@ -170,13 +174,17 @@ struct intel_context
struct dri_metaops meta;
- GLint refcount;
- GLuint Fallback;
+ GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */
GLuint NewGLState;
dri_bufmgr *bufmgr;
unsigned int maxBatchSize;
+ /**
+ * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965.
+ */
+ int gen;
+
struct intel_region *front_region;
struct intel_region *back_region;
struct intel_region *depth_region;
@@ -190,7 +198,6 @@ struct intel_context
struct intel_batchbuffer *batch;
drm_intel_bo *first_post_swapbuffers_batch;
GLboolean no_batch_wrap;
- unsigned batch_id;
struct
{
@@ -254,9 +261,6 @@ struct intel_context
intel_line_func draw_line;
intel_tri_func draw_tri;
- /* These refer to the current drawing buffer:
- */
- struct gl_texture_object *frame_buffer_texobj;
/**
* Set to true if a single constant cliprect should be used in the
* batchbuffer. Otherwise, cliprects must be calculated at batchbuffer
@@ -296,7 +300,6 @@ struct intel_context
GLboolean use_texture_tiling;
GLboolean use_early_z;
-
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
int perf_boxes;
@@ -351,6 +354,19 @@ extern char *__progname;
#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
#define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0)
+static inline uint32_t
+U_FIXED(float value, uint32_t frac_bits)
+{
+ value *= (1 << frac_bits);
+ return value < 0 ? 0 : value;
+}
+
+static inline uint32_t
+S_FIXED(float value, uint32_t frac_bits)
+{
+ return value * (1 << frac_bits);
+}
+
#define INTEL_FIREVERTICES(intel) \
do { \
if ((intel)->prim.flush) \
@@ -572,4 +588,25 @@ is_power_of_two(uint32_t value)
return (value & (value - 1)) == 0;
}
+static inline void
+intel_bo_map_gtt_preferred(struct intel_context *intel,
+ drm_intel_bo *bo,
+ GLboolean write)
+{
+ if (intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_map_gtt(bo);
+ else
+ drm_intel_bo_map(bo, write);
+}
+
+static inline void
+intel_bo_unmap_gtt_preferred(struct intel_context *intel,
+ drm_intel_bo *bo)
+{
+ if (intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_unmap_gtt(bo);
+ else
+ drm_intel_bo_unmap(bo);
+}
+
#endif
diff --git a/shared/intel_depthtmp.h b/shared/intel_depthtmp.h
index 16d7708..a9c75d4 100644
--- a/shared/intel_depthtmp.h
+++ b/shared/intel_depthtmp.h
@@ -31,6 +31,16 @@
*/
#define VALUE_TYPE INTEL_VALUE_TYPE
+#define WRITE_DEPTH(_x, _y, d) \
+ (*(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \
+ NO_TILE(_x, _y)) = d)
+#define READ_DEPTH(d, _x, _y) \
+ d = *(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \
+ NO_TILE(_x, _y))
+#define TAG(x) INTEL_TAG(intel_gttmap_##x)
+#include "depthtmp.h"
+
+#define VALUE_TYPE INTEL_VALUE_TYPE
#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d)
#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y))
#define TAG(x) INTEL_TAG(intel##x)
diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c
index 2e61c55..48cdae5 100644
--- a/shared/intel_extensions.c
+++ b/shared/intel_extensions.c
@@ -28,9 +28,11 @@
#include "intel_chipset.h"
#include "intel_context.h"
#include "intel_extensions.h"
+#include "utils.h"
#define need_GL_ARB_copy_buffer
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_framebuffer_object
#define need_GL_ARB_map_buffer_range
#define need_GL_ARB_occlusion_query
@@ -62,7 +64,7 @@
#define need_GL_VERSION_2_0
#define need_GL_VERSION_2_1
-#include "extension_helper.h"
+#include "main/remap_helper.h"
/**
@@ -73,11 +75,15 @@
*/
static const struct dri_extension card_extensions[] = {
{ "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions },
+ { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{ "GL_ARB_half_float_pixel", NULL },
{ "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions },
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
{ "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
+ { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
{ "GL_ARB_sync", GL_ARB_sync_functions },
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_cube_map", NULL },
@@ -89,6 +95,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_texture_rectangle", NULL },
{ "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions},
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
+ { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
{ "GL_ARB_window_pos", GL_ARB_window_pos_functions },
{ "GL_EXT_blend_color", GL_EXT_blend_color_functions },
{ "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions },
@@ -114,7 +121,6 @@ static const struct dri_extension card_extensions[] = {
{ "GL_MESA_pack_invert", NULL },
{ "GL_MESA_ycbcr_texture", NULL },
{ "GL_NV_blend_square", NULL },
- { "GL_NV_point_sprite", GL_NV_point_sprite_functions },
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions },
{ "GL_NV_vertex_program1_1", NULL },
{ "GL_SGIS_generate_mipmap", NULL },
@@ -139,6 +145,7 @@ static const struct dri_extension i915_extensions[] = {
/** i965-only extensions */
static const struct dri_extension brw_extensions[] = {
+ { "GL_ARB_depth_clamp", NULL },
{ "GL_ARB_depth_texture", NULL },
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_fragment_program_shadow", NULL },
@@ -147,13 +154,9 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ "GL_ARB_point_sprite", NULL },
{ "GL_ARB_seamless_cube_map", NULL },
- { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
- { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
- { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
{ "GL_ARB_shadow", NULL },
{ "GL_MESA_texture_signed_rgba", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
- { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
{ "GL_EXT_shadow_funcs", NULL },
{ "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_EXT_texture_sRGB", NULL },
@@ -168,6 +171,7 @@ static const struct dri_extension brw_extensions[] = {
static const struct dri_extension arb_oq_extensions[] = {
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ NULL, NULL }
};
@@ -179,29 +183,38 @@ static const struct dri_extension ttm_extensions[] = {
{ NULL, NULL }
};
+static const struct dri_extension fragment_shader_extensions[] = {
+ { "GL_ARB_fragment_shader", NULL },
+ { NULL, NULL }
+};
/**
* Initializes potential list of extensions if ctx == NULL, or actually enables
* extensions for a context.
*/
void
-intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
+intelInitExtensions(GLcontext *ctx)
{
- struct intel_context *intel = ctx?intel_context(ctx):NULL;
+ struct intel_context *intel = intel_context(ctx);
/* Disable imaging extension until convolution is working in teximage paths.
*/
- enable_imaging = GL_FALSE;
+ driInitExtensions(ctx, card_extensions, GL_FALSE);
- driInitExtensions(ctx, card_extensions, enable_imaging);
-
- if (intel == NULL || intel->ttm)
+ if (intel->ttm)
driInitExtensions(ctx, ttm_extensions, GL_FALSE);
- if (intel == NULL || IS_965(intel->intelScreen->deviceID))
+ if (IS_965(intel->intelScreen->deviceID))
driInitExtensions(ctx, brw_extensions, GL_FALSE);
- if (intel == NULL || IS_915(intel->intelScreen->deviceID)
- || IS_945(intel->intelScreen->deviceID))
+ if (IS_915(intel->intelScreen->deviceID)
+ || IS_945(intel->intelScreen->deviceID)) {
driInitExtensions(ctx, i915_extensions, GL_FALSE);
+
+ if (driQueryOptionb(&intel->optionCache, "fragment_shader"))
+ driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE);
+
+ if (driQueryOptionb(&intel->optionCache, "stub_occlusion_query"))
+ driInitExtensions(ctx, arb_oq_extensions, GL_FALSE);
+ }
}
diff --git a/shared/intel_extensions.h b/shared/intel_extensions.h
index 97147ec..1d1c97a 100644
--- a/shared/intel_extensions.h
+++ b/shared/intel_extensions.h
@@ -30,7 +30,7 @@
extern void
-intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging);
+intelInitExtensions(GLcontext *ctx);
#endif
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index 804c034..608f75b 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -33,11 +33,11 @@
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/context.h"
-#include "main/texformat.h"
#include "main/texrender.h"
#include "drivers/common/meta.h"
#include "intel_context.h"
+#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
@@ -106,8 +106,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- GLboolean softwareBuffer = GL_FALSE;
int cpp;
+ GLuint pitch;
ASSERT(rb->Name != 0);
@@ -115,27 +115,16 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_R3_G3_B2:
case GL_RGB4:
case GL_RGB5:
- rb->_ActualFormat = GL_RGB5;
+ rb->Format = MESA_FORMAT_RGB565;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 5;
- rb->GreenBits = 6;
- rb->BlueBits = 5;
- irb->texformat = &_mesa_texformat_rgb565;
- cpp = 2;
break;
case GL_RGB:
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
- rb->_ActualFormat = GL_RGB8;
+ rb->Format = MESA_FORMAT_XRGB8888;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 8;
- rb->GreenBits = 8;
- rb->BlueBits = 8;
- rb->AlphaBits = 0;
- irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */
- cpp = 4;
break;
case GL_RGBA:
case GL_RGBA2:
@@ -145,14 +134,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- rb->_ActualFormat = GL_RGBA8;
+ rb->Format = MESA_FORMAT_ARGB8888;
rb->DataType = GL_UNSIGNED_BYTE;
- rb->RedBits = 8;
- rb->GreenBits = 8;
- rb->BlueBits = 8;
- rb->AlphaBits = 8;
- irb->texformat = &_mesa_texformat_argb8888;
- cpp = 4;
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
@@ -160,36 +143,23 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* alloc a depth+stencil buffer */
- rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- rb->StencilBits = 8;
- cpp = 4;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
case GL_DEPTH_COMPONENT16:
- rb->_ActualFormat = GL_DEPTH_COMPONENT16;
+ rb->Format = MESA_FORMAT_Z16;
rb->DataType = GL_UNSIGNED_SHORT;
- rb->DepthBits = 16;
- cpp = 2;
- irb->texformat = &_mesa_texformat_z16;
break;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
- rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- rb->DepthBits = 24;
- cpp = 4;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
- rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- rb->DepthBits = 24;
- rb->StencilBits = 8;
- cpp = 4;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
default:
_mesa_problem(ctx,
@@ -197,6 +167,9 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
return GL_FALSE;
}
+ rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
+ cpp = _mesa_get_format_bytes(rb->Format);
+
intelFlush(ctx);
/* free old region */
@@ -205,32 +178,25 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
}
/* allocate new memory region/renderbuffer */
- if (softwareBuffer) {
- return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat,
- width, height);
- }
- else {
- /* Choose a pitch to match hardware requirements:
- */
- GLuint pitch = ((cpp * width + 63) & ~63) / cpp;
- /* alloc hardware renderbuffer */
- DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
- height, pitch);
+ /* Choose a pitch to match hardware requirements:
+ */
+ pitch = ((cpp * width + 63) & ~63) / cpp;
- irb->region = intel_region_alloc(intel, I915_TILING_NONE,
- cpp, width, height, pitch,
- GL_TRUE);
- if (!irb->region)
- return GL_FALSE; /* out of memory? */
+ /* alloc hardware renderbuffer */
+ DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch);
- ASSERT(irb->region->buffer);
+ irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp,
+ width, height, pitch, GL_TRUE);
+ if (!irb->region)
+ return GL_FALSE; /* out of memory? */
- rb->Width = width;
- rb->Height = height;
+ ASSERT(irb->region->buffer);
- return GL_TRUE;
- }
+ rb->Width = width;
+ rb->Height = height;
+
+ return GL_TRUE;
}
@@ -246,7 +212,7 @@ intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
ASSERT(rb->Name == 0);
rb->Width = width;
rb->Height = height;
- rb->_ActualFormat = internalFormat;
+ rb->InternalFormat = internalFormat;
return GL_TRUE;
}
@@ -307,12 +273,11 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
* not a user-created renderbuffer.
*/
struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat)
+intel_create_renderbuffer(gl_format format)
{
GET_CURRENT_CONTEXT(ctx);
struct intel_renderbuffer *irb;
- const GLuint name = 0;
irb = CALLOC_STRUCT(intel_renderbuffer);
if (!irb) {
@@ -320,75 +285,43 @@ intel_create_renderbuffer(GLenum intFormat)
return NULL;
}
- _mesa_init_renderbuffer(&irb->Base, name);
+ _mesa_init_renderbuffer(&irb->Base, 0);
irb->Base.ClassID = INTEL_RB_CLASS;
- switch (intFormat) {
- case GL_RGB5:
- irb->Base._ActualFormat = GL_RGB5;
- irb->Base._BaseFormat = GL_RGBA;
- irb->Base.RedBits = 5;
- irb->Base.GreenBits = 6;
- irb->Base.BlueBits = 5;
+ switch (format) {
+ case MESA_FORMAT_RGB565:
+ irb->Base._BaseFormat = GL_RGB;
irb->Base.DataType = GL_UNSIGNED_BYTE;
- irb->texformat = &_mesa_texformat_rgb565;
break;
- case GL_RGB8:
- irb->Base._ActualFormat = GL_RGB8;
+ case MESA_FORMAT_XRGB8888:
irb->Base._BaseFormat = GL_RGB;
- irb->Base.RedBits = 8;
- irb->Base.GreenBits = 8;
- irb->Base.BlueBits = 8;
- irb->Base.AlphaBits = 0;
irb->Base.DataType = GL_UNSIGNED_BYTE;
- irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */
break;
- case GL_RGBA8:
- irb->Base._ActualFormat = GL_RGBA8;
+ case MESA_FORMAT_ARGB8888:
irb->Base._BaseFormat = GL_RGBA;
- irb->Base.RedBits = 8;
- irb->Base.GreenBits = 8;
- irb->Base.BlueBits = 8;
- irb->Base.AlphaBits = 8;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- irb->texformat = &_mesa_texformat_argb8888;
- break;
- case GL_STENCIL_INDEX8_EXT:
- irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT;
- irb->Base._BaseFormat = GL_STENCIL_INDEX;
- irb->Base.StencilBits = 8;
irb->Base.DataType = GL_UNSIGNED_BYTE;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
- case GL_DEPTH_COMPONENT16:
- irb->Base._ActualFormat = GL_DEPTH_COMPONENT16;
+ case MESA_FORMAT_Z16:
irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
- irb->Base.DepthBits = 16;
irb->Base.DataType = GL_UNSIGNED_SHORT;
- irb->texformat = &_mesa_texformat_z16;
break;
- case GL_DEPTH_COMPONENT24:
- irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+ case MESA_FORMAT_X8_Z24:
irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
- irb->Base.DepthBits = 24;
irb->Base.DataType = GL_UNSIGNED_INT;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
- case GL_DEPTH24_STENCIL8_EXT:
- irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
- irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
- irb->Base.DepthBits = 24;
- irb->Base.StencilBits = 8;
+ case MESA_FORMAT_S8_Z24:
+ irb->Base._BaseFormat = GL_DEPTH_STENCIL;
irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
- irb->texformat = &_mesa_texformat_s8_z24;
break;
default:
_mesa_problem(NULL,
"Unexpected intFormat in intel_create_renderbuffer");
+ _mesa_free(irb);
return NULL;
}
- irb->Base.InternalFormat = intFormat;
+ irb->Base.Format = format;
+ irb->Base.InternalFormat = irb->Base._BaseFormat;
/* intel-specific methods */
irb->Base.Delete = intel_delete_renderbuffer;
@@ -465,59 +398,49 @@ static GLboolean
intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
struct gl_texture_image *texImage)
{
- irb->texformat = texImage->TexFormat;
+ gl_format texFormat;
- if (texImage->TexFormat == &_mesa_texformat_argb8888) {
- irb->Base._ActualFormat = GL_RGBA8;
- irb->Base._BaseFormat = GL_RGBA;
+ if (texImage->TexFormat == MESA_FORMAT_ARGB8888) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGBA8 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
- irb->Base._ActualFormat = GL_RGB5;
- irb->Base._BaseFormat = GL_RGB;
+ else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) {
+ irb->Base.DataType = GL_UNSIGNED_BYTE;
+ DBG("Render to XGBA8 texture OK\n");
+ }
+ else if (texImage->TexFormat == MESA_FORMAT_RGB565) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGB5 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_argb1555) {
- irb->Base._ActualFormat = GL_RGB5_A1;
- irb->Base._BaseFormat = GL_RGBA;
+ else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to ARGB1555 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_argb4444) {
- irb->Base._ActualFormat = GL_RGBA4;
- irb->Base._BaseFormat = GL_RGBA;
+ else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to ARGB4444 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_z16) {
- irb->Base._ActualFormat = GL_DEPTH_COMPONENT16;
- irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+ else if (texImage->TexFormat == MESA_FORMAT_Z16) {
irb->Base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to DEPTH16 texture OK\n");
}
- else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
- irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
- irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+ else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
DBG("Render to DEPTH_STENCIL texture OK\n");
}
else {
- DBG("Render to texture BAD FORMAT %d\n",
- texImage->TexFormat->MesaFormat);
+ DBG("Render to texture BAD FORMAT %d\n", texImage->TexFormat);
return GL_FALSE;
}
- irb->Base.InternalFormat = irb->Base._ActualFormat;
+ irb->Base.Format = texImage->TexFormat;
+
+ texFormat = texImage->TexFormat;
+
+ irb->Base.InternalFormat = texImage->InternalFormat;
+ irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat);
irb->Base.Width = texImage->Width;
irb->Base.Height = texImage->Height;
- irb->Base.RedBits = texImage->TexFormat->RedBits;
- irb->Base.GreenBits = texImage->TexFormat->GreenBits;
- irb->Base.BlueBits = texImage->TexFormat->BlueBits;
- irb->Base.AlphaBits = texImage->TexFormat->AlphaBits;
- irb->Base.DepthBits = texImage->TexFormat->DepthBits;
- irb->Base.StencilBits = texImage->TexFormat->StencilBits;
irb->Base.Delete = intel_delete_renderbuffer;
irb->Base.AllocStorage = intel_nop_alloc_storage;
@@ -571,7 +494,7 @@ intel_render_texture(GLcontext * ctx,
= att->Texture->Image[att->CubeMapFace][att->TextureLevel];
struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
struct intel_texture_image *intel_image;
- GLuint imageOffset;
+ GLuint dst_x, dst_y;
(void) fb;
@@ -618,18 +541,17 @@ intel_render_texture(GLcontext * ctx,
}
/* compute offset of the particular 2D image within the texture region */
- imageOffset = intel_miptree_image_offset(intel_image->mt,
- att->CubeMapFace,
- att->TextureLevel);
-
- if (att->Texture->Target == GL_TEXTURE_3D) {
- const GLuint *offsets = intel_miptree_depth_offsets(intel_image->mt,
- att->TextureLevel);
- imageOffset += offsets[att->Zoffset];
- }
-
- /* store that offset in the region */
- intel_image->mt->region->draw_offset = imageOffset;
+ intel_miptree_get_image_offset(intel_image->mt,
+ att->TextureLevel,
+ att->CubeMapFace,
+ att->Zoffset,
+ &dst_x, &dst_y);
+
+ intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->pitch +
+ dst_x) * intel_image->mt->cpp;
+ intel_image->mt->region->draw_x = dst_x;
+ intel_image->mt->region->draw_y = dst_y;
+ intel_image->used_as_render_target = GL_TRUE;
/* update drawing region, etc */
intel_draw_buffer(ctx, fb);
@@ -643,19 +565,23 @@ static void
intel_finish_render_texture(GLcontext * ctx,
struct gl_renderbuffer_attachment *att)
{
- /* no-op
- * Previously we released the renderbuffer's intel_region but
- * that's not necessary and actually caused problems when trying
- * to do a glRead/CopyPixels from the renderbuffer later.
- * The region will be released later if the texture is replaced
- * or the renderbuffer deleted.
- *
- * The intention of this driver hook is more of a "done rendering
- * to texture, please re-twiddle/etc if necessary".
+ struct intel_context *intel = intel_context(ctx);
+ struct gl_texture_object *tex_obj = att->Texture;
+ struct gl_texture_image *image =
+ tex_obj->Image[att->CubeMapFace][att->TextureLevel];
+ struct intel_texture_image *intel_image = intel_texture_image(image);
+
+ /* Flag that this image may now be validated into the object's miptree. */
+ intel_image->used_as_render_target = GL_FALSE;
+
+ /* Since we've (probably) rendered to the texture and will (likely) use
+ * it in the texture domain later on in this batchbuffer, flush the
+ * batch. Once again, we wish for a domain tracker in libdrm to cover
+ * usage inside of a batchbuffer like GEM does in the kernel.
*/
+ intel_batchbuffer_emit_mi_flush(intel->batch);
}
-
/**
* Do additional "completeness" testing of a framebuffer object.
*/
@@ -687,8 +613,9 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
continue;
}
- switch (irb->texformat->MesaFormat) {
+ switch (irb->Base.Format) {
case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_ARGB4444:
@@ -715,5 +642,5 @@ intel_fbo_init(struct intel_context *intel)
intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
- intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+ intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
}
diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h
index f0665af..fa43077 100644
--- a/shared/intel_fbo.h
+++ b/shared/intel_fbo.h
@@ -28,6 +28,7 @@
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
+#include "main/formats.h"
#include "intel_screen.h"
struct intel_context;
@@ -61,8 +62,6 @@ struct intel_renderbuffer
struct gl_renderbuffer Base;
struct intel_region *region;
- const struct gl_texture_format *texformat;
-
GLuint vbl_pending; /**< vblank sequence number of pending flip */
uint8_t *span_cache;
@@ -114,7 +113,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb,
extern struct intel_renderbuffer *
-intel_create_renderbuffer(GLenum intFormat);
+intel_create_renderbuffer(gl_format format);
extern void
diff --git a/shared/intel_generatemipmap.c b/shared/intel_generatemipmap.c
deleted file mode 100644
index 12059e1..0000000
--- a/shared/intel_generatemipmap.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "main/glheader.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
-#include "main/teximage.h"
-#include "main/texenv.h"
-#include "main/texobj.h"
-#include "main/texstate.h"
-#include "main/texparam.h"
-#include "main/varray.h"
-#include "main/attrib.h"
-#include "main/enable.h"
-#include "main/buffers.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-#include "main/depth.h"
-#include "main/hash.h"
-#include "main/mipmap.h"
-#include "main/blend.h"
-#include "glapi/dispatch.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_batchbuffer.h"
-#include "intel_pixel.h"
-#include "intel_tex.h"
-#include "intel_mipmap_tree.h"
-
-static const char *intel_fp_tex2d =
- "!!ARBfp1.0\n"
- "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n"
- "END\n";
-
-static GLboolean
-intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name,
- int level, int width, int height)
-{
- struct intel_context *intel = intel_context(ctx);
- GLfloat vertices[4][2];
- GLint status;
-
- /* Set to source from the previous level */
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1);
-
- /* Set to draw into the current level */
- _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
- GL_COLOR_ATTACHMENT0_EXT,
- GL_TEXTURE_2D,
- tex_name,
- level);
- /* Choose to render to the color attachment. */
- _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
-
- status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT);
- if (status != GL_FRAMEBUFFER_COMPLETE_EXT)
- return GL_FALSE;
-
- meta_set_passthrough_transform(&intel->meta);
-
- /* XXX: Doing it right would involve setting up the transformation to do
- * 0-1 mapping or something, and not changing the vertex data.
- */
- vertices[0][0] = 0;
- vertices[0][1] = 0;
- vertices[1][0] = width;
- vertices[1][1] = 0;
- vertices[2][0] = width;
- vertices[2][1] = height;
- vertices[3][0] = 0;
- vertices[3][1] = height;
-
- _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
- _mesa_Enable(GL_VERTEX_ARRAY);
- meta_set_default_texrect(&intel->meta);
-
- _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- meta_restore_texcoords(&intel->meta);
- meta_restore_transform(&intel->meta);
-
- return GL_TRUE;
-}
-
-static GLboolean
-intel_generate_mipmap_2d(GLcontext *ctx,
- GLenum target,
- struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- GLint old_active_texture;
- int level, max_levels, start_level, end_level;
- GLuint fb_name;
- GLboolean success = GL_FALSE;
- struct gl_framebuffer *saved_fbo = NULL;
- struct gl_buffer_object *saved_array_buffer = NULL;
- struct gl_buffer_object *saved_element_buffer = NULL;
-
- _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
- GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT |
- GL_DEPTH_BUFFER_BIT);
- _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
- old_active_texture = ctx->Texture.CurrentUnit;
- _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer);
-
- /* use default array/index buffers */
- _mesa_reference_buffer_object(ctx, &saved_array_buffer,
- ctx->Array.ArrayBufferObj);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
- ctx->Shared->NullBufferObj);
- _mesa_reference_buffer_object(ctx, &saved_element_buffer,
- ctx->Array.ElementArrayBufferObj);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
- ctx->Shared->NullBufferObj);
-
- _mesa_Disable(GL_POLYGON_STIPPLE);
- _mesa_Disable(GL_DEPTH_TEST);
- _mesa_Disable(GL_STENCIL_TEST);
- _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
- _mesa_DepthMask(GL_FALSE);
-
- /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our
- * minification.
- */
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
- _mesa_Enable(GL_TEXTURE_2D);
- _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
- GL_LINEAR_MIPMAP_NEAREST);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-
- /* Bind the new renderbuffer to the color attachment point. */
- _mesa_GenFramebuffersEXT(1, &fb_name);
- _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
-
- meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp,
- intel_fp_tex2d);
- meta_set_passthrough_vertex_program(&intel->meta);
-
- max_levels = _mesa_max_texture_levels(ctx, texObj->Target);
- start_level = texObj->BaseLevel;
- end_level = texObj->MaxLevel;
-
- /* Loop generating level+1 from level. */
- for (level = start_level; level < end_level && level < max_levels - 1; level++) {
- const struct gl_texture_image *srcImage;
- int width, height;
-
- srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
- if (srcImage->Border != 0)
- goto fail;
-
- width = srcImage->Width / 2;
- if (width < 1)
- width = 1;
- height = srcImage->Height / 2;
- if (height < 1)
- height = 1;
-
- if (width == srcImage->Width &&
- height == srcImage->Height) {
- /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the
- * maximum texture level for the object, so break out when we've gone
- * over the edge.
- */
- break;
- }
-
- /* Make sure that there's space allocated for the target level.
- * We could skip this if there's already space allocated and save some
- * time.
- */
- _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat,
- width, height, 0,
- GL_RGBA, GL_UNSIGNED_INT, NULL);
-
- if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1,
- width, height))
- goto fail;
- }
-
- success = GL_TRUE;
-
-fail:
- meta_restore_fragment_program(&intel->meta);
- meta_restore_vertex_program(&intel->meta);
-
- /* restore array/index buffers */
- _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
- saved_array_buffer);
- _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
- saved_element_buffer);
- _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL);
-
-
- _mesa_DeleteFramebuffersEXT(1, &fb_name);
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
- if (saved_fbo)
- _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name);
- _mesa_reference_framebuffer(&saved_fbo, NULL);
- _mesa_PopClientAttrib();
- _mesa_PopAttrib();
-
- return success;
-}
-
-
-/**
- * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
- * level).
- *
- * The texture object's miptree must be mapped.
- *
- * It would be really nice if this was just called by Mesa whenever mipmaps
- * needed to be regenerated, rather than us having to remember to do so in
- * each texture image modification path.
- *
- * This function should also include an accelerated path.
- */
-void
-intel_generate_mipmap(GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
- GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- int face, i;
-
- /* HW path */
- if (target == GL_TEXTURE_2D &&
- ctx->Extensions.EXT_framebuffer_object &&
- ctx->Extensions.ARB_fragment_program &&
- ctx->Extensions.ARB_vertex_program) {
- GLboolean success;
-
- /* We'll be accessing this texture using GL entrypoints, which should
- * be resilient against other access to this texture.
- */
- _mesa_unlock_texture(ctx, texObj);
- success = intel_generate_mipmap_2d(ctx, target, texObj);
- _mesa_lock_texture(ctx, texObj);
-
- if (success)
- return;
- }
-
- /* SW path */
- intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
- _mesa_generate_mipmap(ctx, target, texObj);
- intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
-
- /* Update the level information in our private data in the new images, since
- * it didn't get set as part of a normal TexImage path.
- */
- for (face = 0; face < nr_faces; face++) {
- for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
- struct intel_texture_image *intelImage;
-
- intelImage = intel_texture_image(texObj->Image[face][i]);
- if (intelImage == NULL)
- break;
-
- intelImage->level = i;
- intelImage->face = face;
- /* Unreference the miptree to signal that the new Data is a bare
- * pointer from mesa.
- */
- intel_miptree_release(intel, &intelImage->mt);
- }
- }
-}
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index c985da5..abb3024 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -28,11 +28,16 @@
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
+#include "intel_tex_layout.h"
#include "intel_chipset.h"
+#ifndef I915
+#include "brw_state.h"
+#endif
#include "main/enums.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
static GLenum
target_to_target(GLenum target)
{
@@ -49,6 +54,7 @@ target_to_target(GLenum target)
}
}
+
static struct intel_mipmap_tree *
intel_miptree_create_internal(struct intel_context *intel,
GLenum target,
@@ -98,6 +104,7 @@ intel_miptree_create_internal(struct intel_context *intel,
return mt;
}
+
struct intel_mipmap_tree *
intel_miptree_create(struct intel_context *intel,
GLenum target,
@@ -115,7 +122,7 @@ intel_miptree_create(struct intel_context *intel,
if (intel->use_texture_tiling && compress_byte == 0 &&
intel->intelScreen->kernel_exec_fencing) {
- if (IS_965(intel->intelScreen->deviceID) &&
+ if (intel->gen >= 4 &&
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL_EXT))
tiling = I915_TILING_Y;
@@ -131,8 +138,10 @@ intel_miptree_create(struct intel_context *intel,
/*
* pitch == 0 || height == 0 indicates the null texture
*/
- if (!mt || !mt->pitch || !mt->total_height)
+ if (!mt || !mt->pitch || !mt->total_height) {
+ free(mt);
return NULL;
+ }
mt->region = intel_region_alloc(intel,
tiling,
@@ -150,6 +159,7 @@ intel_miptree_create(struct intel_context *intel,
return mt;
}
+
struct intel_mipmap_tree *
intel_miptree_create_for_region(struct intel_context *intel,
GLenum target,
@@ -187,7 +197,8 @@ intel_miptree_create_for_region(struct intel_context *intel,
intel_region_reference(&mt->region, region);
return mt;
- }
+}
+
/**
* intel_miptree_pitch_align:
@@ -201,7 +212,6 @@ intel_miptree_create_for_region(struct intel_context *intel,
* Given @pitch, compute a larger value which accounts for
* any necessary alignment required by the device
*/
-
int intel_miptree_pitch_align (struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t tiling,
@@ -247,6 +257,7 @@ int intel_miptree_pitch_align (struct intel_context *intel,
return pitch;
}
+
void
intel_miptree_reference(struct intel_mipmap_tree **dst,
struct intel_mipmap_tree *src)
@@ -256,6 +267,7 @@ intel_miptree_reference(struct intel_mipmap_tree **dst,
DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
}
+
void
intel_miptree_release(struct intel_context *intel,
struct intel_mipmap_tree **mt)
@@ -269,11 +281,25 @@ intel_miptree_release(struct intel_context *intel,
DBG("%s deleting %p\n", __FUNCTION__, *mt);
+#ifndef I915
+ /* Free up cached binding tables holding a reference on our buffer, to
+ * avoid excessive memory consumption.
+ *
+ * This isn't as aggressive as we could be, as we'd like to do
+ * it from any time we free the last ref on a region. But intel_region.c
+ * is context-agnostic. Perhaps our constant state cache should be, as
+ * well.
+ */
+ brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache,
+ (*mt)->region->buffer);
+#endif
+
intel_region_release(&((*mt)->region));
- for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
- if ((*mt)->level[i].image_offset)
- free((*mt)->level[i].image_offset);
+ for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
+ free((*mt)->level[i].x_offset);
+ free((*mt)->level[i].y_offset);
+ }
free(*mt);
}
@@ -281,33 +307,31 @@ intel_miptree_release(struct intel_context *intel,
}
-
-
-/* Can the image be pulled into a unified mipmap tree. This mirrors
+/**
+ * Can the image be pulled into a unified mipmap tree? This mirrors
* the completeness test in a lot of ways.
*
* Not sure whether I want to pass gl_texture_image here.
*/
GLboolean
intel_miptree_match_image(struct intel_mipmap_tree *mt,
- struct gl_texture_image *image,
- GLuint face, GLuint level)
+ struct gl_texture_image *image)
{
- /* Images with borders are never pulled into mipmap trees.
- */
- if (image->Border ||
- ((image->_BaseFormat == GL_DEPTH_COMPONENT) &&
- ((image->TexObject->WrapS == GL_CLAMP_TO_BORDER) ||
- (image->TexObject->WrapT == GL_CLAMP_TO_BORDER))))
+ GLboolean isCompressed = _mesa_is_format_compressed(image->TexFormat);
+ struct intel_texture_image *intelImage = intel_texture_image(image);
+ GLuint level = intelImage->level;
+
+ /* Images with borders are never pulled into mipmap trees. */
+ if (image->Border)
return GL_FALSE;
if (image->InternalFormat != mt->internal_format ||
- image->IsCompressed != mt->compressed)
+ isCompressed != mt->compressed)
return GL_FALSE;
- if (!image->IsCompressed &&
+ if (!isCompressed &&
!mt->compressed &&
- image->TexFormat->TexelBytes != mt->cpp)
+ _mesa_get_format_bytes(image->TexFormat) != mt->cpp)
return GL_FALSE;
/* Test image dimensions against the base level image adjusted for
@@ -334,82 +358,59 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
mt->level[level].height = h;
mt->level[level].depth = d;
mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
+ mt->level[level].level_x = x;
+ mt->level[level].level_y = y;
mt->level[level].nr_images = nr_images;
DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
level, w, h, d, x, y, mt->level[level].level_offset);
- /* Not sure when this would happen, but anyway:
- */
- if (mt->level[level].image_offset) {
- free(mt->level[level].image_offset);
- mt->level[level].image_offset = NULL;
- }
-
assert(nr_images);
+ assert(!mt->level[level].x_offset);
- mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint));
- mt->level[level].image_offset[0] = 0;
+ mt->level[level].x_offset = malloc(nr_images * sizeof(GLuint));
+ mt->level[level].x_offset[0] = mt->level[level].level_x;
+ mt->level[level].y_offset = malloc(nr_images * sizeof(GLuint));
+ mt->level[level].y_offset[0] = mt->level[level].level_y;
}
void
-intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
- GLuint level, GLuint img,
- GLuint x, GLuint y,
- GLuint offset)
-{
- if (img == 0 && level == 0)
- assert(x == 0 && y == 0);
-
- assert(img < mt->level[level].nr_images);
-
- mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp + offset;
-
- DBG("%s level %d img %d pos %d,%d image_offset %x\n",
- __FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]);
-}
-
-void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint img,
GLuint x, GLuint y)
{
- intel_miptree_set_image_offset_ex(mt, level, img, x, y, 0);
-}
+ if (img == 0 && level == 0)
+ assert(x == 0 && y == 0);
+ assert(img < mt->level[level].nr_images);
-/* Although we use the image_offset[] array to store relative offsets
- * to cube faces, Mesa doesn't know anything about this and expects
- * each cube face to be treated as a separate image.
- *
- * These functions present that view to mesa:
- */
-const GLuint *
-intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level)
-{
- static const GLuint zero = 0;
+ mt->level[level].x_offset[img] = mt->level[level].level_x + x;
+ mt->level[level].y_offset[img] = mt->level[level].level_y + y;
- if (mt->target != GL_TEXTURE_3D || mt->level[level].nr_images == 1)
- return &zero;
- else
- return mt->level[level].image_offset;
+ DBG("%s level %d img %d pos %d,%d\n",
+ __FUNCTION__, level, img,
+ mt->level[level].x_offset[img], mt->level[level].y_offset[img]);
}
-GLuint
-intel_miptree_image_offset(struct intel_mipmap_tree *mt,
- GLuint face, GLuint level)
+void
+intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
+ GLuint level, GLuint face, GLuint depth,
+ GLuint *x, GLuint *y)
{
- if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
- return (mt->level[level].level_offset +
- mt->level[level].image_offset[face]);
- else
- return mt->level[level].level_offset;
+ if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) {
+ *x = mt->level[level].x_offset[face];
+ *y = mt->level[level].y_offset[face];
+ } else if (mt->target == GL_TEXTURE_3D) {
+ *x = mt->level[level].x_offset[depth];
+ *y = mt->level[level].y_offset[depth];
+ } else {
+ *x = mt->level[level].x_offset[0];
+ *y = mt->level[level].y_offset[0];
+ }
}
-
-
/**
* Map a teximage in a mipmap tree.
* \param row_stride returns row stride in bytes
@@ -425,6 +426,7 @@ intel_miptree_image_map(struct intel_context * intel,
GLuint level,
GLuint * row_stride, GLuint * image_offsets)
{
+ GLuint x, y;
DBG("%s \n", __FUNCTION__);
if (row_stride)
@@ -433,19 +435,26 @@ intel_miptree_image_map(struct intel_context * intel,
if (mt->target == GL_TEXTURE_3D) {
int i;
- for (i = 0; i < mt->level[level].depth; i++)
- image_offsets[i] = mt->level[level].image_offset[i] / mt->cpp;
+ for (i = 0; i < mt->level[level].depth; i++) {
+
+ intel_miptree_get_image_offset(mt, level, face, i,
+ &x, &y);
+ image_offsets[i] = x + y * mt->pitch;
+ }
+
+ return intel_region_map(intel, mt->region);
} else {
assert(mt->level[level].depth == 1);
- assert(mt->target == GL_TEXTURE_CUBE_MAP ||
- mt->level[level].image_offset[0] == 0);
+ intel_miptree_get_image_offset(mt, level, face, 0,
+ &x, &y);
image_offsets[0] = 0;
- }
- return (intel_region_map(intel, mt->region) +
- intel_miptree_image_offset(mt, face, level));
+ return intel_region_map(intel, mt->region) +
+ (x + y * mt->pitch) * mt->cpp;
+ }
}
+
void
intel_miptree_image_unmap(struct intel_context *intel,
struct intel_mipmap_tree *mt)
@@ -455,8 +464,8 @@ intel_miptree_image_unmap(struct intel_context *intel,
}
-
-/* Upload data for a particular image.
+/**
+ * Upload data for a particular image.
*/
void
intel_miptree_image_data(struct intel_context *intel,
@@ -467,21 +476,21 @@ intel_miptree_image_data(struct intel_context *intel,
GLuint src_row_pitch,
GLuint src_image_pitch)
{
- GLuint depth = dst->level[level].depth;
- GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
- const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
+ const GLuint depth = dst->level[level].depth;
GLuint i;
- GLuint height = 0;
DBG("%s: %d/%d\n", __FUNCTION__, face, level);
for (i = 0; i < depth; i++) {
+ GLuint dst_x, dst_y, height;
+
+ intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y);
+
height = dst->level[level].height;
if(dst->compressed)
height = (height + 3) / 4;
+
intel_region_data(intel,
- dst->region,
- dst_offset + dst_depth_offset[i], /* dst_offset */
- 0, 0, /* dstx, dsty */
+ dst->region, 0, dst_x, dst_y,
src,
src_row_pitch,
0, 0, /* source x, y */
@@ -491,8 +500,9 @@ intel_miptree_image_data(struct intel_context *intel,
}
}
-extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
-/* Copy mipmap image between trees
+
+/**
+ * Copy mipmap image between trees
*/
void
intel_miptree_image_copy(struct intel_context *intel,
@@ -503,38 +513,37 @@ intel_miptree_image_copy(struct intel_context *intel,
GLuint width = src->level[level].width;
GLuint height = src->level[level].height;
GLuint depth = src->level[level].depth;
- GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
- GLuint src_offset = intel_miptree_image_offset(src, face, level);
- const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
- const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level);
+ GLuint src_x, src_y, dst_x, dst_y;
GLuint i;
GLboolean success;
if (dst->compressed) {
GLuint align_w, align_h;
- intel_get_texture_alignment_unit(dst->internal_format, &align_w, &align_h);
+ intel_get_texture_alignment_unit(dst->internal_format,
+ &align_w, &align_h);
height = (height + 3) / 4;
width = ALIGN(width, align_w);
}
for (i = 0; i < depth; i++) {
+ intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y);
+ intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y);
success = intel_region_copy(intel,
- dst->region, dst_offset + dst_depth_offset[i],
- 0, 0,
- src->region, src_offset + src_depth_offset[i],
- 0, 0, width, height, GL_COPY);
+ dst->region, 0, dst_x, dst_y,
+ src->region, 0, src_x, src_y, width, height,
+ GL_COPY);
if (!success) {
GLubyte *src_ptr, *dst_ptr;
src_ptr = intel_region_map(intel, src->region);
dst_ptr = intel_region_map(intel, dst->region);
- _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i],
+ _mesa_copy_rect(dst_ptr + dst->cpp * (dst_x + dst_y * dst->pitch),
dst->cpp,
dst->pitch,
0, 0, width, height,
- src_ptr + src_offset + src_depth_offset[i],
+ src_ptr + src->cpp * (src_x + src_y * src->pitch),
src->pitch,
0, 0);
intel_region_unmap(intel, src->region);
diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h
index c890b2a..b19c548 100644
--- a/shared/intel_mipmap_tree.h
+++ b/shared/intel_mipmap_tree.h
@@ -70,6 +70,10 @@ struct intel_mipmap_level
* always zero in that case.
*/
GLuint level_offset;
+ /** Offset to this miptree level, used in computing x_offset. */
+ GLuint level_x;
+ /** Offset to this miptree level, used in computing y_offset. */
+ GLuint level_y;
GLuint width;
GLuint height;
/** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */
@@ -86,7 +90,7 @@ struct intel_mipmap_level
* compute the offsets of depth/cube images within a mipmap level,
* so have to store them as a lookup table.
*/
- GLuint *image_offset;
+ GLuint *x_offset, *y_offset;
};
struct intel_mipmap_tree
@@ -161,8 +165,7 @@ void intel_miptree_release(struct intel_context *intel,
/* Check if an image fits an existing mipmap tree layout
*/
GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt,
- struct gl_texture_image *image,
- GLuint face, GLuint level);
+ struct gl_texture_image *image);
/* Return a pointer to an image within a tree. Return image stride as
* well.
@@ -176,19 +179,10 @@ GLubyte *intel_miptree_image_map(struct intel_context *intel,
void intel_miptree_image_unmap(struct intel_context *intel,
struct intel_mipmap_tree *mt);
-
-/* Return the linear offset of an image relative to the start of the
- * tree:
- */
-GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
- GLuint face, GLuint level);
-
-/* Return pointers to each 2d slice within an image. Indexed by depth
- * value.
- */
-const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
- GLuint level);
-
+void
+intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
+ GLuint level, GLuint face, GLuint depth,
+ GLuint *x, GLuint *y);
void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint level,
@@ -196,16 +190,10 @@ void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d);
-void intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt,
- GLuint level,
- GLuint img, GLuint x, GLuint y,
- GLuint offset);
-
void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level,
GLuint img, GLuint x, GLuint y);
-
/* Upload an image into a tree
*/
void intel_miptree_image_data(struct intel_context *intel,
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
index a300141..993e427 100644
--- a/shared/intel_pixel.c
+++ b/shared/intel_pixel.c
@@ -129,20 +129,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
return GL_TRUE;
}
-
-GLboolean
-intel_check_meta_tex_fragment_ops(GLcontext * ctx)
-{
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- /* Some of _ImageTransferState (scale, bias) could be done with
- * fragment programs on i915.
- */
- return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */
- ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
-}
-
/* The intel_region struct doesn't really do enough to capture the
* format of the pixels in the region. For now this code assumes that
* the region is a display surface and hence is either ARGB8888 or
diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h
index 96a6dd1..743b649 100644
--- a/shared/intel_pixel.h
+++ b/shared/intel_pixel.h
@@ -34,8 +34,6 @@ void intelInitPixelFuncs(struct dd_function_table *functions);
GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
GLboolean src_alpha_is_one);
-GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx);
-
GLboolean intel_check_blit_format(struct intel_region *region,
GLenum format, GLenum type);
diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index b543a0b..204a233 100644
--- a/shared/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -33,6 +33,7 @@
#include "main/macros.h"
#include "main/bufferobj.h"
#include "main/pixelstore.h"
+#include "main/polygon.h"
#include "main/state.h"
#include "main/teximage.h"
#include "main/texenv.h"
@@ -209,7 +210,7 @@ do_blit_bitmap( GLcontext *ctx,
if (!dst)
return GL_FALSE;
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return GL_TRUE; /* even though this is an error, we're done */
@@ -329,12 +330,14 @@ out:
if (INTEL_DEBUG & DEBUG_SYNC)
intel_batchbuffer_flush(intel->batch);
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
unpack->BufferObj);
}
+ intel_check_front_buffer_rendering(intel);
+
return GL_TRUE;
}
@@ -418,7 +421,7 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return GL_TRUE; /* even though this is an error, we're done */
@@ -428,20 +431,21 @@ intel_texture_bitmap(GLcontext * ctx,
a8_bitmap = _mesa_calloc(width * height);
_mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
unpack->BufferObj);
}
/* Save GL state before we start setting up our drawing */
- _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT |
- GL_VIEWPORT_BIT);
+ _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT | GL_POLYGON_BIT |
+ GL_TEXTURE_BIT | GL_VIEWPORT_BIT);
_mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT |
GL_CLIENT_PIXEL_STORE_BIT);
old_active_texture = ctx->Texture.CurrentUnit;
_mesa_Disable(GL_POLYGON_STIPPLE);
+ _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
/* Upload our bitmap data to an alpha texture */
_mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
@@ -501,8 +505,6 @@ intel_texture_bitmap(GLcontext * ctx,
meta_restore_vertex_program(&intel->meta);
_mesa_PopClientAttrib();
- _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
_mesa_PopAttrib();
_mesa_DeleteTextures(1, &texname);
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index 07ca8f7..622aaa2 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -222,6 +222,8 @@ do_blit_copypixels(GLcontext * ctx,
out:
UNLOCK_HARDWARE(intel);
+ intel_check_front_buffer_rendering(intel);
+
DBG("%s: success\n", __FUNCTION__);
return GL_TRUE;
}
@@ -240,5 +242,5 @@ intelCopyPixels(GLcontext * ctx,
return;
/* this will use swrast if needed */
- _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
+ _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c
index 7fbb89f..9b382e3 100644
--- a/shared/intel_pixel_draw.c
+++ b/shared/intel_pixel_draw.c
@@ -54,7 +54,7 @@
#include "intel_fbo.h"
-/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */
+/** XXX compare perf of this vs. _mesa_meta_DrawPixels(STENCIL) */
static GLboolean
intel_stencil_drawpixels(GLcontext * ctx,
GLint x, GLint y,
@@ -169,7 +169,7 @@ intel_stencil_drawpixels(GLcontext * ctx,
* buffer.
*/
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
- irb = intel_create_renderbuffer(GL_RGBA8);
+ irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888);
rb = &irb->Base;
irb->Base.Width = depth_irb->Base.Width;
irb->Base.Height = depth_irb->Base.Height;
@@ -265,7 +265,7 @@ intelDrawPixels(GLcontext * ctx,
/* XXX this function doesn't seem to work reliably even when all
* the pre-requisite conditions are met.
* Note that this function is never hit with conform.
- * Fall back to swrast because even the _mesa_meta_draw_pixels() approach
+ * Fall back to swrast because even the _mesa_meta_DrawPixels() approach
* isn't working because of an apparent stencil bug.
*/
if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
@@ -280,6 +280,6 @@ intelDrawPixels(GLcontext * ctx,
}
#endif
- _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
+ _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
}
diff --git a/shared/intel_pixel_read.c b/shared/intel_pixel_read.c
index 8713463..20424e2 100644
--- a/shared/intel_pixel_read.c
+++ b/shared/intel_pixel_read.c
@@ -180,16 +180,7 @@ do_blit_readpixels(GLcontext * ctx,
if (!src)
return GL_FALSE;
- if (pack->BufferObj->Name) {
- /* XXX This validation should be done by core mesa:
- */
- if (!_mesa_validate_pbo_access(2, pack, width, height, 1,
- format, type, pixels)) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
- return GL_TRUE;
- }
- }
- else {
+ if (!_mesa_is_bufferobj(pack->BufferObj)) {
/* PBO only for now:
*/
if (INTEL_DEBUG & DEBUG_PIXEL)
@@ -225,9 +216,8 @@ do_blit_readpixels(GLcontext * ctx,
rowLength = -rowLength;
}
- /* XXX 64-bit cast? */
- dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height,
- format, type, 0, 0, 0);
+ dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height,
+ format, type, 0, 0, 0);
/* Although the blits go on the command buffer, need to do this and
@@ -236,14 +226,14 @@ do_blit_readpixels(GLcontext * ctx,
intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
- if (intel->driDrawable->numClipRects) {
+ if (intel->driReadDrawable->numClipRects) {
GLboolean all = (width * height * src->cpp == dst->Base.Size &&
x == 0 && dst_offset == 0);
dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
+ __DRIdrawablePrivate *dPriv = intel->driReadDrawable;
int nbox = dPriv->numClipRects;
drm_clip_rect_t *box = dPriv->pClipRects;
drm_clip_rect_t rect;
@@ -295,11 +285,11 @@ intelReadPixels(GLcontext * ctx,
intelFlush(ctx);
-#ifdef I915
if (do_blit_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
+#ifdef I915
if (do_texture_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
return;
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index a86c66a..8097516 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -582,8 +582,7 @@ intel_recreate_static(struct intel_context *intel,
* instead of which tiling mode it is. Guess.
*/
if (region_desc->tiled) {
- if (IS_965(intel->intelScreen->deviceID) &&
- region_desc == &intelScreen->depth)
+ if (intel->gen >= 4 && region_desc == &intelScreen->depth)
region->tiling = I915_TILING_Y;
else
region->tiling = I915_TILING_X;
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 0d379bd..535fcd7 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -62,6 +62,8 @@ struct intel_region
GLuint map_refcount; /**< Reference count for mapping */
GLuint draw_offset; /**< Offset of drawing address within the region */
+ GLuint draw_x, draw_y; /**< Offset of drawing within the region */
+
uint32_t tiling; /**< Which tiling mode the region is in */
uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
drmAddress classic_map; /**< drmMap of the region when not in GEM mode */
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
index 1b8c56e..789135b 100644
--- a/shared/intel_screen.c
+++ b/shared/intel_screen.c
@@ -79,6 +79,10 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN(fragment_shader, bool, false)
+ DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.")
+ DRI_CONF_OPT_END
+
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -88,10 +92,14 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_NO_RAST(false)
DRI_CONF_ALWAYS_FLUSH_BATCH(false)
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
+
+ DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false)
+ DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.")
+ DRI_CONF_OPT_END
DRI_CONF_SECTION_END
DRI_CONF_END;
-const GLuint __driNConfigOptions = 10;
+const GLuint __driNConfigOptions = 12;
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
@@ -341,7 +349,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
else {
GLboolean swStencil = (mesaVis->stencilBits > 0 &&
mesaVis->depthBits != 24);
- GLenum rgbFormat;
+ gl_format rgbFormat;
struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer);
@@ -351,11 +359,11 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
_mesa_initialize_framebuffer(&intel_fb->Base, mesaVis);
if (mesaVis->redBits == 5)
- rgbFormat = GL_RGB5;
+ rgbFormat = MESA_FORMAT_RGB565;
else if (mesaVis->alphaBits == 0)
- rgbFormat = GL_RGB8;
+ rgbFormat = MESA_FORMAT_XRGB8888;
else
- rgbFormat = GL_RGBA8;
+ rgbFormat = MESA_FORMAT_ARGB8888;
/* setup the hardware-based renderbuffers */
intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
@@ -374,7 +382,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
if (mesaVis->stencilBits == 8) {
/* combined depth/stencil buffer */
struct intel_renderbuffer *depthStencilRb
- = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT);
+ = intel_create_renderbuffer(MESA_FORMAT_S8_Z24);
/* note: bind RB to two attachment points */
_mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
&depthStencilRb->Base);
@@ -382,7 +390,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
&depthStencilRb->Base);
} else {
struct intel_renderbuffer *depthRb
- = intel_create_renderbuffer(GL_DEPTH_COMPONENT24);
+ = intel_create_renderbuffer(MESA_FORMAT_X8_Z24);
_mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
&depthRb->Base);
}
@@ -390,7 +398,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
else if (mesaVis->depthBits == 16) {
/* just 16-bit depth buffer, no hw stencil */
struct intel_renderbuffer *depthRb
- = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+ = intel_create_renderbuffer(MESA_FORMAT_Z16);
_mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
}
@@ -688,18 +696,6 @@ static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp)
return NULL;
}
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create is
- * called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- intelInitExtensions(NULL, GL_TRUE);
-
if (!intelInitDriver(psp))
return NULL;
@@ -752,18 +748,6 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
int color;
__DRIconfig **configs = NULL;
- /* Calling driInitExtensions here, with a NULL context pointer,
- * does not actually enable the extensions. It just makes sure
- * that all the dispatch offsets for all the extensions that
- * *might* be enables are known. This is needed because the
- * dispatch offsets need to be known when _mesa_context_create is
- * called, but we can't enable the extensions until we have a
- * context pointer.
- *
- * Hello chicken. Hello egg. How are you two today?
- */
- intelInitExtensions(NULL, GL_TRUE);
-
/* Allocate the private area */
intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate));
if (!intelScreen) {
diff --git a/shared/intel_span.c b/shared/intel_span.c
index 8df4990..34c3d9d 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -29,7 +29,6 @@
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/colormac.h"
-#include "main/texformat.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
@@ -132,18 +131,6 @@ pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val)
dri_bo_subdata(irb->region->buffer, offset, 1, &val);
}
-static uint32_t
-z24s8_to_s8z24(uint32_t val)
-{
- return (val << 24) | (val >> 8);
-}
-
-static uint32_t
-s8z24_to_z24s8(uint32_t val)
-{
- return (val >> 24) | (val << 8);
-}
-
static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
int x, int y)
{
@@ -163,6 +150,9 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
int x_tile_number, y_tile_number;
int tile_off, tile_base;
+ x += irb->region->draw_x;
+ y += irb->region->draw_y;
+
tile_stride = (irb->region->pitch * irb->region->cpp) << 3;
xbyte = x * irb->region->cpp;
@@ -218,6 +208,9 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
int x_tile_number, y_tile_number;
int tile_off, tile_base;
+ x += irb->region->draw_x;
+ y += irb->region->draw_y;
+
tile_stride = (irb->region->pitch * irb->region->cpp) << 5;
xbyte = x * irb->region->cpp;
@@ -273,8 +266,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
unsigned int num_cliprects; \
struct drm_clip_rect *cliprects; \
int x_off, y_off; \
+ int pitch = irb->region->pitch * irb->region->cpp; \
+ void *buf = irb->region->buffer->virtual; \
GLuint p; \
(void) p; \
+ (void)buf; (void)pitch; /* unused for non-gttmap. */ \
intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
/* XXX FBO: this is identical to the macro in spantmp2.h except we get
@@ -296,7 +292,6 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define Y_FLIP(_y) ((_y) * yScale + yBias)
-/* XXX with GEM, these need to tell the kernel */
#define HW_LOCK()
#define HW_UNLOCK()
@@ -339,7 +334,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#include "intel_spantmp.h"
/* x8r8g8b8 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_BGRA
+#define INTEL_PIXEL_FMT GL_BGR
#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v)
@@ -354,6 +349,9 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
unsigned int num_cliprects; \
struct drm_clip_rect *cliprects; \
int x_off, y_off; \
+ int pitch = irb->region->pitch * irb->region->cpp; \
+ void *buf = irb->region->buffer->virtual; \
+ (void)buf; (void)pitch; /* unused for non-gttmap. */ \
intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
@@ -366,20 +364,22 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define INTEL_TAG(name) name##_z16
#include "intel_depthtmp.h"
-/* z24 depthbuffer functions. */
+/* z24x8 depthbuffer functions. */
#define INTEL_VALUE_TYPE GLuint
#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d)
#define INTEL_READ_DEPTH(offset) pread_32(irb, offset)
-#define INTEL_TAG(name) name##_z24
+#define INTEL_TAG(name) name##_z24_x8
#include "intel_depthtmp.h"
-/* z24s8 depthbuffer functions. */
-#define INTEL_VALUE_TYPE GLuint
-#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, z24s8_to_s8z24(d))
-#define INTEL_READ_DEPTH(offset) s8z24_to_z24s8(pread_32(irb, offset))
-#define INTEL_TAG(name) name##_z24_s8
-#include "intel_depthtmp.h"
+/**
+ ** 8-bit stencil function (XXX FBO: This is obsolete)
+ **/
+/* XXX */
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
+#define TAG(x) intel_gttmap_##x##_z24_s8
+#include "stenciltmp.h"
/**
** 8-bit stencil function (XXX FBO: This is obsolete)
@@ -413,6 +413,9 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
if (irb == NULL || irb->region == NULL)
return;
+ if (intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_map_gtt(irb->region->buffer);
+
intel_set_span_functions(intel, rb);
}
@@ -425,7 +428,10 @@ intel_renderbuffer_unmap(struct intel_context *intel,
if (irb == NULL || irb->region == NULL)
return;
- clear_span_cache(irb);
+ if (intel->intelScreen->kernel_exec_fencing)
+ drm_intel_gem_bo_unmap_gtt(irb->region->buffer);
+ else
+ clear_span_cache(irb);
rb->GetRow = NULL;
rb->PutRow = NULL;
@@ -444,23 +450,30 @@ intel_renderbuffer_unmap(struct intel_context *intel,
* _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields.
*/
static void
-intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
+intel_map_unmap_framebuffer(struct intel_context *intel,
+ struct gl_framebuffer *fb,
+ GLboolean map)
{
- GLcontext *ctx = &intel->ctx;
- GLuint i, j;
+ GLuint i;
/* color draw buffers */
- for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) {
+ for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
if (map)
- intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+ intel_renderbuffer_map(intel, fb->_ColorDrawBuffers[i]);
else
- intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+ intel_renderbuffer_unmap(intel, fb->_ColorDrawBuffers[i]);
}
+ /* color read buffer */
+ if (map)
+ intel_renderbuffer_map(intel, fb->_ColorReadBuffer);
+ else
+ intel_renderbuffer_unmap(intel, fb->_ColorReadBuffer);
+
/* check for render to textures */
for (i = 0; i < BUFFER_COUNT; i++) {
struct gl_renderbuffer_attachment *att =
- ctx->DrawBuffer->Attachment + i;
+ fb->Attachment + i;
struct gl_texture_object *tex = att->Texture;
if (tex) {
/* render to texture */
@@ -472,33 +485,24 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
}
}
- /* color read buffers */
- if (map)
- intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer);
- else
- intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer);
-
/* depth buffer (Note wrapper!) */
- if (ctx->DrawBuffer->_DepthBuffer) {
+ if (fb->_DepthBuffer) {
if (map)
- intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped);
+ intel_renderbuffer_map(intel, fb->_DepthBuffer->Wrapped);
else
- intel_renderbuffer_unmap(intel,
- ctx->DrawBuffer->_DepthBuffer->Wrapped);
+ intel_renderbuffer_unmap(intel, fb->_DepthBuffer->Wrapped);
}
/* stencil buffer (Note wrapper!) */
- if (ctx->DrawBuffer->_StencilBuffer) {
+ if (fb->_StencilBuffer) {
if (map)
- intel_renderbuffer_map(intel,
- ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ intel_renderbuffer_map(intel, fb->_StencilBuffer->Wrapped);
else
- intel_renderbuffer_unmap(intel,
- ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ intel_renderbuffer_unmap(intel, fb->_StencilBuffer->Wrapped);
}
-}
-
+ intel_check_front_buffer_rendering(intel);
+}
/**
* Prepare for software rendering. Map current read/draw framebuffers'
@@ -522,7 +526,9 @@ intelSpanRenderStart(GLcontext * ctx)
}
}
- intel_map_unmap_buffers(intel, GL_TRUE);
+ intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE);
+ if (ctx->ReadBuffer != ctx->DrawBuffer)
+ intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE);
}
/**
@@ -544,7 +550,9 @@ intelSpanRenderFinish(GLcontext * ctx)
}
}
- intel_map_unmap_buffers(intel, GL_FALSE);
+ intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE);
+ if (ctx->ReadBuffer != ctx->DrawBuffer)
+ intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE);
UNLOCK_HARDWARE(intel);
}
@@ -558,6 +566,43 @@ intelInitSpanFuncs(GLcontext * ctx)
swdd->SpanRenderFinish = intelSpanRenderFinish;
}
+void
+intel_map_vertex_shader_textures(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ int i;
+
+ if (ctx->VertexProgram._Current == NULL)
+ return;
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled &&
+ ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
+ struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+
+ intel_tex_map_images(intel, intel_texture_object(texObj));
+ }
+ }
+}
+
+void
+intel_unmap_vertex_shader_textures(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ int i;
+
+ if (ctx->VertexProgram._Current == NULL)
+ return;
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled &&
+ ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
+ struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+
+ intel_tex_unmap_images(intel, intel_texture_object(texObj));
+ }
+ }
+}
/**
* Plug in appropriate span read/write functions for the given renderbuffer.
@@ -578,7 +623,51 @@ intel_set_span_functions(struct intel_context *intel,
else
tiling = I915_TILING_NONE;
- switch (irb->texformat->MesaFormat) {
+ if (intel->intelScreen->kernel_exec_fencing) {
+ switch (irb->Base.Format) {
+ case MESA_FORMAT_RGB565:
+ intel_gttmap_InitPointers_RGB565(rb);
+ break;
+ case MESA_FORMAT_ARGB4444:
+ intel_gttmap_InitPointers_ARGB4444(rb);
+ break;
+ case MESA_FORMAT_ARGB1555:
+ intel_gttmap_InitPointers_ARGB1555(rb);
+ break;
+ case MESA_FORMAT_XRGB8888:
+ intel_gttmap_InitPointers_xRGB8888(rb);
+ break;
+ case MESA_FORMAT_ARGB8888:
+ intel_gttmap_InitPointers_ARGB8888(rb);
+ break;
+ case MESA_FORMAT_Z16:
+ intel_gttmap_InitDepthPointers_z16(rb);
+ break;
+ case MESA_FORMAT_X8_Z24:
+ intel_gttmap_InitDepthPointers_z24_x8(rb);
+ break;
+ case MESA_FORMAT_S8_Z24:
+ /* There are a few different ways SW asks us to access the S8Z24 data:
+ * Z24 depth-only depth reads
+ * S8Z24 depth reads
+ * S8Z24 stencil reads.
+ */
+ if (rb->Format == MESA_FORMAT_S8_Z24) {
+ intel_gttmap_InitDepthPointers_z24_x8(rb);
+ } else if (rb->Format == MESA_FORMAT_S8) {
+ intel_gttmap_InitStencilPointers_z24_s8(rb);
+ }
+ break;
+ default:
+ _mesa_problem(NULL,
+ "Unexpected MesaFormat %d in intelSetSpanFunctions",
+ irb->Base.Format);
+ break;
+ }
+ return;
+ }
+
+ switch (irb->Base.Format) {
case MESA_FORMAT_RGB565:
switch (tiling) {
case I915_TILING_NONE:
@@ -621,35 +710,33 @@ intel_set_span_functions(struct intel_context *intel,
break;
}
break;
+ case MESA_FORMAT_XRGB8888:
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitPointers_xRGB8888(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitPointers_xRGB8888(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitPointers_xRGB8888(rb);
+ break;
+ }
+ break;
case MESA_FORMAT_ARGB8888:
- if (rb->AlphaBits == 0) { /* XXX: Need xRGB8888 Mesa format */
- /* 8888 RGBx */
- switch (tiling) {
- case I915_TILING_NONE:
- default:
- intelInitPointers_xRGB8888(rb);
- break;
- case I915_TILING_X:
- intel_XTile_InitPointers_xRGB8888(rb);
- break;
- case I915_TILING_Y:
- intel_YTile_InitPointers_xRGB8888(rb);
- break;
- }
- } else {
- /* 8888 RGBA */
- switch (tiling) {
- case I915_TILING_NONE:
- default:
- intelInitPointers_ARGB8888(rb);
- break;
- case I915_TILING_X:
- intel_XTile_InitPointers_ARGB8888(rb);
- break;
- case I915_TILING_Y:
- intel_YTile_InitPointers_ARGB8888(rb);
- break;
- }
+ /* 8888 RGBA */
+ switch (tiling) {
+ case I915_TILING_NONE:
+ default:
+ intelInitPointers_ARGB8888(rb);
+ break;
+ case I915_TILING_X:
+ intel_XTile_InitPointers_ARGB8888(rb);
+ break;
+ case I915_TILING_Y:
+ intel_YTile_InitPointers_ARGB8888(rb);
+ break;
}
break;
case MESA_FORMAT_Z16:
@@ -666,39 +753,27 @@ intel_set_span_functions(struct intel_context *intel,
break;
}
break;
+ case MESA_FORMAT_X8_Z24:
case MESA_FORMAT_S8_Z24:
/* There are a few different ways SW asks us to access the S8Z24 data:
* Z24 depth-only depth reads
* S8Z24 depth reads
* S8Z24 stencil reads.
*/
- if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) {
- switch (tiling) {
- case I915_TILING_NONE:
- default:
- intelInitDepthPointers_z24(rb);
- break;
- case I915_TILING_X:
- intel_XTile_InitDepthPointers_z24(rb);
- break;
- case I915_TILING_Y:
- intel_YTile_InitDepthPointers_z24(rb);
- break;
- }
- } else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
+ if (rb->Format == MESA_FORMAT_S8_Z24) {
switch (tiling) {
case I915_TILING_NONE:
default:
- intelInitDepthPointers_z24_s8(rb);
+ intelInitDepthPointers_z24_x8(rb);
break;
case I915_TILING_X:
- intel_XTile_InitDepthPointers_z24_s8(rb);
+ intel_XTile_InitDepthPointers_z24_x8(rb);
break;
case I915_TILING_Y:
- intel_YTile_InitDepthPointers_z24_s8(rb);
+ intel_YTile_InitDepthPointers_z24_x8(rb);
break;
}
- } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {
+ } else if (rb->Format == MESA_FORMAT_S8) {
switch (tiling) {
case I915_TILING_NONE:
default:
@@ -711,6 +786,9 @@ intel_set_span_functions(struct intel_context *intel,
intel_YTile_InitStencilPointers_z24_s8(rb);
break;
}
+ } else {
+ _mesa_problem(NULL,
+ "Unexpected ActualFormat in intelSetSpanFunctions");
}
break;
default:
diff --git a/shared/intel_span.h b/shared/intel_span.h
index acbeb4a..bffe109 100644
--- a/shared/intel_span.h
+++ b/shared/intel_span.h
@@ -36,5 +36,7 @@ void intel_renderbuffer_map(struct intel_context *intel,
struct gl_renderbuffer *rb);
void intel_renderbuffer_unmap(struct intel_context *intel,
struct gl_renderbuffer *rb);
+void intel_map_vertex_shader_textures(GLcontext *ctx);
+void intel_unmap_vertex_shader_textures(GLcontext *ctx);
#endif
diff --git a/shared/intel_spantmp.h b/shared/intel_spantmp.h
index ead0b1c..bad0339 100644
--- a/shared/intel_spantmp.h
+++ b/shared/intel_spantmp.h
@@ -32,6 +32,12 @@
#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
+#define TAG(x) INTEL_TAG(intel_gttmap_##x)
+#define TAG2(x, y) INTEL_TAG(intel_gttmap_##x##y)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
+#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v)
#define GET_VALUE(_x, _y) INTEL_READ_VALUE(NO_TILE(_x, _y))
#define TAG(x) INTEL_TAG(intel##x)
@@ -48,8 +54,8 @@
#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
-#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v)
-#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y))
+#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(Y_TILE(_x, _y), v)
+#define GET_VALUE(_x, _y) INTEL_READ_VALUE(Y_TILE(_x, _y))
#define TAG(x) INTEL_TAG(intel_YTile_##x)
#define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y
#include "spantmp2.h"
diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c
index 1286fe9..0d7889d 100644
--- a/shared/intel_syncobj.c
+++ b/shared/intel_syncobj.c
@@ -114,7 +114,7 @@ static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
- if (sync->bo && drm_intel_bo_busy(sync->bo)) {
+ if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
drm_intel_bo_unreference(sync->bo);
sync->bo = NULL;
s->StatusFlag = 1;
diff --git a/shared/intel_tex.c b/shared/intel_tex.c
index df63f29..215a534 100644
--- a/shared/intel_tex.c
+++ b/shared/intel_tex.c
@@ -2,6 +2,7 @@
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/mipmap.h"
+#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -158,11 +159,58 @@ timed_memcpy(void *dest, const void *src, size_t n)
}
#endif /* DO_DEBUG */
+
+/**
+ * Called via ctx->Driver.GenerateMipmap()
+ * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks
+ * if we'll be using software mipmap generation. In that case, we need to
+ * map/unmap the base level texture image.
+ */
+static void
+intelGenerateMipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+ /* sw path: need to map texture images */
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_texture_object *intelObj = intel_texture_object(texObj);
+ intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+ _mesa_generate_mipmap(ctx, target, texObj);
+ intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+
+ {
+ GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+ GLuint face, i;
+ /* Update the level information in our private data in the new images,
+ * since it didn't get set as part of a normal TexImage path.
+ */
+ for (face = 0; face < nr_faces; face++) {
+ for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+ struct intel_texture_image *intelImage =
+ intel_texture_image(texObj->Image[face][i]);
+ if (!intelImage)
+ break;
+ intelImage->level = i;
+ intelImage->face = face;
+ /* Unreference the miptree to signal that the new Data is a
+ * bare pointer from mesa.
+ */
+ intel_miptree_release(intel, &intelImage->mt);
+ }
+ }
+ }
+ }
+ else {
+ _mesa_meta_GenerateMipmap(ctx, target, texObj);
+ }
+}
+
+
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->ChooseTextureFormat = intelChooseTextureFormat;
- functions->GenerateMipmap = intel_generate_mipmap;
+ functions->GenerateMipmap = intelGenerateMipmap;
functions->NewTextureObject = intelNewTextureObject;
functions->NewTextureImage = intelNewTextureImage;
diff --git a/shared/intel_tex.h b/shared/intel_tex.h
index 471aa2a..f3cc0ff 100644
--- a/shared/intel_tex.h
+++ b/shared/intel_tex.h
@@ -29,6 +29,7 @@
#define INTELTEX_INC
#include "main/mtypes.h"
+#include "main/formats.h"
#include "intel_context.h"
#include "texmem.h"
@@ -41,10 +42,8 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
-const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx,
- GLint internalFormat,
- GLenum format,
- GLenum type);
+gl_format intelChooseTextureFormat(GLcontext *ctx, GLint internalFormat,
+ GLenum format, GLenum type);
void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch);
@@ -71,7 +70,4 @@ void intel_tex_unmap_images(struct intel_context *intel,
int intel_compressed_num_bytes(GLuint mesaFormat);
-void intel_generate_mipmap(GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj);
-
#endif
diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c
index 74f7f58..767d04d 100644
--- a/shared/intel_tex_copy.c
+++ b/shared/intel_tex_copy.c
@@ -29,8 +29,10 @@
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
+#include "main/texstate.h"
#include "main/mipmap.h"
-#include "swrast/swrast.h"
+
+#include "drivers/common/meta.h"
#include "intel_screen.h"
#include "intel_context.h"
@@ -91,9 +93,7 @@ do_copy_texsubimage(struct intel_context *intel,
GLint x, GLint y, GLsizei width, GLsizei height)
{
GLcontext *ctx = &intel->ctx;
- struct gl_texture_object *texObj = intelImage->base.TexObject;
- const struct intel_region *src =
- get_teximage_source(intel, internalFormat);
+ const struct intel_region *src = get_teximage_source(intel, internalFormat);
if (!intelImage->mt || !src) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
@@ -109,33 +109,36 @@ do_copy_texsubimage(struct intel_context *intel,
return GL_FALSE;
}
- intelFlush(ctx);
+ // intelFlush(ctx);
LOCK_HARDWARE(intel);
{
drm_intel_bo *dst_bo = intel_region_buffer(intel,
intelImage->mt->region,
INTEL_WRITE_PART);
- GLuint image_offset = intel_miptree_image_offset(intelImage->mt,
- intelImage->face,
- intelImage->level);
const GLint orig_x = x;
const GLint orig_y = y;
+ GLuint image_x, image_y;
GLshort src_pitch;
+ /* get dest x/y in destination texture */
+ intel_miptree_get_image_offset(intelImage->mt,
+ intelImage->level,
+ intelImage->face,
+ 0,
+ &image_x, &image_y);
/* Update dst for clipped src. Need to also clip the source rect. */
dstx += x - orig_x;
dsty += y - orig_y;
/* Can't blit to tiled buffers with non-tile-aligned offset. */
- if (intelImage->mt->region->tiling != I915_TILING_NONE &&
- (image_offset & 4095) != 0) {
+ if (intelImage->mt->region->tiling == I915_TILING_Y) {
UNLOCK_HARDWARE(intel);
return GL_FALSE;
}
if (ctx->ReadBuffer->Name == 0) {
/* reading from a window, adjust x, y */
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
+ const __DRIdrawablePrivate *dPriv = intel->driReadDrawable;
y = dPriv->y + (dPriv->h - (y + height));
x += dPriv->x;
@@ -152,17 +155,19 @@ do_copy_texsubimage(struct intel_context *intel,
src_pitch = src->pitch;
}
+ /* blit from src buffer to texture */
if (!intelEmitCopyBlit(intel,
intelImage->mt->cpp,
src_pitch,
src->buffer,
- 0,
+ src->draw_offset,
src->tiling,
intelImage->mt->pitch,
dst_bo,
- image_offset,
+ 0,
intelImage->mt->region->tiling,
- x, y, dstx, dsty, width, height,
+ x, y, image_x + dstx, image_y + dsty,
+ width, height,
GL_COPY)) {
UNLOCK_HARDWARE(intel);
return GL_FALSE;
@@ -171,11 +176,6 @@ do_copy_texsubimage(struct intel_context *intel,
UNLOCK_HARDWARE(intel);
- /* GL_SGIS_generate_mipmap */
- if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
-
return GL_TRUE;
}
@@ -185,8 +185,7 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
GLenum internalFormat,
GLint x, GLint y, GLsizei width, GLint border)
{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+ struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj =
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
@@ -222,8 +221,10 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
return;
fail:
- _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y,
- width, border);
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+ _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
+ width, border);
}
@@ -233,8 +234,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
GLint x, GLint y, GLsizei width, GLsizei height,
GLint border)
{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+ struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj =
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
@@ -249,7 +249,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
*/
ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
width, height, border,
- GL_RGBA, CHAN_TYPE, NULL,
+ GL_RGBA, GL_UNSIGNED_BYTE, NULL,
&ctx->DefaultPacking, texObj, texImage);
srcx = x;
@@ -270,8 +270,10 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
return;
fail:
- _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y,
- width, height, border);
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+ _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
+ width, height, border);
}
@@ -279,8 +281,7 @@ static void
intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
GLint xoffset, GLint x, GLint y, GLsizei width)
{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+ struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj =
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
@@ -295,7 +296,9 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
if (!do_copy_texsubimage(intel_context(ctx), target,
intel_texture_image(texImage),
internalFormat, xoffset, 0, x, y, width, 1)) {
- _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width);
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+ _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width);
}
}
@@ -305,8 +308,7 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y, GLsizei width, GLsizei height)
{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+ struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *texObj =
_mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage =
@@ -321,10 +323,10 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
internalFormat,
xoffset, yoffset, x, y, width, height)) {
- DBG("%s - fallback to swrast\n", __FUNCTION__);
-
- _swrast_copy_texsubimage2d(ctx, target, level,
- xoffset, yoffset, x, y, width, height);
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+ _mesa_meta_CopyTexSubImage2D(ctx, target, level,
+ xoffset, yoffset, x, y, width, height);
}
}
diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c
index 3322a71..87efb72 100644
--- a/shared/intel_tex_format.c
+++ b/shared/intel_tex_format.c
@@ -1,7 +1,6 @@
#include "intel_context.h"
#include "intel_tex.h"
#include "intel_chipset.h"
-#include "main/texformat.h"
#include "main/enums.h"
@@ -16,7 +15,7 @@
* these if we take the step of simply swizzling the colors
* immediately after sampling...
*/
-const struct gl_texture_format *
+gl_format
intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
GLenum format, GLenum type)
{
@@ -34,48 +33,48 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_COMPRESSED_RGBA:
if (format == GL_BGRA) {
if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
- return &_mesa_texformat_argb8888;
+ return MESA_FORMAT_ARGB8888;
}
else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
- return &_mesa_texformat_argb4444;
+ return MESA_FORMAT_ARGB4444;
}
else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
- return &_mesa_texformat_argb1555;
+ return MESA_FORMAT_ARGB1555;
}
}
- return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+ return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
- return &_mesa_texformat_rgb565;
+ return MESA_FORMAT_RGB565;
}
- return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+ return do32bpt ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_RGB565;
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+ return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
case GL_RGBA4:
case GL_RGBA2:
- return &_mesa_texformat_argb4444;
+ return MESA_FORMAT_ARGB4444;
case GL_RGB5_A1:
- return &_mesa_texformat_argb1555;
+ return MESA_FORMAT_ARGB1555;
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
- return &_mesa_texformat_argb8888;
+ return MESA_FORMAT_XRGB8888;
case GL_RGB5:
case GL_RGB4:
case GL_R3_G3_B2:
- return &_mesa_texformat_rgb565;
+ return MESA_FORMAT_RGB565;
case GL_ALPHA:
case GL_ALPHA4:
@@ -83,7 +82,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_ALPHA12:
case GL_ALPHA16:
case GL_COMPRESSED_ALPHA:
- return &_mesa_texformat_a8;
+ return MESA_FORMAT_A8;
case 1:
case GL_LUMINANCE:
@@ -92,18 +91,24 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_LUMINANCE12:
case GL_LUMINANCE16:
case GL_COMPRESSED_LUMINANCE:
- return &_mesa_texformat_l8;
+ return MESA_FORMAT_L8;
+
+ case GL_LUMINANCE12_ALPHA4:
+ case GL_LUMINANCE12_ALPHA12:
+ case GL_LUMINANCE16_ALPHA16:
+#ifndef I915
+ return MESA_FORMAT_AL1616;
+#else
+ /* FALLTHROUGH */
+#endif
case 2:
case GL_LUMINANCE_ALPHA:
case GL_LUMINANCE4_ALPHA4:
case GL_LUMINANCE6_ALPHA2:
case GL_LUMINANCE8_ALPHA8:
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
case GL_COMPRESSED_LUMINANCE_ALPHA:
- return &_mesa_texformat_al88;
+ return MESA_FORMAT_AL88;
case GL_INTENSITY:
case GL_INTENSITY4:
@@ -111,41 +116,41 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_INTENSITY12:
case GL_INTENSITY16:
case GL_COMPRESSED_INTENSITY:
- return &_mesa_texformat_i8;
+ return MESA_FORMAT_I8;
case GL_YCBCR_MESA:
if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
- return &_mesa_texformat_ycbcr;
+ return MESA_FORMAT_YCBCR;
else
- return &_mesa_texformat_ycbcr_rev;
+ return MESA_FORMAT_YCBCR_REV;
case GL_COMPRESSED_RGB_FXT1_3DFX:
- return &_mesa_texformat_rgb_fxt1;
+ return MESA_FORMAT_RGB_FXT1;
case GL_COMPRESSED_RGBA_FXT1_3DFX:
- return &_mesa_texformat_rgba_fxt1;
+ return MESA_FORMAT_RGBA_FXT1;
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgb_dxt1;
+ return MESA_FORMAT_RGB_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return &_mesa_texformat_rgba_dxt1;
+ return MESA_FORMAT_RGBA_DXT1;
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return &_mesa_texformat_rgba_dxt3;
+ return MESA_FORMAT_RGBA_DXT3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return &_mesa_texformat_rgba_dxt5;
+ return MESA_FORMAT_RGBA_DXT5;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
#if 0
- return &_mesa_texformat_z16;
+ return MESA_FORMAT_Z16;
#else
/* fall-through.
* 16bpp depth texture can't be paired with a stencil buffer so
@@ -154,7 +159,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
#endif
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
- return &_mesa_texformat_s8_z24;
+ return MESA_FORMAT_S8_Z24;
#ifndef I915
case GL_SRGB_EXT:
@@ -165,41 +170,41 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case GL_COMPRESSED_SRGB_ALPHA_EXT:
case GL_COMPRESSED_SLUMINANCE_EXT:
case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
- return &_mesa_texformat_sargb8;
+ return MESA_FORMAT_SARGB8;
case GL_SLUMINANCE_EXT:
case GL_SLUMINANCE8_EXT:
if (IS_G4X(intel->intelScreen->deviceID))
- return &_mesa_texformat_sl8;
+ return MESA_FORMAT_SL8;
else
- return &_mesa_texformat_sargb8;
+ return MESA_FORMAT_SARGB8;
case GL_SLUMINANCE_ALPHA_EXT:
case GL_SLUMINANCE8_ALPHA8_EXT:
if (IS_G4X(intel->intelScreen->deviceID))
- return &_mesa_texformat_sla8;
+ return MESA_FORMAT_SLA8;
else
- return &_mesa_texformat_sargb8;
+ return MESA_FORMAT_SARGB8;
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return &_mesa_texformat_srgb_dxt1;
+ return MESA_FORMAT_SRGB_DXT1;
/* i915 could also do this */
case GL_DUDV_ATI:
case GL_DU8DV8_ATI:
- return &_mesa_texformat_dudv8;
+ return MESA_FORMAT_DUDV8;
case GL_RGBA_SNORM:
case GL_RGBA8_SNORM:
- return &_mesa_texformat_signed_rgba8888_rev;
+ return MESA_FORMAT_SIGNED_RGBA8888_REV;
#endif
default:
fprintf(stderr, "unexpected texture format %s in %s\n",
_mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
- return NULL;
+ return MESA_FORMAT_NONE;
}
- return NULL; /* never get here */
+ return MESA_FORMAT_NONE; /* never get here */
}
int intel_compressed_num_bytes(GLuint mesaFormat)
diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c
index c5f5220..66d61f9 100644
--- a/shared/intel_tex_image.c
+++ b/shared/intel_tex_image.c
@@ -1,17 +1,15 @@
-#include <stdlib.h>
-#include <stdio.h>
-
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
-#include "main/colortab.h"
+#include "main/bufferobj.h"
#include "main/convolve.h"
#include "main/context.h"
-#include "main/simple_list.h"
+#include "main/formats.h"
+#include "main/image.h"
#include "main/texcompress.h"
-#include "main/texformat.h"
+#include "main/texstore.h"
#include "main/texgetimage.h"
#include "main/texobj.h"
#include "main/texstore.h"
@@ -73,6 +71,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
GLuint depth = intelImage->base.Depth;
GLuint l2width, l2height, l2depth;
GLuint i, comp_byte = 0;
+ GLuint texelBytes;
DBG("%s\n", __FUNCTION__);
@@ -116,7 +115,8 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
*/
if ((intelObj->base.MinFilter == GL_NEAREST ||
intelObj->base.MinFilter == GL_LINEAR) &&
- intelImage->level == firstLevel) {
+ intelImage->level == firstLevel &&
+ (intel->gen < 4 || firstLevel == 0)) {
lastLevel = firstLevel;
}
else {
@@ -127,8 +127,11 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
}
assert(!intelObj->mt);
- if (intelImage->base.IsCompressed)
- comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
+ if (_mesa_is_format_compressed(intelImage->base.TexFormat))
+ comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat);
+
+ texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
+
intelObj->mt = intel_miptree_create(intel,
intelObj->base.Target,
intelImage->base._BaseFormat,
@@ -138,7 +141,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
width,
height,
depth,
- intelImage->base.TexFormat->TexelBytes,
+ texelBytes,
comp_byte,
expect_accelerated_upload);
@@ -170,7 +173,7 @@ target_to_face(GLenum target)
static GLboolean
check_pbo_format(GLint internalFormat,
GLenum format, GLenum type,
- const struct gl_texture_format *mesa_format)
+ gl_format mesa_format)
{
switch (internalFormat) {
case 4:
@@ -178,12 +181,12 @@ check_pbo_format(GLint internalFormat,
return (format == GL_BGRA &&
(type == GL_UNSIGNED_BYTE ||
type == GL_UNSIGNED_INT_8_8_8_8_REV) &&
- mesa_format == &_mesa_texformat_argb8888);
+ mesa_format == MESA_FORMAT_ARGB8888);
case 3:
case GL_RGB:
return (format == GL_RGB &&
type == GL_UNSIGNED_SHORT_5_6_5 &&
- mesa_format == &_mesa_texformat_rgb565);
+ mesa_format == MESA_FORMAT_RGB565);
case GL_YCBCR_MESA:
return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE);
default:
@@ -204,9 +207,12 @@ try_pbo_upload(struct intel_context *intel,
{
struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
GLuint src_offset, src_stride;
- GLuint dst_offset, dst_stride;
+ GLuint dst_x, dst_y, dst_stride;
+ dri_bo *dst_buffer = intel_region_buffer(intel,
+ intelImage->mt->region,
+ INTEL_WRITE_FULL);
- if (unpack->BufferObj->Name == 0 ||
+ if (!_mesa_is_bufferobj(unpack->BufferObj) ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -221,26 +227,23 @@ try_pbo_upload(struct intel_context *intel,
else
src_stride = width;
- dst_offset = intel_miptree_image_offset(intelImage->mt,
- intelImage->face,
- intelImage->level);
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
+ intelImage->face, 0,
+ &dst_x, &dst_y);
dst_stride = intelImage->mt->pitch;
- intelFlush(&intel->ctx);
+ if (drm_intel_bo_references(intel->batch->buf, dst_buffer))
+ intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
{
dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
- dri_bo *dst_buffer = intel_region_buffer(intel,
- intelImage->mt->region,
- INTEL_WRITE_FULL);
-
if (!intelEmitCopyBlit(intel,
intelImage->mt->cpp,
src_stride, src_buffer, src_offset, GL_FALSE,
- dst_stride, dst_buffer, dst_offset, GL_FALSE,
- 0, 0, 0, 0, width, height,
+ dst_stride, dst_buffer, 0, GL_FALSE,
+ 0, 0, dst_x, dst_y, width, height,
GL_COPY)) {
UNLOCK_HARDWARE(intel);
return GL_FALSE;
@@ -262,9 +265,9 @@ try_pbo_zcopy(struct intel_context *intel,
{
struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
GLuint src_offset, src_stride;
- GLuint dst_offset, dst_stride;
+ GLuint dst_x, dst_y, dst_stride;
- if (unpack->BufferObj->Name == 0 ||
+ if (!_mesa_is_bufferobj(unpack->BufferObj) ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -279,13 +282,14 @@ try_pbo_zcopy(struct intel_context *intel,
else
src_stride = width;
- dst_offset = intel_miptree_image_offset(intelImage->mt,
- intelImage->face,
- intelImage->level);
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
+ intelImage->face, 0,
+ &dst_x, &dst_y);
dst_stride = intelImage->mt->pitch;
- if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) {
+ if (src_stride != dst_stride || dst_x != 0 || dst_y != 0 ||
+ src_offset != 0) {
DBG("%s: failure 2\n", __FUNCTION__);
return GL_FALSE;
}
@@ -320,8 +324,6 @@ intelTexImage(GLcontext * ctx,
DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
- intelFlush(ctx);
-
intelImage->face = target_to_face(target);
intelImage->level = level;
@@ -330,22 +332,11 @@ intelTexImage(GLcontext * ctx,
&postConvHeight);
}
- /* choose the texture format */
- texImage->TexFormat = intelChooseTextureFormat(ctx, internalFormat,
- format, type);
-
- _mesa_set_fetch_functions(texImage, dims);
-
- if (texImage->TexFormat->TexelBytes == 0) {
- /* must be a compressed format */
+ if (_mesa_is_format_compressed(texImage->TexFormat)) {
texelBytes = 0;
- texImage->IsCompressed = GL_TRUE;
- texImage->CompressedSize =
- ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
- texImage->Height, texImage->Depth,
- texImage->TexFormat->MesaFormat);
- } else {
- texelBytes = texImage->TexFormat->TexelBytes;
+ }
+ else {
+ texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
/* Minimum pitch of 32 bytes */
if (postConvWidth * texelBytes < 32) {
@@ -378,8 +369,7 @@ intelTexImage(GLcontext * ctx,
intelObj->mt->first_level == level &&
intelObj->mt->last_level == level &&
intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
- !intel_miptree_match_image(intelObj->mt, &intelImage->base,
- intelImage->face, intelImage->level)) {
+ !intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
DBG("release it\n");
intel_miptree_release(intel, &intelObj->mt);
@@ -396,17 +386,17 @@ intelTexImage(GLcontext * ctx,
assert(!intelImage->mt);
if (intelObj->mt &&
- intel_miptree_match_image(intelObj->mt, &intelImage->base,
- intelImage->face, intelImage->level)) {
+ intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
intel_miptree_reference(&intelImage->mt, intelObj->mt);
assert(intelImage->mt);
} else if (intelImage->base.Border == 0) {
int comp_byte = 0;
-
- if (intelImage->base.IsCompressed) {
+ GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
+ GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat);
+ if (_mesa_is_format_compressed(intelImage->base.TexFormat)) {
comp_byte =
- intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
+ intel_compressed_num_bytes(intelImage->base.TexFormat);
}
/* Didn't fit in the object miptree, but it's suitable for inclusion in
@@ -414,11 +404,11 @@ intelTexImage(GLcontext * ctx,
* It'll get moved into the object miptree at validate time.
*/
intelImage->mt = intel_miptree_create(intel, target,
- intelImage->base.TexFormat->BaseFormat,
+ baseFormat,
internalFormat,
level, level,
width, height, depth,
- intelImage->base.TexFormat->TexelBytes,
+ texelBytes,
comp_byte, pixels == NULL);
}
@@ -427,7 +417,7 @@ intelTexImage(GLcontext * ctx,
*/
if (dims <= 2 &&
intelImage->mt &&
- unpack->BufferObj->Name != 0 &&
+ _mesa_is_bufferobj(unpack->BufferObj) &&
check_pbo_format(internalFormat, format,
type, intelImage->base.TexFormat)) {
@@ -482,21 +472,31 @@ intelTexImage(GLcontext * ctx,
LOCK_HARDWARE(intel);
if (intelImage->mt) {
- if (pixels != NULL)
+ if (pixels != NULL) {
+ /* Flush any queued rendering with the texture before mapping. */
+ if (drm_intel_bo_references(intel->batch->buf,
+ intelImage->mt->region->buffer)) {
+ intelFlush(ctx);
+ }
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
intelImage->face,
intelImage->level,
&dstRowStride,
intelImage->base.ImageOffsets);
+ }
+
texImage->RowStride = dstRowStride / intelImage->mt->cpp;
}
else {
/* Allocate regular memory and store the image there temporarily. */
- if (texImage->IsCompressed) {
- sizeInBytes = texImage->CompressedSize;
+ if (_mesa_is_format_compressed(texImage->TexFormat)) {
+ sizeInBytes = _mesa_format_image_size(texImage->TexFormat,
+ texImage->Width,
+ texImage->Height,
+ texImage->Depth);
dstRowStride =
- _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+ _mesa_format_row_stride(texImage->TexFormat, width);
assert(dims != 3);
}
else {
@@ -527,17 +527,20 @@ intelTexImage(GLcontext * ctx,
pixels,
srcRowStride,
0, 0);
- } else
+ }
+ else {
memcpy(texImage->Data, pixels, imageSize);
- } else if (!texImage->TexFormat->StoreImage(ctx, dims,
- texImage->_BaseFormat,
- texImage->TexFormat,
- texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
- dstRowStride,
- texImage->ImageOffsets,
- width, height, depth,
- format, type, pixels, unpack)) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+ }
+ }
+ else if (!_mesa_texstore(ctx, dims,
+ texImage->_BaseFormat,
+ texImage->TexFormat,
+ texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
+ dstRowStride,
+ texImage->ImageOffsets,
+ width, height, depth,
+ format, type, pixels, unpack)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
}
}
@@ -550,11 +553,6 @@ intelTexImage(GLcontext * ctx,
}
UNLOCK_HARDWARE(intel);
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
}
@@ -670,9 +668,10 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
if (compressed) {
_mesa_get_compressed_teximage(ctx, target, level, pixels,
texObj, texImage);
- } else {
+ }
+ else {
_mesa_get_teximage(ctx, target, level, format, type, pixels,
- texObj, texImage);
+ texObj, texImage);
}
@@ -735,17 +734,16 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
{
struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
struct intel_context *intel = pDRICtx->driverPrivate;
+ GLcontext *ctx = &intel->ctx;
struct intel_texture_object *intelObj;
struct intel_texture_image *intelImage;
struct intel_mipmap_tree *mt;
struct intel_renderbuffer *rb;
- struct gl_texture_unit *texUnit;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
- int level = 0, type, format, internalFormat;
+ int level = 0, internalFormat;
- texUnit = &intel->ctx.Texture.Unit[intel->ctx.Texture.CurrentUnit];
- texObj = _mesa_select_tex_object(&intel->ctx, texUnit, target);
+ texObj = _mesa_get_current_tex_object(ctx, target);
intelObj = intel_texture_object(texObj);
if (!intelObj)
@@ -760,8 +758,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
if (rb->region == NULL)
return;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
internalFormat = GL_RGB;
else
@@ -792,14 +788,14 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
intelImage->face = target_to_face(target);
intelImage->level = level;
- texImage->TexFormat = intelChooseTextureFormat(&intel->ctx, internalFormat,
- type, format);
- _mesa_set_fetch_functions(texImage, 2);
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+ texImage->TexFormat = MESA_FORMAT_XRGB8888;
+ else
+ texImage->TexFormat = MESA_FORMAT_ARGB8888;
texImage->RowStride = rb->region->pitch;
intel_miptree_reference(&intelImage->mt, intelObj->mt);
- if (!intel_miptree_match_image(intelObj->mt, &intelImage->base,
- intelImage->face, intelImage->level)) {
+ if (!intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
fprintf(stderr, "miptree doesn't match image\n");
}
diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h
index c9de9b5..a9ac9e7 100644
--- a/shared/intel_tex_layout.h
+++ b/shared/intel_tex_layout.h
@@ -33,7 +33,7 @@
#include "main/macros.h"
-static GLuint minify( GLuint d )
+static INLINE GLuint minify( GLuint d )
{
return MAX2(1, d>>1);
}
diff --git a/shared/intel_tex_obj.h b/shared/intel_tex_obj.h
index 5a93461..3ad10d3 100644
--- a/shared/intel_tex_obj.h
+++ b/shared/intel_tex_obj.h
@@ -66,6 +66,7 @@ struct intel_texture_image
* Else there is no image data.
*/
struct intel_mipmap_tree *mt;
+ GLboolean used_as_render_target;
};
static INLINE struct intel_texture_object *
diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c
index 8903707..1f68208 100644
--- a/shared/intel_tex_subimage.c
+++ b/shared/intel_tex_subimage.c
@@ -85,13 +85,13 @@ intelTexSubimage(GLcontext * ctx,
&dstRowStride,
texImage->ImageOffsets);
else {
- if (texImage->IsCompressed) {
+ if (_mesa_is_format_compressed(texImage->TexFormat)) {
dstRowStride =
- _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+ _mesa_format_row_stride(texImage->TexFormat, width);
assert(dims != 3);
}
else {
- dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
+ dstRowStride = texImage->RowStride * _mesa_get_format_bytes(texImage->TexFormat);
}
}
@@ -105,18 +105,20 @@ intelTexSubimage(GLcontext * ctx,
xoffset, yoffset / 4,
(width + 3) & ~3, (height + 3) / 4,
pixels, (width + 3) & ~3, 0, 0);
- } else
+ }
+ else {
memcpy(texImage->Data, pixels, imageSize);
+ }
}
else {
- if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
- texImage->TexFormat,
- texImage->Data,
- xoffset, yoffset, zoffset,
- dstRowStride,
- texImage->ImageOffsets,
- width, height, depth,
- format, type, pixels, packing)) {
+ if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
+ texImage->TexFormat,
+ texImage->Data,
+ xoffset, yoffset, zoffset,
+ dstRowStride,
+ texImage->ImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
}
}
@@ -129,11 +131,6 @@ intelTexSubimage(GLcontext * ctx,
}
UNLOCK_HARDWARE(intel);
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
}
diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c
index a284d54..c9a24ac 100644
--- a/shared/intel_tex_validate.c
+++ b/shared/intel_tex_validate.c
@@ -5,6 +5,7 @@
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
+#include "intel_chipset.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
@@ -14,7 +15,8 @@
* GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
*/
static void
-intel_calculate_first_last_level(struct intel_texture_object *intelObj)
+intel_calculate_first_last_level(struct intel_context *intel,
+ struct intel_texture_object *intelObj)
{
struct gl_texture_object *tObj = &intelObj->base;
const struct gl_texture_image *const baseImage =
@@ -40,27 +42,27 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj)
firstLevel = lastLevel = tObj->BaseLevel;
}
else {
-#ifdef I915
- firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
- firstLevel = MAX2(firstLevel, tObj->BaseLevel);
- firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
- lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
- lastLevel = MAX2(lastLevel, tObj->BaseLevel);
- lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
- lastLevel = MIN2(lastLevel, tObj->MaxLevel);
- lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-#else
- /* Currently not taking min/max lod into account here, those
- * values are programmed as sampler state elsewhere and we
- * upload the same mipmap levels regardless. Not sure if
- * this makes sense as it means it isn't possible for the app
- * to use min/max lod to reduce texture memory pressure:
- */
- firstLevel = tObj->BaseLevel;
- lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
- tObj->MaxLevel);
- lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-#endif
+ if (intel->gen == 2) {
+ firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
+ firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+ firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
+ lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
+ lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+ lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+ lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+ lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+ } else {
+ /* Min/max LOD are taken into account in sampler state. We don't
+ * want to re-layout textures just because clamping has been applied
+ * since it means a bunch of blitting around and probably no memory
+ * savings (since we have to keep the other levels around anyway).
+ */
+ firstLevel = tObj->BaseLevel;
+ lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
+ tObj->MaxLevel);
+ /* need at least one level */
+ lastLevel = MAX2(firstLevel, lastLevel);
+ }
}
break;
case GL_TEXTURE_RECTANGLE_NV:
@@ -135,9 +137,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
/* What levels must the tree include at a minimum?
*/
- intel_calculate_first_last_level(intelObj);
- firstImage =
- intel_texture_image(intelObj->base.Image[0][intelObj->firstLevel]);
+ intel_calculate_first_last_level(intel, intelObj);
+ firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]);
/* Fallback case:
*/
@@ -165,11 +166,12 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
intel_miptree_reference(&intelObj->mt, firstImage->mt);
}
- if (firstImage->base.IsCompressed) {
- comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat->MesaFormat);
+ if (_mesa_is_format_compressed(firstImage->base.TexFormat)) {
+ comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat);
cpp = comp_byte;
}
- else cpp = firstImage->base.TexFormat->TexelBytes;
+ else
+ cpp = _mesa_get_format_bytes(firstImage->base.TexFormat);
/* Check tree can hold all active levels. Check tree matches
* target, imageFormat, etc.
@@ -189,7 +191,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
intelObj->mt->height0 != firstImage->base.Height ||
intelObj->mt->depth0 != firstImage->base.Depth ||
intelObj->mt->cpp != cpp ||
- intelObj->mt->compressed != firstImage->base.IsCompressed)) {
+ intelObj->mt->compressed != _mesa_is_format_compressed(firstImage->base.TexFormat))) {
intel_miptree_release(intel, &intelObj->mt);
}
@@ -220,8 +222,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
intel_texture_image(intelObj->base.Image[face][i]);
/* Need to import images in main memory or held in other trees.
+ * If it's a render target, then its data isn't needed to be in
+ * the object tree (otherwise we'd be FBO incomplete), and we need
+ * to keep track of the image's MT as needing to be pulled in still,
+ * or we'll lose the rendering that's done to it.
*/
- if (intelObj->mt != intelImage->mt) {
+ if (intelObj->mt != intelImage->mt &&
+ !intelImage->used_as_render_target) {
copy_image_data_to_tree(intel, intelObj, intelImage);
}
}