From 6b263d1e2c599ddcc0ce4c84425dbc1ac8de020c Mon Sep 17 00:00:00 2001 From: Luc Verhaegen Date: Sun, 14 Mar 2010 06:20:29 +0100 Subject: Import i915 and i965 dri drivers from mesa 7.7.0. --- configure.ac | 8 +- i915/Makefile.am | 3 +- i915/i830_texblend.c | 1 - i915/i830_texstate.c | 23 +- i915/i830_vtbl.c | 16 +- i915/i915_context.c | 5 +- i915/i915_context.h | 16 +- i915/i915_debug.c | 1 + i915/i915_fragprog.c | 235 +++++++- i915/i915_program.c | 31 +- i915/i915_program.h | 5 +- i915/i915_reg.h | 6 +- i915/i915_state.c | 32 +- i915/i915_texstate.c | 50 +- i915/i915_vtbl.c | 16 +- i915/intel_tris.c | 11 +- i965/Makefile.am | 3 +- i965/brw_cc.c | 51 +- i965/brw_clip.c | 3 +- i965/brw_clip.h | 9 +- i965/brw_clip_line.c | 8 +- i965/brw_clip_state.c | 11 +- i965/brw_clip_tri.c | 12 +- i965/brw_context.c | 3 +- i965/brw_context.h | 41 +- i965/brw_curbe.c | 2 +- i965/brw_defines.h | 16 +- i965/brw_draw.c | 21 +- i965/brw_draw_upload.c | 27 +- i965/brw_eu.h | 10 +- i965/brw_eu_emit.c | 3 +- i965/brw_fallback.c | 6 +- i965/brw_gs.c | 10 +- i965/brw_gs.h | 9 +- i965/brw_gs_emit.c | 38 +- i965/brw_gs_state.c | 5 +- i965/brw_misc_state.c | 10 +- i965/brw_sf.c | 8 +- i965/brw_sf.h | 8 +- i965/brw_sf_emit.c | 22 +- i965/brw_sf_state.c | 10 +- i965/brw_state.h | 10 +- i965/brw_state_cache.c | 95 +-- i965/brw_state_upload.c | 7 +- i965/brw_tex.c | 32 -- i965/brw_tex_layout.c | 7 +- i965/brw_util.c | 2 +- i965/brw_util.h | 2 +- i965/brw_vs.c | 2 +- i965/brw_vs_emit.c | 154 +++-- i965/brw_vs_state.c | 37 +- i965/brw_vs_surface_state.c | 16 +- i965/brw_vtbl.c | 29 +- i965/brw_wm.c | 39 +- i965/brw_wm.h | 167 +++++- i965/brw_wm_emit.c | 766 ++++++++++++++++--------- i965/brw_wm_fp.c | 68 +-- i965/brw_wm_glsl.c | 1276 ++++++----------------------------------- i965/brw_wm_pass0.c | 7 +- i965/brw_wm_pass1.c | 6 + i965/brw_wm_pass2.c | 4 +- i965/brw_wm_sampler_state.c | 21 +- i965/brw_wm_state.c | 9 +- i965/brw_wm_surface_state.c | 81 +-- shared/intel_batchbuffer.c | 39 +- shared/intel_batchbuffer.h | 14 +- shared/intel_blit.c | 30 +- shared/intel_buffer_objects.c | 30 +- shared/intel_buffers.c | 38 +- shared/intel_buffers.h | 2 + shared/intel_clear.c | 6 +- shared/intel_context.c | 127 ++-- shared/intel_context.h | 57 +- shared/intel_depthtmp.h | 10 + shared/intel_extensions.c | 43 +- shared/intel_extensions.h | 2 +- shared/intel_fbo.c | 249 +++----- shared/intel_fbo.h | 5 +- shared/intel_generatemipmap.c | 304 ---------- shared/intel_mipmap_tree.c | 213 +++---- shared/intel_mipmap_tree.h | 32 +- shared/intel_pixel.c | 14 - shared/intel_pixel.h | 2 - shared/intel_pixel_bitmap.c | 18 +- shared/intel_pixel_copy.c | 4 +- shared/intel_pixel_draw.c | 10 +- shared/intel_pixel_read.c | 22 +- shared/intel_regions.c | 3 +- shared/intel_regions.h | 2 + shared/intel_screen.c | 48 +- shared/intel_span.c | 272 +++++---- shared/intel_span.h | 2 + shared/intel_spantmp.h | 10 +- shared/intel_syncobj.c | 2 +- shared/intel_tex.c | 50 +- shared/intel_tex.h | 10 +- shared/intel_tex_copy.c | 76 +-- shared/intel_tex_format.c | 85 +-- shared/intel_tex_image.c | 176 +++--- shared/intel_tex_layout.h | 2 +- shared/intel_tex_obj.h | 1 + shared/intel_tex_subimage.c | 31 +- shared/intel_tex_validate.c | 67 ++- 103 files changed, 2697 insertions(+), 3053 deletions(-) delete mode 100644 shared/intel_generatemipmap.c diff --git a/configure.ac b/configure.ac index af47813..d339ca1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.6.0, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.7.0, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -15,9 +15,9 @@ AC_PROG_CC # Checks for header files. AC_HEADER_STDC -PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.3]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0 - libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0]) +PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.15 libdrm_intel]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0 + libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0]) AC_OUTPUT([ Makefile diff --git a/i915/Makefile.am b/i915/Makefile.am index 451e9fd..c994f96 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -5,7 +5,7 @@ I915_CFLAGS = -I../shared -I../shared/server -DI915 i915_dri_la_LTLIBRARIES = i915_dri.la i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I915_CFLAGS) i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \ - $(DRM_LIBS) -ldrm_intel $(DRI_LIBS) + $(DRM_LIBS) $(DRI_LIBS) i915_dri_ladir = @libdir@/dri i915_dri_la_SOURCES = \ i830_context.c \ @@ -20,7 +20,6 @@ i915_dri_la_SOURCES = \ ../shared/intel_batchbuffer.c \ ../shared/intel_clear.c \ ../shared/intel_extensions.c \ - ../shared/intel_generatemipmap.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_tex_layout.c \ ../shared/intel_tex_image.c \ diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c index 09f7f37..3f64be8 100644 --- a/i915/i830_texblend.c +++ b/i915/i830_texblend.c @@ -30,7 +30,6 @@ #include "main/mtypes.h" #include "main/simple_list.h" #include "main/enums.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/mm.h" diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c index 6f998fa..ce409b3 100644 --- a/i915/i830_texstate.c +++ b/i915/i830_texstate.c @@ -27,7 +27,6 @@ #include "main/mtypes.h" #include "main/enums.h" -#include "main/texformat.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -56,10 +55,9 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return MAPSURF_32BIT | MT_32BIT_XRGB8888; - else - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case MESA_FORMAT_XRGB8888: + return MAPSURF_32BIT | MT_32BIT_XRGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -160,13 +158,20 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) pitch = intelObj->pitchOverride; } else { + GLuint dst_x, dst_y; + + intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, + &dst_x, &dst_y); + dri_bo_reference(intelObj->mt->region->buffer); i830->state.tex_buffer[unit] = intelObj->mt->region->buffer; - i830->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt, - 0, intelObj-> - firstLevel); + /* XXX: This calculation is probably broken for tiled images with + * a non-page-aligned offset. + */ + i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) * + intelObj->mt->cpp; - format = translate_texture_format(firstImage->TexFormat->MesaFormat, + format = translate_texture_format(firstImage->TexFormat, firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c index 983f672..e8c8d5a 100644 --- a/i915/i830_vtbl.c +++ b/i915/i830_vtbl.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "glapi/glapi.h" -#include "main/texformat.h" #include "i830_context.h" #include "i830_reg.h" @@ -646,8 +645,9 @@ i830_state_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ if (irb != NULL) { - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: value |= DV_PF_8888; break; case MESA_FORMAT_RGB565: @@ -661,7 +661,7 @@ i830_state_draw_region(struct intel_context *intel, break; default: _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + irb->Base.Format); } } @@ -719,15 +719,6 @@ i830_new_batch(struct intel_context *intel) assert(!intel->no_batch_wrap); } - - -static GLuint -i830_flush_cmd(void) -{ - return MI_FLUSH | FLUSH_MAP_CACHE; -} - - static void i830_assert_not_dirty( struct intel_context *intel ) { @@ -753,7 +744,6 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state; i830->intel.vtbl.set_draw_region = i830_set_draw_region; i830->intel.vtbl.update_texture_state = i830UpdateTextureState; - i830->intel.vtbl.flush_cmd = i830_flush_cmd; i830->intel.vtbl.render_start = i830_render_start; i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; diff --git a/i915/i915_context.c b/i915/i915_context.c index 3ab7d68..7d4c7cf 100644 --- a/i915/i915_context.c +++ b/i915/i915_context.c @@ -40,6 +40,7 @@ #include "utils.h" #include "i915_reg.h" +#include "i915_program.h" #include "intel_regions.h" #include "intel_batchbuffer.h" @@ -80,6 +81,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) i915_update_stencil(ctx); if (new_state & (_NEW_LIGHT)) i915_update_provoking_vertex(ctx); + if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) + i915_update_program(ctx); } @@ -139,7 +142,7 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; - + ctx->Const.MaxVarying = I915_TEX_UNITS; /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: diff --git a/i915/i915_context.h b/i915/i915_context.h index 8de4a9d..25418d5 100644 --- a/i915/i915_context.h +++ b/i915/i915_context.h @@ -39,6 +39,7 @@ #define I915_FALLBACK_LOGICOP 0x20000 #define I915_FALLBACK_POLYGON_SMOOTH 0x40000 #define I915_FALLBACK_POINT_SMOOTH 0x80000 +#define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000 #define I915_UPLOAD_CTX 0x1 #define I915_UPLOAD_BUFFERS 0x2 @@ -121,10 +122,14 @@ enum { #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) +#define I915_MAX_INSN (I915_MAX_DECL_INSN + \ + I915_MAX_TEX_INSN + \ + I915_MAX_ALU_INSN) -#define I915_PROGRAM_SIZE 192 - -#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN) +/* Maximum size of the program packet, which matches the limits on + * decl, tex, and ALU instructions. + */ +#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1) /* Hardware version of a parsed fragment program. "Derived" from the * mesa fragment_program struct. @@ -154,8 +159,9 @@ struct i915_fragment_program */ GLcontext *ctx; - GLuint declarations[I915_PROGRAM_SIZE]; - GLuint program[I915_PROGRAM_SIZE]; + /* declarations contains the packet header. */ + GLuint declarations[I915_MAX_DECL_INSN * 3 + 1]; + GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3]; GLfloat constant[I915_MAX_CONSTANT][4]; GLuint constant_flags[I915_MAX_CONSTANT]; diff --git a/i915/i915_debug.c b/i915/i915_debug.c index f7bb7ea..fecfac3 100644 --- a/i915/i915_debug.c +++ b/i915/i915_debug.c @@ -806,6 +806,7 @@ static GLboolean i915_debug_packet( struct debug_stream *stream ) default: return debug(stream, "", 0); } + break; default: assert(0); return 0; diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c index 2db10c6..d9c6144 100644 --- a/i915/i915_fragprog.c +++ b/i915/i915_fragprog.c @@ -89,7 +89,8 @@ src_vector(struct i915_fragment_program *p, */ case PROGRAM_TEMPORARY: if (source->Index >= I915_MAX_TEMPORARY) { - i915_program_error(p, "Exceeded max temporary reg"); + i915_program_error(p, "Exceeded max temporary reg: %d/%d", + source->Index, I915_MAX_TEMPORARY); return 0; } src = UREG(REG_TYPE_R, source->Index); @@ -121,10 +122,23 @@ src_vector(struct i915_fragment_program *p, src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), D0_CHANNEL_ALL); + break; + + case FRAG_ATTRIB_VAR0: + case FRAG_ATTRIB_VAR0 + 1: + case FRAG_ATTRIB_VAR0 + 2: + case FRAG_ATTRIB_VAR0 + 3: + case FRAG_ATTRIB_VAR0 + 4: + case FRAG_ATTRIB_VAR0 + 5: + case FRAG_ATTRIB_VAR0 + 6: + case FRAG_ATTRIB_VAR0 + 7: + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0), + D0_CHANNEL_ALL); break; default: - i915_program_error(p, "Bad source->Index"); + i915_program_error(p, "Bad source->Index: %d", source->Index); return 0; } break; @@ -146,6 +160,7 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: src = i915_emit_param4fv(p, program->Base.Parameters->ParameterValues[source-> @@ -153,7 +168,7 @@ src_vector(struct i915_fragment_program *p, break; default: - i915_program_error(p, "Bad source->File"); + i915_program_error(p, "Bad source->File: %d", source->File); return 0; } @@ -186,13 +201,14 @@ get_result_vector(struct i915_fragment_program *p, p->depth_written = 1; return UREG(REG_TYPE_OD, 0); default: - i915_program_error(p, "Bad inst->DstReg.Index"); + i915_program_error(p, "Bad inst->DstReg.Index: %d", + inst->DstReg.Index); return 0; } case PROGRAM_TEMPORARY: return UREG(REG_TYPE_R, inst->DstReg.Index); default: - i915_program_error(p, "Bad inst->DstReg.File"); + i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); return 0; } } @@ -231,7 +247,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE; default: - i915_program_error(p, "TexSrcBit"); + i915_program_error(p, "TexSrcBit: %d", bit); return 0; } } @@ -351,7 +367,7 @@ upload_program(struct i915_fragment_program *p) while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0, consts0 = 0, consts1 = 0; + GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; switch (inst->Opcode) { case OPCODE_ABS: @@ -503,6 +519,10 @@ upload_program(struct i915_fragment_program *p) EMIT_1ARG_ARITH(A0_FLR); break; + case OPCODE_TRUNC: + EMIT_1ARG_ARITH(A0_TRC); + break; + case OPCODE_FRC: EMIT_1ARG_ARITH(A0_FRC); break; @@ -516,6 +536,22 @@ upload_program(struct i915_fragment_program *p) 0, src0, T0_TEXKILL); break; + case OPCODE_KIL_NV: + if (inst->DstReg.CondMask == COND_TR) { + tmp = i915_get_utemp(p); + + i915_emit_texld(p, get_live_regs(p, inst), + tmp, A0_DEST_CHANNEL_ALL, + 0, /* use a dummy dest reg */ + swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + T0_TEXKILL); + } else { + p->error = 1; + i915_program_error(p, "Unsupported KIL_NV condition code: %d", + inst->DstReg.CondMask); + } + break; + case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -615,6 +651,20 @@ upload_program(struct i915_fragment_program *p) EMIT_2ARG_ARITH(A0_MUL); break; + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* Don't implement noise because we just don't have the instructions + * to spare. We aren't the first vendor to do so. + */ + i915_program_error(p, "Stubbed-out noise functions"); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + case OPCODE_POW: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -721,9 +771,38 @@ upload_program(struct i915_fragment_program *p) } break; - case OPCODE_SGE: - EMIT_2ARG_ARITH(A0_SGE); - break; + case OPCODE_SEQ: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 >= src2 */ + i915_emit_arith(p, + A0_SGE, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 <= src2 */ + i915_emit_arith(p, + A0_SGE, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp && dst */ + i915_emit_arith(p, + A0_MUL, + dst, + flags, 0, + dst, + tmp, + 0); + break; case OPCODE_SIN: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -809,10 +888,71 @@ upload_program(struct i915_fragment_program *p) break; + case OPCODE_SGE: + EMIT_2ARG_ARITH(A0_SGE); + break; + + case OPCODE_SGT: + i915_emit_arith(p, + A0_SLT, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + + case OPCODE_SLE: + i915_emit_arith(p, + A0_SGE, + get_result_vector( p, inst ), + get_result_flags( inst ), 0, + negate(src_vector( p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector( p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + break; + case OPCODE_SLT: EMIT_2ARG_ARITH(A0_SLT); break; + case OPCODE_SNE: + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + dst = get_result_vector(p, inst); + + /* dst = src1 < src2 */ + i915_emit_arith(p, + A0_SLT, + dst, + flags, 0, + src_vector(p, &inst->SrcReg[0], program), + src_vector(p, &inst->SrcReg[1], program), + 0); + /* tmp = src1 > src2 */ + i915_emit_arith(p, + A0_SLT, + tmp, + flags, 0, + negate(src_vector(p, &inst->SrcReg[0], program), + 1, 1, 1, 1), + negate(src_vector(p, &inst->SrcReg[1], program), + 1, 1, 1, 1), + 0); + /* dst = tmp || dst */ + i915_emit_arith(p, + A0_ADD, + dst, + flags | A0_DEST_SATURATE, 0, + dst, + tmp, + 0); + break; + case OPCODE_SUB: src0 = src_vector(p, &inst->SrcReg[0], program); src1 = src_vector(p, &inst->SrcReg[1], program); @@ -869,8 +1009,39 @@ upload_program(struct i915_fragment_program *p) case OPCODE_END: return; + case OPCODE_BGNLOOP: + case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_BRK: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSUB: + case OPCODE_IF: + case OPCODE_RET: + p->error = 1; + i915_program_error(p, "Unsupported opcode: %s", + _mesa_opcode_string(inst->Opcode)); + return; + + case OPCODE_EXP: + case OPCODE_LOG: + /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in + * prog_instruction.h, but apparently GLSL doesn't ever emit them. + * Instead, it translates to EX2 or LG2. + */ + case OPCODE_TXD: + case OPCODE_TXL: + /* These opcodes are claimed by GLSL in prog_instruction.h, but + * only NV_vp/fp appears to emit them. + */ default: - i915_program_error(p, "bad opcode"); + i915_program_error(p, "bad opcode: %s", + _mesa_opcode_string(inst->Opcode)); return; } @@ -906,7 +1077,7 @@ check_wpos(struct i915_fragment_program *p) p->wpos_tex = -1; for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & FRAG_BIT_TEX(i)) + if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i))) continue; else if (inputs & FRAG_BIT_WPOS) { p->wpos_tex = i; @@ -1055,6 +1226,28 @@ i915ProgramStringNotify(GLcontext * ctx, _tnl_program_string(ctx, target, prog); } +void +i915_update_program(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + struct i915_context *i915 = i915_context(&intel->ctx); + struct i915_fragment_program *fp = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; + + if (i915->current_program != fp) { + if (i915->current_program) { + i915->current_program->on_hardware = 0; + i915->current_program->params_uptodate = 0; + } + + i915->current_program = fp; + } + + if (!fp->translated) + translate_program(fp); + + FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error); +} void i915ValidateFragmentProgram(struct i915_context *i915) @@ -1072,16 +1265,6 @@ i915ValidateFragmentProgram(struct i915_context *i915) GLuint s2 = S2_TEXCOORD_NONE; int i, offset = 0; - if (i915->current_program != p) { - if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; - } - - i915->current_program = p; - } - - /* Important: */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; @@ -1125,6 +1308,14 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } + else if (inputsRead & FRAG_BIT_VAR(i)) { + int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; + + s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + + EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); + } else if (i == p->wpos_tex) { /* If WPOS is required, duplicate the XYZ position data in an diff --git a/i915/i915_program.c b/i915/i915_program.c index e87700f..e7908bd 100644 --- a/i915/i915_program.c +++ b/i915/i915_program.c @@ -130,6 +130,7 @@ i915_emit_decl(struct i915_fragment_program *p, *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); *(p->decl++) = D1_MBZ; *(p->decl++) = D2_MBZ; + assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations)); p->nr_decl_insn++; return reg; @@ -186,6 +187,11 @@ i915_emit_arith(struct i915_fragment_program * p, p->utemp_flag = old_utemp_flag; /* restore */ } + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); @@ -270,6 +276,11 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) p->nr_tex_indirect++; + if (p->csr >= p->program + ARRAY_SIZE(p->program)) { + i915_program_error(p, "Program contains too many instructions"); + return UREG_BAD; + } + *(p->csr++) = (op | T0_DEST( dest ) | T0_SAMPLER( sampler )); @@ -424,12 +435,21 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) return 0; } - - +/* Warning the user about program errors seems to be quite valuable, from + * our bug reports. It unfortunately means piglit reporting errors + * when we fall back to software due to an unsupportable program, though. + */ void -i915_program_error(struct i915_fragment_program *p, const char *msg) +i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - _mesa_problem(NULL, "i915_program_error: %s", msg); + va_list args; + + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + fprintf(stderr, "\n"); p->error = 1; } @@ -511,7 +531,8 @@ i915_upload_program(struct i915_context *i915, GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error); + if (p->error) + return; /* Could just go straight to the batchbuffer from here: */ diff --git a/i915/i915_program.h b/i915/i915_program.h index 14a3f08..0d17d04 100644 --- a/i915/i915_program.h +++ b/i915/i915_program.h @@ -145,7 +145,7 @@ extern GLuint i915_emit_param4fv(struct i915_fragment_program *p, const GLfloat * values); extern void i915_program_error(struct i915_fragment_program *p, - const char *msg); + const char *fmt, ...); extern void i915_init_program(struct i915_context *i915, struct i915_fragment_program *p); @@ -155,7 +155,6 @@ extern void i915_upload_program(struct i915_context *i915, extern void i915_fini_program(struct i915_fragment_program *p); - - +extern void i915_update_program(GLcontext *ctx); #endif diff --git a/i915/i915_reg.h b/i915/i915_reg.h index b5fa7fd..7f31ff6 100644 --- a/i915/i915_reg.h +++ b/i915/i915_reg.h @@ -626,9 +626,9 @@ #define MT_32BIT_AWVU2101010 (0xA<<3) #define MT_32BIT_GR1616 (0xB<<3) #define MT_32BIT_VU1616 (0xC<<3) -#define MT_32BIT_xI824 (0xD<<3) -#define MT_32BIT_xA824 (0xE<<3) -#define MT_32BIT_xL824 (0xF<<3) +#define MT_32BIT_x8I24 (0xD<<3) +#define MT_32BIT_x8L24 (0xE<<3) +#define MT_32BIT_x8A24 (0xF<<3) #define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ #define MT_422_YCRCB_NORMAL (1<<3) #define MT_422_YCRCB_SWAPUV (2<<3) diff --git a/i915/i915_state.c b/i915/i915_state.c index b60efea..cc98d12 100644 --- a/i915/i915_state.c +++ b/i915/i915_state.c @@ -585,7 +585,7 @@ i915PointSize(GLcontext * ctx, GLfloat size) { struct i915_context *i915 = I915_CONTEXT(ctx); int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK; - GLint point_size = (int) size; + GLint point_size = (int) round(size); DBG("%s\n", __FUNCTION__); @@ -599,6 +599,24 @@ i915PointSize(GLcontext * ctx, GLfloat size) } +static void +i915PointParameterfv(GLcontext * ctx, GLenum pname, const GLfloat *params) +{ + struct i915_context *i915 = I915_CONTEXT(ctx); + + switch (pname) { + case GL_POINT_SPRITE_COORD_ORIGIN: + /* This could be supported, but it would require modifying the fragment + * program to invert the y component of the texture coordinate by + * inserting a 'SUB tc.y, {1.0}.xxxx, tc' instruction. + */ + FALLBACK(&i915->intel, I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN, + (params[0] != GL_UPPER_LEFT)); + break; + } +} + + /* ============================================================= * Color masks */ @@ -939,6 +957,17 @@ i915Enable(GLcontext * ctx, GLenum cap, GLboolean state) case GL_POLYGON_SMOOTH: break; + case GL_POINT_SPRITE: + /* This state change is handled in i915_reduced_primitive_state because + * the hardware bit should only be set when rendering points. + */ + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + if (state) + i915->state.Ctx[I915_CTXREG_LIS4] |= S4_SPRITE_POINT_ENABLE; + else + i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_SPRITE_POINT_ENABLE; + break; + case GL_POINT_SMOOTH: break; @@ -1108,6 +1137,7 @@ i915InitStateFunctions(struct dd_function_table *functions) functions->LineWidth = i915LineWidth; functions->LogicOpcode = i915LogicOp; functions->PointSize = i915PointSize; + functions->PointParameterfv = i915PointParameterfv; functions->PolygonStipple = i915PolygonStipple; functions->Scissor = i915Scissor; functions->ShadeModel = i915ShadeModel; diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c index 32d4b30..de25848 100644 --- a/i915/i915_texstate.c +++ b/i915/i915_texstate.c @@ -27,7 +27,7 @@ #include "main/mtypes.h" #include "main/enums.h" -#include "main/texformat.h" +#include "main/macros.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -37,7 +37,7 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLuint internal_format, +translate_texture_format(gl_format mesa_format, GLuint internal_format, GLenum DepthMode) { switch (mesa_format) { @@ -56,10 +56,9 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format, case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return MAPSURF_32BIT | MT_32BIT_XRGB8888; - else - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case MESA_FORMAT_XRGB8888: + return MAPSURF_32BIT | MT_32BIT_XRGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -82,7 +81,12 @@ translate_texture_format(GLuint mesa_format, GLuint internal_format, case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: - return (MAPSURF_32BIT | MT_32BIT_xI824); + if (DepthMode == GL_ALPHA) + return (MAPSURF_32BIT | MT_32BIT_x8A24); + else if (DepthMode == GL_INTENSITY) + return (MAPSURF_32BIT | MT_32BIT_x8I24); + else + return (MAPSURF_32BIT | MT_32BIT_x8L24); default: fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); abort(); @@ -134,6 +138,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) GLuint *state = i915->state.Tex[unit], format, pitch; GLint lodbias, aniso = 0; GLubyte border[4]; + GLfloat maxlod; memset(state, 0, sizeof(state)); @@ -173,11 +178,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } else { dri_bo_reference(intelObj->mt->region->buffer); i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; - i915->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt, - 0, intelObj-> - firstLevel); + i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */ - format = translate_texture_format(firstImage->TexFormat->MesaFormat, + format = translate_texture_format(firstImage->TexFormat, firstImage->InternalFormat, tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; @@ -193,11 +196,15 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_MS3] |= MS3_TILE_WALK; } + /* We get one field with fraction bits to cover the maximum addressable (smallest + * resolution) LOD. Use it to cover both MAX_LEVEL and MAX_LOD. + */ + maxlod = MIN2(tObj->MaxLod, tObj->MaxLevel - tObj->BaseLevel); state[I915_TEXREG_MS4] = - ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | - ((((intelObj->lastLevel - intelObj->firstLevel) * 4)) << - MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) << - MS4_VOLUME_DEPTH_SHIFT)); + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | + MS4_CUBE_FACE_ENA_MASK | + (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) | + ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); { @@ -263,8 +270,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* YUV conversion: */ - if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || - firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) + if (firstImage->TexFormat == MESA_FORMAT_YCBCR || + firstImage->TexFormat == MESA_FORMAT_YCBCR_REV) state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION; /* Shadow: @@ -293,6 +300,12 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) GLenum wt = tObj->WrapT; GLenum wr = tObj->WrapR; + /* We program 1D textures as 2D textures, so the 2D texcoord could + * result in sampling border values if we don't set the T wrap to + * repeat. + */ + if (tObj->Target == GL_TEXTURE_1D) + wt = GL_REPEAT; /* 3D textures don't seem to respect the border color. * Fallback if there's ever a danger that they might refer to @@ -327,6 +340,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); + state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(tObj->MinLod, 0.0, 11.0), 4) << + SS3_MIN_LOD_SHIFT); + } /* convert border color from float to ubyte */ diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c index 9a723d3..ff97e5a 100644 --- a/i915/i915_vtbl.c +++ b/i915/i915_vtbl.c @@ -32,7 +32,6 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" -#include "main/texformat.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -54,8 +53,7 @@ i915_render_prevalidate(struct intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); - if (!intel->Fallback) - i915ValidateFragmentProgram(i915); + i915ValidateFragmentProgram(i915); } static void @@ -589,8 +587,9 @@ i915_state_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); if (irb != NULL) { - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: value |= DV_PF_8888; break; case MESA_FORMAT_RGB565: @@ -604,7 +603,7 @@ i915_state_draw_region(struct intel_context *intel, break; default: _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + irb->Base.Format); } } @@ -673,12 +672,6 @@ i915_new_batch(struct intel_context *intel) assert(!intel->no_batch_wrap); } -static GLuint -i915_flush_cmd(void) -{ - return MI_FLUSH | FLUSH_MAP_CACHE; -} - static void i915_assert_not_dirty( struct intel_context *intel ) { @@ -700,7 +693,6 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.render_prevalidate = i915_render_prevalidate; i915->intel.vtbl.set_draw_region = i915_set_draw_region; i915->intel.vtbl.update_texture_state = i915UpdateTextureState; - i915->intel.vtbl.flush_cmd = i915_flush_cmd; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/i915/intel_tris.c b/i915/intel_tris.c index a905455..bc527aa 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -1076,7 +1076,9 @@ intelRunPipeline(GLcontext * ctx) intel->NewGLState = 0; } + intel_map_vertex_shader_textures(ctx); _tnl_run_pipeline(ctx); + intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); } @@ -1086,6 +1088,7 @@ intelRenderStart(GLcontext * ctx) { struct intel_context *intel = intel_context(ctx); + intel_check_front_buffer_rendering(intel); intel->vtbl.render_start(intel_context(ctx)); intel->vtbl.emit_state(intel); } @@ -1194,12 +1197,16 @@ getFallbackString(GLuint bit) +/** + * Enable/disable a fallback flag. + * \param bit one of INTEL_FALLBACK_x flags. + */ void -intelFallback(struct intel_context *intel, GLuint bit, GLboolean mode) +intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) { GLcontext *ctx = &intel->ctx; TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint oldfallback = intel->Fallback; + const GLbitfield oldfallback = intel->Fallback; if (mode) { intel->Fallback |= bit; diff --git a/i965/Makefile.am b/i965/Makefile.am index 8ada8b5..8a61dab 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -5,7 +5,7 @@ I965_CFLAGS = -I../shared -I../shared/server i965_dri_la_LTLIBRARIES = i965_dri.la i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I965_CFLAGS) i965_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl\ - $(DRM_LIBS) -ldrm_intel $(DRI_LIBS) + $(DRM_LIBS) $(DRI_LIBS) i965_dri_ladir = @libdir@/dri i965_dri_la_SOURCES = \ ../shared/intel_batchbuffer.c \ @@ -17,7 +17,6 @@ i965_dri_la_SOURCES = \ ../shared/intel_decode.c \ ../shared/intel_extensions.c \ ../shared/intel_fbo.c \ - ../shared/intel_generatemipmap.c \ ../shared/intel_mipmap_tree.c \ ../shared/intel_regions.c \ ../shared/intel_screen.c \ diff --git a/i965/brw_cc.c b/i965/brw_cc.c index c724218..bac1c3a 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -34,25 +34,35 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" +#include "intel_fbo.h" #include "main/macros.h" #include "main/enums.h" static void prepare_cc_vp( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; memset(&ccv, 0, sizeof(ccv)); - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } dri_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); } const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = 0, + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -80,6 +90,28 @@ struct brw_cc_unit_key { GLenum depth_func; }; +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. This + * is used when rendering to xRGB targets. + */ +static GLenum +fix_xRGB_alpha(GLenum function) +{ + switch (function) { + case GL_DST_ALPHA: + return GL_ONE; + + case GL_ONE_MINUS_DST_ALPHA: + case GL_SRC_ALPHA_SATURATE: + return GL_ZERO; + } + + return function; +} + static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { @@ -123,6 +155,17 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) key->blend_dst_rgb = ctx->Color.BlendDstRGB; key->blend_src_a = ctx->Color.BlendSrcA; key->blend_dst_a = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) { + key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); + key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); + key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); + key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); + } } key->alpha_enabled = ctx->Color.AlphaEnabled; diff --git a/i965/brw_clip.c b/i965/brw_clip.c index 20a927c..dbd10a5 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw, delta = REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<vs.prog_data->outputs_written; /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); + key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); diff --git a/i965/brw_clip.h b/i965/brw_clip.h index 957df44..1c68255 100644 --- a/i965/brw_clip.h +++ b/i965/brw_clip.h @@ -42,22 +42,21 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; + GLuint pv_first:1; GLuint do_unfilled:1; GLuint fill_cw:2; /* includes cull information */ GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:17; - GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:27; - + GLuint pad0:11; + GLfloat offset_factor; GLfloat offset_units; }; diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c index 048ca62..fa9648f 100644 --- a/i965/brw_clip_line.c +++ b/i965/brw_clip_line.c @@ -269,8 +269,12 @@ void brw_emit_line_clip( struct brw_clip_compile *c ) brw_clip_line_alloc_regs(c); brw_clip_init_ff_sync(c); - if (c->key.do_flat_shading) - brw_clip_copy_colors(c, 0, 1); + if (c->key.do_flat_shading) { + if (c->key.pv_first) + brw_clip_copy_colors(c, 1, 0); + else + brw_clip_copy_colors(c, 0, 1); + } clip_and_emit_line(c); } diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c index 5762c95..234b374 100644 --- a/i965/brw_clip_state.c +++ b/i965/brw_clip_state.c @@ -43,11 +43,14 @@ struct brw_clip_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; }; static void clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ @@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) /* BRW_NEW_URB_FENCE */ key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; + + /* _NEW_TRANSOFORM */ + key->depth_clamp = ctx->Transform.DepthClamp; } static dri_bo * @@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; clip.clip5.api_mode = BRW_CLIP_API_OGL; @@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c index 0efd772..cf79224 100644 --- a/i965/brw_clip_tri.c +++ b/i965/brw_clip_tri.c @@ -188,14 +188,20 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) brw_imm_ud(_3DPRIM_POLYGON)); is_poly = brw_IF(p, BRW_EXECUTE_1); - { + { brw_clip_copy_colors(c, 1, 0); brw_clip_copy_colors(c, 2, 0); } is_poly = brw_ELSE(p, is_poly); { - brw_clip_copy_colors(c, 0, 2); - brw_clip_copy_colors(c, 1, 2); + if (c->key.pv_first) { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + else { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } } brw_ENDIF(p, is_poly); } diff --git a/i965/brw_context.c b/i965/brw_context.c index c300c33..aaa2d80 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -33,7 +33,7 @@ #include "main/imports.h" #include "main/api_noop.h" #include "main/macros.h" -#include "main/vtxfmt.h" +/* #include "main/vtxfmt.h" */ #include "main/simple_list.h" #include "shader/shader_api.h" @@ -105,6 +105,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, diff --git a/i965/brw_context.h b/i965/brw_context.h index a5209ac..fded47a 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -115,7 +115,7 @@ * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_FALLBACK_TEXTURE 0x1 + #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -231,7 +231,7 @@ struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint outputs_written; + GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -252,20 +252,23 @@ struct brw_vs_ouput_sizes { /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + /** * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) /** * Size of surface binding table for the VS. @@ -317,7 +320,6 @@ struct brw_cache_item { GLuint nr_reloc_bufs; dri_bo *bo; - GLuint data_size; struct brw_cache_item *next; }; @@ -330,7 +332,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; @@ -410,23 +411,6 @@ struct brw_vertex_info { GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; - - - -/* Cache for TNL programs. - */ -struct brw_tnl_cache_item { - GLuint hash; - void *key; - void *data; - struct brw_tnl_cache_item *next; -}; - -struct brw_tnl_cache { - struct brw_tnl_cache_item **items; - GLuint size, n_items; -}; - struct brw_query_object { struct gl_query_object Base; @@ -454,7 +438,6 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean tmp_fallback; GLboolean no_batch_wrap; struct { @@ -705,10 +688,6 @@ void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ -void brwUpdateTextureState( struct intel_context *intel ); -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); @@ -763,9 +742,5 @@ brw_fragment_program_const(const struct gl_fragment_program *p) return (const struct brw_fragment_program *) p; } - - -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) - #endif diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index 4be6c77..aadcfbe 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -130,7 +130,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT, .cache = CACHE_NEW_WM_PROG }, .prepare = calculate_curbe_offsets diff --git a/i965/brw_defines.h b/i965/brw_defines.h index 78d457a..c19510b 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -673,18 +673,10 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG 3 /* for IGDNG only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/i965/brw_draw.c b/i965/brw_draw.c index c53bd47..8bcb608 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -25,13 +25,15 @@ * **************************************************************************/ -#include #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" #include "main/enums.h" +#include "tnl/tnl.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" #include "brw_draw.h" #include "brw_defines.h" @@ -42,11 +44,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "tnl/tnl.h" -#include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" - #define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint prim_to_hw_prim[GL_POLYGON+1] = { @@ -145,7 +142,7 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; + prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; @@ -156,18 +153,14 @@ static void brw_emit_prim(struct brw_context *brw, * the besides the draw code. */ if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } brw->no_batch_wrap = GL_FALSE; diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index 4aa17fa..ee684f6 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -25,12 +25,12 @@ * **************************************************************************/ -#include #include "main/glheader.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" +/* #include "main/api_validate.h" */ #include "main/enums.h" #include "brw_draw.h" @@ -375,7 +375,7 @@ static void brw_prepare_vertices(struct brw_context *brw) * isn't an issue at this point. */ if (brw->vb.nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; + intel->Fallback = GL_TRUE; /* boolean, not bitfield */ return; } @@ -384,7 +384,7 @@ static void brw_prepare_vertices(struct brw_context *brw) input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - if (input->glarray->BufferObj->Name != 0) { + if (_mesa_is_bufferobj(input->glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(input->glarray->BufferObj); @@ -427,7 +427,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { - intel->Fallback = 1; + intel->Fallback = GL_TRUE; /* boolean, not bitfield */ return; } @@ -536,16 +536,9 @@ static void brw_emit_vertices(struct brw_context *brw) I915_GEM_DOMAIN_VERTEX, 0, input->offset); if (BRW_IS_IGDNG(brw)) { - if (input->stride) { - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->stride * input->count); - } else { - assert(input->count == 1); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->element_size); - } + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->bo->size - 1); } else OUT_BATCH(input->stride ? input->count : 0); OUT_BATCH(0); /* Instance data step rate */ @@ -623,7 +616,7 @@ static void brw_prepare_indices(struct brw_context *brw) /* Turn into a proper VBO: */ - if (!bufferobj->Name) { + if (!_mesa_is_bufferobj(bufferobj)) { brw->ib.start_vertex_offset = 0; /* Get new bufferobj, offset: @@ -726,7 +719,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + brw->ib.size); + brw->ib.offset + brw->ib.size - 1); OUT_BATCH( 0 ); ADVANCE_BATCH(); } diff --git a/i965/brw_eu.h b/i965/brw_eu.h index 30603bd..39eb88d 100644 --- a/i965/brw_eu.h +++ b/i965/brw_eu.h @@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; - if (type == BRW_GENERAL_REGISTER_FILE) + if (file == BRW_GENERAL_REGISTER_FILE) assert(nr < BRW_MAX_GRF); - else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < BRW_MAX_MRF); - else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); reg.type = type; @@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { - assert(nr < BRW_MAX_MRF); + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 241cdc3..7ceabba 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { - if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) assert(dest.nr < 128); insn->bits1.da1.dest_reg_file = dest.file; diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index d27c6c2..562a178 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -133,7 +133,11 @@ const struct brw_tracked_state brw_check_fallback = { -/* Not used: +/** + * Called by the INTEL_FALLBACK() macro. + * NOTE: this is a no-op for the i965 driver. The brw->intel.Fallback + * field is treated as a boolean, not a bitmask. It's only set in a + * couple of places. */ void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ) { diff --git a/i965/brw_gs.c b/i965/brw_gs.c index 48c2b9a..610b6c3 100644 --- a/i965/brw_gs.c +++ b/i965/brw_gs.c @@ -85,10 +85,10 @@ static void compile_gs_prog( struct brw_context *brw, */ switch (key->primitive) { case GL_QUADS: - brw_gs_quads( &c ); + brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: - brw_gs_quad_strip( &c ); + brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: brw_gs_lines( &c ); @@ -149,6 +149,7 @@ static const GLenum gs_prim[GL_POLYGON+1] = { static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -158,6 +159,9 @@ static void populate_key( struct brw_context *brw, key->primitive = gs_prim[brw->primitive]; key->hint_gs_always = 0; /* debug code? */ + + /* _NEW_LIGHT */ + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); key->need_gs_prog = (key->hint_gs_always || brw->primitive == GL_QUADS || @@ -193,7 +197,7 @@ static void prepare_gs_prog(struct brw_context *brw) const struct brw_tracked_state brw_gs_prog = { .dirty = { - .mesa = 0, + .mesa = _NEW_LIGHT, .brw = BRW_NEW_PRIMITIVE, .cache = CACHE_NEW_VS_PROG }, diff --git a/i965/brw_gs.h b/i965/brw_gs.h index bbb991e..010c1c2 100644 --- a/i965/brw_gs.h +++ b/i965/brw_gs.h @@ -40,11 +40,12 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint hint_gs_always:1; + GLuint pv_first:1; GLuint need_gs_prog:1; - GLuint pad:26; + GLuint pad:25; }; struct brw_gs_compile { @@ -67,8 +68,8 @@ struct brw_gs_compile { #define ATTR_SIZE (4*4) -void brw_gs_quads( struct brw_gs_compile *c ); -void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); +void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); void brw_gs_tris( struct brw_gs_compile *c ); void brw_gs_lines( struct brw_gs_compile *c ); void brw_gs_points( struct brw_gs_compile *c ); diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c index a9b2aa2..0fc5b02 100644 --- a/i965/brw_gs_emit.c +++ b/i965/brw_gs_emit.c @@ -120,7 +120,7 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) } -void brw_gs_quads( struct brw_gs_compile *c ) +void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { brw_gs_alloc_regs(c, 4); @@ -128,23 +128,39 @@ void brw_gs_quads( struct brw_gs_compile *c ) * is the PV for quads, but vertex 0 for polygons: */ if (c->need_ff_sync) - brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_ff_sync(c, 1); + if (key->pv_first) { + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } + else { + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } } -void brw_gs_quad_strip( struct brw_gs_compile *c ) +void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { brw_gs_alloc_regs(c, 4); if (c->need_ff_sync) brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + if (key->pv_first) { + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } + else { + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } } void brw_gs_tris( struct brw_gs_compile *c ) diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c index a761c03..ed9d2ff 100644 --- a/i965/brw_gs_state.c +++ b/i965/brw_gs_state.c @@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; - gs.thread4.max_threads = 0; /* Hardware requirement */ + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index ea71857..4b0d598 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -66,7 +66,7 @@ static void upload_blend_constant_color(struct brw_context *brw) const struct brw_tracked_state brw_blend_constant_color = { .dirty = { .mesa = _NEW_COLOR, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_blend_constant_color @@ -93,7 +93,7 @@ static void upload_drawing_rect(struct brw_context *brw) const struct brw_tracked_state brw_drawing_rect = { .dirty = { .mesa = _NEW_BUFFERS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_drawing_rect @@ -317,7 +317,7 @@ static void upload_polygon_stipple(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple = { .dirty = { .mesa = _NEW_POLYGONSTIPPLE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple @@ -362,7 +362,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { .mesa = _NEW_WINDOW_POS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple_offset @@ -425,7 +425,7 @@ static void upload_line_stipple(struct brw_context *brw) const struct brw_tracked_state brw_line_stipple = { .dirty = { .mesa = _NEW_LINE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_line_stipple diff --git a/i965/brw_sf.c b/i965/brw_sf.c index e1c2c77..968890f 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -61,7 +61,7 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = brw_count_bits(c.key.attrs); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; @@ -70,7 +70,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { @@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw) * edgeflag testing here, it is already done in the clip * program. */ - if (key.attrs & (1<Point.PointSprite; - key.SpriteOrigin = ctx->Point.SpriteOrigin; + key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); diff --git a/i965/brw_sf.h b/i965/brw_sf.h index 6426b6d..0ba731f 100644 --- a/i965/brw_sf.h +++ b/i965/brw_sf.h @@ -45,19 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint linear_color:1; /**< linear interp vs. perspective interp */ - GLuint pad:25; - GLenum SpriteOrigin; + GLuint sprite_origin_lower_left:1; + GLuint pad:24; }; struct brw_sf_point_tex { - GLboolean CoordReplace; + GLboolean CoordReplace; }; struct brw_sf_compile { diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index ca8f97f..3eae41e 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -56,7 +56,7 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c, static GLboolean have_attr(struct brw_sf_compile *c, GLuint attr) { - return (c->key.attrs & (1<key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; } /*********************************************************************** @@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * Flat shading */ -#define VERT_RESULT_COLOR_BITS ((1<nr_setup_regs - 1); - GLuint persp_mask; - GLuint linear_mask; + GLbitfield64 persp_mask; + GLbitfield64 linear_mask; if (c->key.do_flat_shading || c->key.linear_color) persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | @@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } @@ -551,7 +551,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); @@ -570,7 +570,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { brw_set_predicate_control_flag_value(p, pc); if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); } diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index bc0f076..bb69435 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -93,7 +93,8 @@ static void upload_sf_vp(struct brw_context *brw) } dri_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), + NULL, 0); } const struct brw_tracked_state brw_sf_vp = { @@ -113,7 +114,8 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face, provoking_vertex; + GLenum front_face, cull_face; + unsigned pv_first:1; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -154,7 +156,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_attenuated = ctx->Point._Attenuated; /* _NEW_LIGHT */ - key->provoking_vertex = ctx->Light.ProvokingVertex; + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -287,7 +289,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + if (!key->pv_first) { sf.sf7.trifan_pv = 2; sf.sf7.linestrip_pv = 1; sf.sf7.tristrip_pv = 2; diff --git a/i965/brw_state.h b/i965/brw_state.h index 5335eac..b129b1f 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -112,6 +112,7 @@ void brw_validate_state(struct brw_context *brw); void brw_upload_state(struct brw_context *brw); void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); +void brw_clear_validated_bos(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c @@ -119,16 +120,10 @@ void brw_destroy_state(struct brw_context *brw); dri_bo *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, + GLuint size, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs); - dri_bo *brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -151,6 +146,7 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c index e40d7a0..e4c9ba7 100644 --- a/i965/brw_state_cache.c +++ b/i965/brw_state_cache.c @@ -245,7 +245,6 @@ brw_upload_cache( struct brw_cache *cache, item->bo = bo; dri_bo_reference(bo); - item->data_size = data_size; if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -275,15 +274,22 @@ brw_upload_cache( struct brw_cache *cache, /** - * This doesn't really work with aux data. Use search/upload instead + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + * + * If aux data is involved, use search/upload instead. + */ dri_bo * -brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { dri_bo *bo; struct brw_cache_item *item; @@ -306,25 +312,6 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } - -/** - * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. - * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be - * better to use, as the potentially changing offsets in the data-used-as-key - * will result in excessive cache misses. - */ -dri_bo * -brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) -{ - return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], - reloc_bufs, nr_reloc_bufs); -} - enum pool_type { DW_SURFACE_STATE, DW_GENERAL_STATE @@ -335,11 +322,9 @@ static void brw_init_cache_id(struct brw_cache *cache, const char *name, enum brw_cache_id id, - GLuint key_size, GLuint aux_size) { cache->name[id] = strdup(name); - cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } @@ -359,91 +344,76 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, - sizeof(struct brw_cc_viewport), 0); brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), 0); brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), 0); brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, - 0, /* variable key/data size */ 0); brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), 0); brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, - sizeof(struct brw_sf_viewport), 0); brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), 0); brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), 0); brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), 0); brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), 0); brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); } @@ -463,13 +433,11 @@ brw_init_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, - sizeof(struct brw_surface_state), 0); brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, - 0, 0); } @@ -517,6 +485,41 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } +/* Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + + if (drm_intel_bo_references(c->bo, bo)) { + int j; + + *prev = c->next; + + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); + free((void *)c->key); + free(c); + cache->n_items--; + } else { + prev = &c->next; + } + } + } +} void brw_state_cache_check_size(struct brw_context *brw) diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c index b817b74..af8dfb4 100644 --- a/i965/brw_state_upload.c +++ b/i965/brw_state_upload.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "intel_batchbuffer.h" +#include "intel_buffers.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -142,7 +143,7 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } -static void +void brw_clear_validated_bos(struct brw_context *brw) { int i; @@ -308,7 +309,7 @@ void brw_validate_state( struct brw_context *brw ) if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache(brw); - brw->intel.Fallback = 0; + brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { @@ -324,6 +325,8 @@ void brw_validate_state( struct brw_context *brw ) } } + intel_check_front_buffer_rendering(intel); + /* Make sure that the textures which are referenced by the current * brw fragment program are actually present/valid. * If this fails, we can experience GPU lock-ups. diff --git a/i965/brw_tex.c b/i965/brw_tex.c index 71bff16..e911b10 100644 --- a/i965/brw_tex.c +++ b/i965/brw_tex.c @@ -39,38 +39,6 @@ #include "intel_tex.h" #include "brw_context.h" - -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ) -{ - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - struct gl_texture_object *obj; - struct gl_texture_image *img; - - intel->frame_buffer_texobj = obj = - ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); - - obj->MinFilter = GL_NEAREST; - obj->MagFilter = GL_NEAREST; - - img = ctx->Driver.NewTextureImage( ctx ); - - _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, - region->pitch, region->height, 1, 0, - region->cpp == 4 ? GL_RGBA : GL_RGB ); - - _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); -} - -void brw_FrameBufferTexDestroy( struct brw_context *brw ) -{ - if (brw->intel.frame_buffer_texobj != NULL) - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); - brw->intel.frame_buffer_texobj = NULL; -} - /** * Finalizes all textures, completing any rendering that needs to be done * to prepare them. diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index 5986cbf..e59e52e 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -86,10 +86,10 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); if (mt->compressed) { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4; mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h); mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; } @@ -102,7 +102,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel, height, 1); for (q = 0; q < nr_images; q++) - intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + intel_miptree_set_image_offset(mt, level, q, + x, y + q * qpitch); if (mt->compressed) img_height = MAX2(1, height/4); diff --git a/i965/brw_util.c b/i965/brw_util.c index ce21aa4..bba9249 100644 --- a/i965/brw_util.c +++ b/i965/brw_util.c @@ -35,7 +35,7 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) +GLuint brw_count_bits(uint64_t val) { GLuint i; for (i = 0; val ; val >>= 1) diff --git a/i965/brw_util.h b/i965/brw_util.h index 33e7cd8..04f3175 100644 --- a/i965/brw_util.h +++ b/i965/brw_util.h @@ -35,7 +35,7 @@ #include "main/mtypes.h" -extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_count_bits(uint64_t val); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/i965/brw_vs.c b/i965/brw_vs.c index f0c79ef..fd055e2 100644 --- a/i965/brw_vs.c +++ b/i965/brw_vs.c @@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { @@ -331,63 +331,76 @@ static void unalias3( struct brw_vs_compile *c, } } -static void emit_sop( struct brw_compile *p, +static void emit_sop( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1, GLuint cond) { + struct brw_compile *p = &c->func; + brw_MOV(p, dst, brw_imm_f(0.0f)); brw_CMP(p, brw_null_reg(), cond, arg0, arg1); brw_MOV(p, dst, brw_imm_f(1.0f)); brw_set_predicate_control_flag_value(p, 0xff); } -static void emit_seq( struct brw_compile *p, +static void emit_seq( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_EQ); } -static void emit_sne( struct brw_compile *p, +static void emit_sne( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); } -static void emit_slt( struct brw_compile *p, +static void emit_slt( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_L); } -static void emit_sle( struct brw_compile *p, +static void emit_sle( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_LE); } -static void emit_sgt( struct brw_compile *p, +static void emit_sgt( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_G); } -static void emit_sge( struct brw_compile *p, +static void emit_sge( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_cmp( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + struct brw_reg arg2 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, brw_imm_f(0)); + brw_SEL(p, dst, arg1, arg2); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); } static void emit_max( struct brw_compile *p, @@ -912,6 +925,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } @@ -930,7 +944,6 @@ get_src_reg( struct brw_vs_compile *c, /* this is a normal case since we loop over all three src args */ return brw_null_reg(); - case PROGRAM_LOCAL_PARAM: case PROGRAM_WRITE_ONLY: default: assert(0); @@ -1122,7 +1135,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); @@ -1132,7 +1145,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1208,7 +1221,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ - 1, /* writes complete */ + eot, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); @@ -1222,7 +1235,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ GLuint i, mrf = 0; for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { /* move from GRF to MRF */ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); mrf++; @@ -1269,10 +1282,60 @@ post_vs_emit( struct brw_vs_compile *c, } } +static GLboolean +accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1]; + + if (p->nr_insn == 0) + return GL_FALSE; + + if (val.address_mode != BRW_ADDRESS_DIRECT) + return GL_FALSE; + + switch (prev_insn->header.opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_MAC: + case BRW_OPCODE_MUL: + if (prev_insn->header.access_mode == BRW_ALIGN_16 && + prev_insn->header.execution_size == val.width && + prev_insn->bits1.da1.dest_reg_file == val.file && + prev_insn->bits1.da1.dest_reg_type == val.type && + prev_insn->bits1.da1.dest_address_mode == val.address_mode && + prev_insn->bits1.da1.dest_reg_nr == val.nr && + prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 && + prev_insn->bits1.da16.dest_writemask == 0xf) + return GL_TRUE; + else + return GL_FALSE; + default: + return GL_FALSE; + } +} + static uint32_t -get_predicate(uint32_t swizzle) +get_predicate(const struct prog_instruction *inst) { - switch (swizzle) { + if (inst->DstReg.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ + assert(inst->DstReg.CondMask == COND_NE); + + switch (inst->DstReg.CondSwizzle) { case SWIZZLE_XXXX: return BRW_PREDICATE_ALIGN16_REPLICATE_X; case SWIZZLE_YYYY: @@ -1282,7 +1345,8 @@ get_predicate(uint32_t swizzle) case SWIZZLE_WWWW: return BRW_PREDICATE_ALIGN16_REPLICATE_W; default: - _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", + inst->DstReg.CondMask); return BRW_PREDICATE_NORMAL; } } @@ -1294,6 +1358,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; @@ -1427,9 +1492,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; case OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); + if (!accumulator_contains(c, args[2])) + brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; + case OPCODE_CMP: + emit_cmp(p, dst, args[0], args[1], args[2]); + break; case OPCODE_MAX: emit_max(p, dst, args[0], args[1]); break; @@ -1453,25 +1522,25 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_SEQ: - emit_seq(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_seq); break; case OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_SNE: - emit_sne(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sne); break; case OPCODE_SGE: - emit_sge(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sge); break; case OPCODE_SGT: - emit_sgt(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sgt); break; case OPCODE_SLT: - emit_slt(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_slt); break; case OPCODE_SLE: - emit_sle(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sle); break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); @@ -1492,8 +1561,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); - if_inst[if_depth]->header.predicate_control = - get_predicate(inst->DstReg.CondSwizzle); + /* Note that brw_IF smashes the predicate_control field. */ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); if_depth++; break; case OPCODE_ELSE: @@ -1503,45 +1572,48 @@ void brw_vs_emit(struct brw_vs_compile *c ) assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; -#if 0 case OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; + GLuint br = 1; + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; -#else - (void) loop_inst; - (void) loop_depth; -#endif case OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index d790ab6..7285466 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -109,10 +109,39 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) - vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; - else - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + case 64: + case 96: + case 128: + case 168: + case 192: + case 224: + case 256: + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + break; + default: + assert(0); + } + } else { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + break; + case 64: + assert(BRW_IS_G4X(brw)); + break; + default: + assert(0); + } + vs.thread4.nr_urb_entries = key->nr_urb_entries; + } vs.thread4.urb_entry_allocation_size = key->urb_size - 1; diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c index 89f4752..3bc9840 100644 --- a/i965/brw_vs_surface_state.c +++ b/i965/brw_vs_surface_state.c @@ -30,7 +30,6 @@ */ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -53,6 +52,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw) const struct gl_program_parameter_list *params = vp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); drm_intel_bo *const_buffer; + int i; /* BRW_NEW_VERTEX_PROGRAM */ if (!vp->use_const_buffer) @@ -62,7 +62,19 @@ brw_vs_update_constant_buffer(struct brw_context *brw) size, 64); /* _NEW_PROGRAM_CONSTANTS */ - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); + + intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + for (i = 0; i < params->NumParameters; i++) { + memcpy(const_buffer->virtual + i * 4 * sizeof(float), + params->ParameterValues[i], + 4 * sizeof(float)); + } + intel_bo_unmap_gtt_preferred(intel, const_buffer); return const_buffer; } diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index ac11790..34aaea3 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -46,7 +46,7 @@ #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" - +#include "brw_wm.h" static void dri_bo_release(dri_bo **bo) @@ -66,10 +66,14 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); - - _mesa_free(brw->wm.compile_data); - - brw_FrameBufferTexDestroy( brw ); + brw_clear_validated_bos(brw); + if (brw->wm.compile_data) { + _mesa_free(brw->wm.compile_data->instruction); + _mesa_free(brw->wm.compile_data->vreg); + _mesa_free(brw->wm.compile_data->refs); + _mesa_free(brw->wm.compile_data->prog_instructions); + _mesa_free(brw->wm.compile_data); + } for (i = 0; i < brw->state.nr_color_regions; i++) intel_region_release(&brw->state.color_regions[i]); @@ -177,20 +181,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } -/* called from intelWaitForIdle() and intelFlush() - * - * For now, just flush everything. Could be smarter later. - */ -static GLuint brw_flush_cmd( void ) -{ - struct brw_mi_flush flush; - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_STATE_CACHE; - return *(GLuint *)&flush; -} - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -211,6 +201,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; - brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/i965/brw_wm.c b/i965/brw_wm.c index 2292de9..6895f64 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -29,7 +29,6 @@ * Keith Whitwell */ -#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" @@ -153,8 +152,21 @@ static void do_wm_prog( struct brw_context *brw, */ return; } + c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + sizeof(*c->prog_instructions)); + c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); } else { + void *instruction = c->instruction; + void *prog_instructions = c->prog_instructions; + void *vreg = c->vreg; + void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); + c->instruction = instruction; + c->prog_instructions = prog_instructions; + c->vreg = vreg; + c->refs = refs; } memcpy(&c->key, key, sizeof(*key)); @@ -218,7 +230,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -288,7 +300,7 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; - if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) + if (img->TexFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } @@ -309,6 +321,9 @@ static void brw_wm_populate_key( struct brw_context *brw, * from the incoming screen origin relative position we get as part of our * payload. * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * * We could avoid recompiling by including this as a constant referenced by * our program, but if we were to do that it would also be nice to handle * getting that constant updated at batchbuffer submit time (when we @@ -317,17 +332,21 @@ static void brw_wm_populate_key( struct brw_context *brw, * just avoid using this as key data if the program doesn't use * fragment.position. * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. */ - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } } + key->nr_color_regions = brw->state.nr_color_regions; + /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index ae98b54..9dcb6e1 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -38,6 +38,8 @@ #include "brw_context.h" #include "brw_eu.h" +#define SATURATE (1<<5) + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -65,18 +67,19 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; + GLuint nr_color_regions:2; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; - GLuint drawable_height; - GLuint vp_outputs_written; + GLushort origin_x, origin_y; + GLushort drawable_height; + GLbitfield64 vp_outputs_written; }; @@ -151,15 +154,16 @@ struct brw_wm_instruction { }; -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS #define BRW_WM_MAX_SUBROUTINE 16 +/* used in masks next to WRITEMASK_*. */ +#define SATURATE (1<<5) /* New opcodes to track internal operations required for WM unit. @@ -198,19 +202,18 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct prog_instruction *prog_instructions; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; struct prog_src_register pixel_w; - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + struct brw_wm_value *vreg; GLuint nr_vreg; struct brw_wm_value creg[BRW_WM_MAX_PARAM]; @@ -227,10 +230,10 @@ struct brw_wm_compile { struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; + struct brw_wm_ref *refs; GLuint nr_refs; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + struct brw_wm_instruction *instruction; GLuint nr_insns; struct brw_wm_constref constref[BRW_WM_MAX_CONST]; @@ -270,6 +273,12 @@ struct brw_wm_compile { }; +/** Bits for prog_instruction::Aux field */ +#define INST_AUX_EOT 0x1 +#define INST_AUX_TARGET(T) (T << 1) +#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1) + + GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); @@ -299,5 +308,141 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +/* brw_wm_emit.c */ +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot); +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask); +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w); +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask); +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow); +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler); +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); #endif diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index 268f796..5390fd2 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -34,8 +34,6 @@ #include "brw_context.h" #include "brw_wm.h" -#define SATURATE (1<<5) - /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -46,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } + /* Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, @@ -62,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.8 -- ? */ - -static void emit_pixel_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask) { + struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + struct brw_reg dst0_uw, dst1_uw; + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + if (c->dispatch_width == 16) { + dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } else { + dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } + /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, - vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, - vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } - -static void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -120,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_compile *p = &c->func; @@ -148,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } -static void emit_pixel_w( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { + struct brw_compile *p = &c->func; + /* Don't need this if all you are doing is interpolating color, for * instance. */ @@ -167,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p, brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ - brw_math_16( p, dst[3], - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); + if (c->dispatch_width == 16) { + brw_math_16(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } else { + brw_math(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } } - -static void emit_linterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas ) +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -201,12 +218,12 @@ static void emit_linterp( struct brw_compile *p, } -static void emit_pinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -231,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p, } -static void emit_cinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -253,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p, } /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask ) +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; @@ -281,13 +298,86 @@ static void emit_frontfacing( struct brw_compile *p, brw_set_predicate_control_flag_value(p, 0xff); } -static void emit_alu1( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); + /* If compressed, this will write message reg 2,3 from arg0.x's 16 + * channels. + */ brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ - brw_math_16(p, - dst[dst_chan], + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + saturate, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, + saturate, + 3, brw_null_reg(), + BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + } + brw_pop_insn_state(p); } -static void emit_math2( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -675,173 +781,231 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } - - /* Send two messages to perform all 16 operations: - */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + saturate, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 4, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } brw_pop_insn_state(p); } - -static void emit_tex( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow) { struct brw_compile *p = &c->func; - GLuint msgLength, responseLength; - GLuint i, nr; + struct brw_reg dst_retyped; + GLuint cur_mrf = 2, response_length; + GLuint i, nr_texcoords; GLuint emit; GLuint msg_type; + GLuint mrf_per_channel; + GLuint simd_mode; + + if (c->dispatch_width == 16) { + mrf_per_channel = 2; + response_length = 8; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { + mrf_per_channel = 1; + response_length = 4; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } /* How many input regs are there? */ - switch (inst->tex_idx) { + switch (tex_idx) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; - nr = 1; + nr_texcoords = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: emit = WRITEMASK_XY; - nr = 2; + nr_texcoords = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; - nr = 3; + nr_texcoords = 3; break; default: /* unexpected target */ abort(); } - if (inst->tex_shadow) { - nr = 4; - emit |= WRITEMASK_W; - } + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ + if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + nr_texcoords = 3; - msgLength = 1; + /* For shadow comparisons, we have to supply u,v,r. */ + if (shadow) + nr_texcoords = 3; - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<brw)) { + /* Fill in the cube map array index value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } else if (c->dispatch_width == 8) { + /* Fill in the LOD bias value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (c->dispatch_width == 16 && shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; } - brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + brw_SAMPLE(p, + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, - msg_type, - responseLength, - msgLength, - 0, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, + msg_type, + response_length, + cur_mrf - 1, + 0, 1, - BRW_SAMPLER_SIMD_MODE_SIMD16); + simd_mode); } -static void emit_txb( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength; GLuint msg_type; - /* Shadow ignored for txb. + GLuint mrf_per_channel; + GLuint response_length; + struct brw_reg dst_retyped; + + /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased + * samples, so we'll use the 16-wide instruction, leave the second halves + * undefined, and trust the execution mask to keep the undefined pixels + * from mattering. */ - switch (inst->tex_idx) { + if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + mrf_per_channel = 2; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 8; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + mrf_per_channel = 1; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 4; + } + + /* Shadow ignored for txb. */ + switch (tex_idx) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); - msgLength = 9; - - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); + msgLength = 2 + 4 * mrf_per_channel - 1; brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, - 8, /* responseLength */ + response_length, msgLength, 0, 1, @@ -849,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c, } -static void emit_lit( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +static void emit_lit(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; + assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { @@ -863,7 +1029,7 @@ static void emit_lit( struct brw_compile *p, } if (mask & WRITEMASK_Z) { - emit_math2(p, BRW_MATH_FUNCTION_POW, + emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], @@ -908,6 +1074,20 @@ static void emit_kil( struct brw_wm_compile *c, } } +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, @@ -916,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; - + struct brw_reg dst; + + if (c->dispatch_width == 16) + dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ @@ -933,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, - retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, @@ -965,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c, * \param arg1 the pass-through depth value * \param arg2 the shader-computed depth value */ -static void emit_fb_write( struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; GLuint nr = 2; GLuint channel; @@ -984,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c, /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ - { - brw_push_insn_state(p); - - for (channel = 0; channel < 4; channel++) { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) { + /* By setting the high bit of the MRF register number, we indicate + * that we want COMPR4 mode - instead of doing the usual destination + * + 1 for the second half we get destination + 4. + */ + brw_MOV(p, + brw_message_reg(nr + channel + (1 << 7)), + arg0[channel]); + } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); - - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, - brw_message_reg(nr + channel + 4), - sechalf(arg0[channel])); - } - /* skip over the regs populated above: - */ - nr += 8; - - brw_pop_insn_state(p); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + } } + /* skip over the regs populated above: + */ + nr += 8; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { @@ -1057,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1071,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } } - /** * Move a GPR to scratch memory. */ @@ -1209,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: @@ -1221,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_PIXELW: - emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: @@ -1258,6 +1451,14 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; + case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1271,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TRUNC: - emit_trunc(p, dst, dst_flags, args[0]); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_LRP: @@ -1298,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Higher math functions: */ case OPCODE_RCP: - emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: @@ -1326,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: - emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: @@ -1370,23 +1571,30 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_LIT: - emit_lit(p, dst, dst_flags, args[0]); + emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit, + inst->tex_shadow); break; case OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: emit_kil(c, args[0]); break; + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + default: _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 123fe84..7d03179 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -181,6 +181,9 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) { + assert(c->nr_fp_insns < BRW_WM_MAX_INSN); + memset(&c->prog_instructions[c->nr_fp_insns], 0, + sizeof(*c->prog_instructions)); return &c->prog_instructions[c->nr_fp_insns++]; } @@ -447,7 +450,6 @@ static void emit_interp( struct brw_wm_compile *c, break; case FRAG_ATTRIB_FACE: - /* XXX review/test this case */ emit_op(c, WM_FRONTFACING, dst_mask(dst, WRITEMASK_X), @@ -494,38 +496,6 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - -static void emit_ddy( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - /*********************************************************************** * Hacks to extend the program parameter and constant lists. */ @@ -988,7 +958,7 @@ static void precalc_txp( struct brw_wm_compile *c, -static void emit_fb_write( struct brw_wm_compile *c ) +static void emit_render_target_writes( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); @@ -996,36 +966,34 @@ static void emit_fb_write( struct brw_wm_compile *c ) GLuint i; struct prog_instruction *inst, *last_inst; - struct brw_context *brw = c->func.brw; /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { + if (c->key.nr_color_regions > 1) { + for (i = 0 ; i < c->key.nr_color_regions; i++) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = INST_AUX_TARGET(i); if (c->fp_fragcolor_emitted) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + inst->Aux = INST_AUX_TARGET(i); } } - last_inst->Aux |= 1; //eot + last_inst->Aux |= INST_AUX_EOT; } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) + if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = 1|(0<<1); + inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0); } } @@ -1186,14 +1154,8 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; case OPCODE_END: - emit_fb_write(c); + emit_render_target_writes(c); break; case OPCODE_PRINT: break; diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index 7c210ab..e8c2cb6 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { @@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: case OPCODE_NOISE1: case OPCODE_NOISE2: case OPCODE_NOISE3: @@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c) int i, j; struct brw_reg reg; int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; GLuint reg_index = 0; memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); @@ -372,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { reg_index += 2; } } @@ -551,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } } - -/** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. - * For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). - * - */ -static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ - const int component = GET_SWZ(src->Swizzle, channel); - const GLfloat *param = - c->fp->program.Base.Parameters->ParameterValues[src->Index]; - GLfloat value = param[component]; - if (src->Negate & (1 << channel)) - value = -value; - if (src->Abs) - value = FABSF(value); -#if 0 - printf(" form immed value %f for chan %d\n", value, channel); -#endif - return brw_imm_f(value); - } - else { - return get_src_reg(c, inst, srcRegIndex, channel); - } -} - - /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -651,542 +614,110 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_trunc( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - -static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - src0 = get_src_reg(c, inst, 0, 0); - src1 = get_src_reg(c, inst, 0, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } -} - -static void fire_fb_write( struct brw_wm_compile *c, - GLuint base_reg, - GLuint nr, - GLuint target, - GLuint eot) -{ - struct brw_compile *p = &c->func; - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - target, - nr, - 0, - eot); -} - -static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - GLuint target, eot; - struct brw_reg src0; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, inst, 0, channel); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { - src0 = get_src_reg(c, inst, 2, 2); - brw_MOV(p, brw_message_reg(nr), src0); - } - else { - src0 = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; - } - - if (c->key.dest_depth_reg) { - const GLuint comp = c->key.dest_depth_reg / 2; - const GLuint off = c->key.dest_depth_reg % 2; - - if (off != 0) { - /* XXX this code needs review/testing */ - struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); - struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1_1); - brw_pop_insn_state(p); - } - else - { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); - } - nr += 2; - } - - target = inst->Aux >> 1; - eot = inst->Aux & 1; - fire_fb_write(c, 0, nr, target, eot); -} - -static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias3(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3); - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} + struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; + int i, j; + int mark = mark_tmps(c); -static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - w = get_src_reg(c, inst, 2, 3); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); + func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); - struct brw_reg dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; for (i = 0; i < 4; i++) { - GLuint i2 = (i+2)%3; - GLuint i1 = (i+1)%3; if (mask & (1<SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); + for (j = 0; j < 4; j++) { + if (arg0[j].file == dst[i].file && + dst[i].nr == arg0[j].nr) { + tmp_arg0[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg0[j], arg0[j]); + } + if (arg1[j].file == dst[i].file && + dst[i].nr == arg1[j].nr) { + tmp_arg1[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg1[j], arg1[j]); + } + } } } - brw_set_saturate(p, 0); -} - -static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} + func(p, dst, mask, tmp_arg0, tmp_arg1); -static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src1[3]); - brw_set_saturate(p, 0); -} - -/** - * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. - * Note that the result of the function is smeared across the dest - * register's X, Y, Z and W channels (subject to writemasking of course). - */ -static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - /* Get first component of source register */ - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg(c, inst, 0, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} - -static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); -} - -static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} - -static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); -} - -static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - -static void emit_add(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<func; - const GLuint mask = inst->DstReg.WriteMask; - const int mark = mark_tmps(c); - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - if (inst->Opcode == OPCODE_MIN) - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - else - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - if (use_temp) - brw_MOV(p, real_dst, dst); - } - } - brw_pop_insn_state(p); - release_tmps(c, mark); -} - -static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg_imm(c, inst, 0, 0); - src1 = get_src_reg_imm(c, inst, 1, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_lrp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - int mark = mark_tmps(c); - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c, mark); - } -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -1391,144 +748,6 @@ static void emit_kil(struct brw_wm_compile *c) brw_pop_insn_state(p); } -static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - -static void emit_sop(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint cond) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - src0 = get_src_reg(c, inst, 0, 0); - w = get_src_reg(c, inst, 1, 3); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - w = get_src_reg(c, inst, 1, 3); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0); - dst[1] = get_dst_reg(c, inst, 1); - - src0[0] = get_src_reg(c, inst, 0, 0); - src0[1] = get_src_reg(c, inst, 0, 1); - - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ - if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); - } - - if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); - } -} - -/* TODO - BIAS on SIMD8 not working yet... - */ -static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint i; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - default: - /* invalid target */ - abort(); - } - brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - - if (BRW_IS_IGDNG(p->brw)) { - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; - } else { - /* Does it work well on SIMD8? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); -} - - -static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint msg_len; - GLuint i, nr; - GLuint emit; - GLboolean shadow = (c->key.shadowtex_mask & (1<TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; - nr = 3; - break; - default: - /* invalid target */ - abort(); - } - msg_len = 1; - - /* move/load S, T, R coords */ - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<brw)) { - if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; - } else { - /* Does it work for shadow on SIMD8 ? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - shadow ? 6 : 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} - /** * Resolve subroutine calls after code emit is done. @@ -2780,6 +1809,21 @@ static void post_wm_emit( struct brw_wm_compile *c ) brw_resolve_cals(&c->func); } +static void +get_argument_regs(struct brw_wm_compile *c, + const struct prog_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); + } +} + static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 @@ -2797,6 +1841,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; c->cur_inst = i; @@ -2809,127 +1856,157 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (c->fp->use_const_buffer) fetch_constants(c, inst); + if (inst->Opcode != OPCODE_ARL) { + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + } + for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) + get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + switch (inst->Opcode) { case WM_PIXELXY: - emit_pixel_xy(c, inst); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(c, inst); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_PIXELW: - emit_pixel_w(c, inst); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: - emit_linterp(c, inst); + emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: - emit_pinterp(c, inst); + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: - emit_cinterp(c, inst); + emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: - emit_wpos_xy(c, inst); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_FB_WRITE: - emit_fb_write(c, inst); + emit_fb_write(c, args[0], args[1], args[2], + INST_AUX_GET_TARGET(inst->Aux), + inst->Aux & INST_AUX_EOT); break; case WM_FRONTFACING: - emit_frontfacing(c, inst); + emit_frontfacing(p, dst, dst_flags); break; case OPCODE_ADD: - emit_add(c, inst); + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_ARL: emit_arl(c, inst); break; case OPCODE_FRC: - emit_frc(c, inst); + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: - emit_flr(c, inst); + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - emit_lrp(c, inst); + unalias3(c, emit_lrp, + dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: - emit_trunc(c, inst); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_MOV: case OPCODE_SWZ: - emit_mov(c, inst); + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_DP3: - emit_dp3(c, inst); + emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: - emit_dp4(c, inst); + emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: - emit_xpd(c, inst); + emit_xpd(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: - emit_dph(c, inst); + emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_RCP: - emit_rcp(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_rsq(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_sin(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_cos(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_ex2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_lg2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_MIN: + unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + break; case OPCODE_MAX: - emit_min_max(c, inst); + unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: - emit_ddx(c, inst); - break; case OPCODE_DDY: - emit_ddy(c, inst); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); break; case OPCODE_SLT: - emit_slt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_L, args[0], args[1]); break; case OPCODE_SLE: - emit_sle(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_LE, args[0], args[1]); break; case OPCODE_SGT: - emit_sgt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_G, args[0], args[1]); break; case OPCODE_SGE: - emit_sge(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_GE, args[0], args[1]); break; case OPCODE_SEQ: - emit_seq(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_EQ, args[0], args[1]); break; case OPCODE_SNE: - emit_sne(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_NEQ, args[0], args[1]); break; case OPCODE_MUL: - emit_mul(c, inst); + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_POW: - emit_pow(c, inst); + emit_math2(c, BRW_MATH_FUNCTION_POW, + dst, dst_flags, args[0], args[1]); break; case OPCODE_MAD: - emit_mad(c, inst); + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_NOISE1: emit_noise1(c, inst); @@ -2944,10 +2021,19 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_noise4(c, inst); break; case OPCODE_TEX: - emit_tex(c, inst); + emit_tex(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + inst->TexSrcUnit, + (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); break; case OPCODE_TXB: - emit_txb(c, inst); + emit_txb(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); break; case OPCODE_KIL_NV: emit_kil(c); diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index 6279258..ff4c082 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -42,12 +42,14 @@ static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) { assert(c->nr_refs < BRW_WM_MAX_REF); + memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); return &c->refs[c->nr_refs++]; } static struct brw_wm_value *get_value( struct brw_wm_compile *c) { assert(c->nr_refs < BRW_WM_MAX_VREG); + memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); return &c->vreg[c->nr_vreg++]; } @@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); + memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); return &c->instruction[c->nr_insns++]; } @@ -322,8 +325,8 @@ translate_insn(struct brw_wm_compile *c, out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; out->tex_shadow = inst->TexShadow; - out->eot = inst->Aux & 1; - out->target = inst->Aux >> 1; + out->eot = inst->Aux & INST_AUX_EOT; + out->target = INST_AUX_GET_TARGET(inst->Aux); /* Args: */ diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c index 3436a24..b449394 100644 --- a/i965/brw_wm_pass1.c +++ b/i965/brw_wm_pass1.c @@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read1 = writemask; break; + case OPCODE_DDX: + case OPCODE_DDY: + read0 = writemask; + break; + case OPCODE_MAD: case OPCODE_CMP: case OPCODE_LRP: @@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: + case OPCODE_KIL_NV: default: break; } diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c index 6faea01..31303fe 100644 --- a/i965/brw_wm_pass2.c +++ b/i965/brw_wm_pass2.c @@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<key.vp_outputs_written & BITFIELD64_BIT(j)) { int fp_index; if (j >= VERT_RESULT_VAR0) diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index dff4665..aa2e519 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -66,19 +66,6 @@ static GLuint translate_wrap_mode( GLenum wrap ) } } - -static GLuint U_FIXED(GLfloat value, GLuint frac_bits) -{ - value *= (1<cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, - NULL, 0 ); + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc), NULL, 0); } @@ -228,8 +215,8 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); + sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6); + sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6); sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 39f8c6d..f89ed9b 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -106,7 +106,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_DEPTH_BUFFER + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->state.depth_region == NULL) + key->computes_depth = 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; @@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | + BRW_NEW_DEPTH_BUFFER | BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 3dcc592..8335e5a 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -31,7 +31,6 @@ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -70,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, +static GLuint translate_tex_format( gl_format mesa_format, + GLenum internal_format, GLenum depth_mode ) { switch( mesa_format ) { @@ -86,21 +86,22 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, case MESA_FORMAT_AL88: return BRW_SURFACEFORMAT_L8A8_UNORM; + case MESA_FORMAT_AL1616: + return BRW_SURFACEFORMAT_L16A16_UNORM; + case MESA_FORMAT_RGB888: assert(0); /* not supported for sampling */ return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - else - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case MESA_FORMAT_XRGB8888: + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; case MESA_FORMAT_RGBA8888_REV: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - else - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -287,7 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.bo = NULL; key.offset = intelObj->textureOffset; } else { - key.format = firstImage->TexFormat->MesaFormat; + key.format = firstImage->TexFormat; key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; @@ -354,7 +355,10 @@ brw_create_constant_surface( struct brw_context *brw, NULL, NULL); if (key->bo) { - /* Emit relocation to surface contents */ + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_SAMPLER, 0, 0, @@ -527,8 +531,15 @@ brw_update_renderbuffer_surface(struct brw_context *brw, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { + /* XRGB and ARGB are treated the same here because the chips in this + * family cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the alpha + * blending hardware to use GL_ONE (or GL_ZERO) for cases where + * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. + */ case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case MESA_FORMAT_RGB565: @@ -541,26 +552,38 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); } key.tiling = region->tiling; - key.width = region->width; - key.height = region->height; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } key.pitch = region->pitch; key.cpp = region->cpp; key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = 0; + key.tiling = I915_TILING_X; key.width = 1; key.height = 1; key.cpp = 4; key.draw_offset = 0; } + /* _NEW_COLOR */ memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); + + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + key.color_blend = (!ctx->Color._LogicOpEnabled && ctx->Color.BlendEnabled); @@ -655,7 +678,7 @@ brw_wm_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_WM_MAX_SURF]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) @@ -680,8 +703,6 @@ brw_wm_get_binding_table(struct brw_context *brw) brw->wm.surf_bo[i]); } } - - free(data); } return bind_bo; @@ -690,11 +711,10 @@ brw_wm_get_binding_table(struct brw_context *brw) static void prepare_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; GLuint i; int old_nr_surfaces; - /* _NEW_BUFFERS */ + /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { @@ -707,7 +727,7 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS; if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; @@ -719,17 +739,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { - if (texUnit->_Current == intel->frame_buffer_texobj) { - /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[surf]); - brw->wm.nr_surfaces = surf + 1; - } else { - /* regular texture */ - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; } else { dri_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c index 6aa36d1..ca6e2fa 100644 --- a/shared/intel_batchbuffer.c +++ b/shared/intel_batchbuffer.c @@ -201,11 +201,6 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, drm_intel_bo_reference(intel->first_post_swapbuffers_batch); } - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch->buf; - drm_intel_bo_reference(intel->first_post_swapbuffers_batch); - } - if (used == 0) { batch->cliprect_mode = IGNORE_CLIPRECTS; return; @@ -215,10 +210,10 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); + batch->reserved_space = 0; /* Emit a flush if the bufmgr doesn't do it for us. */ if (intel->always_flush_cache || !intel->ttm) { - *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); - batch->ptr += 4; + intel_batchbuffer_emit_mi_flush(batch); used = batch->ptr - batch->map; } @@ -249,6 +244,8 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (intel->vtbl.finish_batch) intel->vtbl.finish_batch(intel); + batch->reserved_space = BATCH_RESERVED; + /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ @@ -304,3 +301,31 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch, __memcpy(batch->ptr, data, bytes); batch->ptr += bytes; } + +/* Emit a pipelined flush to either flush render and texture cache for + * reading from a FBO-drawn texture, or flush so that frontbuffer + * render appears on the screen in DRI1. + * + * This is also used for the always_flush_cache driconf debug option. + */ +void +intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +{ + struct intel_context *intel = batch->intel; + + if (intel->gen >= 4) { + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + } +} diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h index 51579df..d4a9445 100644 --- a/shared/intel_batchbuffer.h +++ b/shared/intel_batchbuffer.h @@ -62,6 +62,7 @@ struct intel_batchbuffer } emit; GLuint dirty_state; + GLuint reserved_space; }; struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context @@ -95,6 +96,7 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to @@ -104,7 +106,7 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { - return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); + return (batch->size - batch->reserved_space) - (batch->ptr - batch->map); } @@ -157,7 +159,7 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((delta) >= 0); \ + assert((unsigned) (delta) < buf->size); \ intel_batchbuffer_emit_reloc(intel->batch, buf, \ read_domains, write_domain, delta); \ } while (0) @@ -173,12 +175,4 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, intel->batch->emit.start_ptr = NULL; \ } while(0) - -static INLINE void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); - intel_batchbuffer_emit_dword(batch, MI_FLUSH); -} - #endif diff --git a/shared/intel_blit.c b/shared/intel_blit.c index 0c5be4c..9f638b0 100644 --- a/shared/intel_blit.c +++ b/shared/intel_blit.c @@ -26,13 +26,9 @@ **************************************************************************/ -#include -#include - #include "main/mtypes.h" #include "main/context.h" #include "main/enums.h" -#include "main/texformat.h" #include "main/colormac.h" #include "intel_blit.h" @@ -374,8 +370,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) skipBuffers = BUFFER_BIT_STENCIL; } - /* XXX Move this flush/lock into the following conditional? */ - intelFlush(&intel->ctx); LOCK_HARDWARE(intel); intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); @@ -441,6 +435,10 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) intel_region_buffer(intel, irb->region, all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); + int x1 = b.x1 + irb->region->draw_x; + int y1 = b.y1 + irb->region->draw_y; + int x2 = b.x2 + irb->region->draw_x; + int y2 = b.y2 + irb->region->draw_y; GLuint clearVal; GLint pitch, cpp; @@ -449,11 +447,10 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) pitch = irb->region->pitch; cpp = irb->region->cpp; - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __FUNCTION__, irb->region->buffer, (pitch * cpp), - irb->region->draw_offset, - b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1); + x1, y1, x2 - x1, y2 - y1); BR13 = 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; @@ -499,8 +496,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: clearVal = intel->ClearColor8888; break; case MESA_FORMAT_RGB565: @@ -516,7 +514,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) break; default: _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->texformat->MesaFormat); + irb->Base.Format); clearVal = 0; } } @@ -526,17 +524,17 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) buf, irb->Base.Name); */ - assert(b.x1 < b.x2); - assert(b.y1 < b.y2); + assert(x1 < x2); + assert(y1 < y2); BEGIN_BATCH(6, REFERENCES_CLIPRECTS); OUT_BATCH(CMD); OUT_BATCH(BR13); - OUT_BATCH((b.y1 << 16) | b.x1); - OUT_BATCH((b.y2 << 16) | b.x2); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - irb->region->draw_offset); + 0); OUT_BATCH(clearVal); ADVANCE_BATCH(); clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c index a022593..3b7015b 100644 --- a/shared/intel_buffer_objects.c +++ b/shared/intel_buffer_objects.c @@ -209,9 +209,23 @@ intel_bufferobj_subdata(GLcontext * ctx, memcpy((char *)intel_obj->sys_buffer + offset, data, size); else { /* Flush any existing batchbuffer that might reference this data. */ - intelFlush(ctx); + if (drm_intel_bo_busy(intel_obj->buffer) || + drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) { + drm_intel_bo *temp_bo; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + + drm_intel_bo_subdata(temp_bo, 0, size, data); - dri_bo_subdata(intel_obj->buffer, offset, size, data); + intel_emit_linear_blit(intel, + intel_obj->buffer, offset, + temp_bo, 0, + size); + + drm_intel_bo_unreference(temp_bo); + } else { + dri_bo_subdata(intel_obj->buffer, offset, size, data); + } } } @@ -254,13 +268,14 @@ intel_bufferobj_map(GLcontext * ctx, if (intel_obj->sys_buffer) { obj->Pointer = intel_obj->sys_buffer; + obj->Length = obj->Size; + obj->Offset = 0; return obj->Pointer; } - /* Flush any existing batchbuffer that might have written to this - * buffer. - */ - intelFlush(ctx); + /* Flush any existing batchbuffer that might reference this data. */ + if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) + intelFlush(ctx); if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -330,7 +345,8 @@ intel_bufferobj_map_range(GLcontext * ctx, * the batchbuffer so that GEM knows about the buffer access for later * syncing. */ - if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && + drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) intelFlush(ctx); if (intel_obj->buffer == NULL) { diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index e7357e7..0564318 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -132,6 +132,25 @@ intel_get_cliprects(struct intel_context *intel, } +/** + * Check if we're about to draw into the front color buffer. + * If so, set the intel->front_buffer_dirty field to true. + */ +void +intel_check_front_buffer_rendering(struct intel_context *intel) +{ + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; + if (fb->Name == 0) { + /* drawing to window system buffer */ + if (fb->_NumColorDrawBuffers > 0) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + intel->front_buffer_dirty = GL_TRUE; + } + } + } +} + + /** * Update the hardware state for drawing into a window or framebuffer object. * @@ -172,10 +191,15 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) return; } - /* - * How many color buffers are we drawing into? + /* How many color buffers are we drawing into? + * + * If there are zero buffers or the buffer is too big, don't configure any + * regions for hardware drawing. We'll fallback to software below. Not + * having regions set makes some of the software fallback paths faster. */ - if (fb->_NumColorDrawBuffers == 0) { + if ((fb->Width > ctx->Const.MaxRenderbufferSize) + || (fb->Height > ctx->Const.MaxRenderbufferSize) + || (fb->_NumColorDrawBuffers == 0)) { /* writing to 0 */ colorRegions[0] = NULL; intel->constant_cliprect = GL_TRUE; @@ -192,7 +216,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) } else { /* Get the intel_renderbuffer for the single colorbuffer we're drawing - * into, and set up cliprects if it's . + * into, and set up cliprects if it's a DRI1 window front buffer. */ if (fb->Name == 0) { intel->constant_cliprect = intel->driScreen->dri2.enabled; @@ -202,14 +226,12 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) intel_batchbuffer_flush(intel->batch); intel->front_cliprects = GL_TRUE; colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); - - intel->front_buffer_dirty = GL_TRUE; } else { if (!intel->constant_cliprect && intel->front_cliprects) intel_batchbuffer_flush(intel->batch); intel->front_cliprects = GL_FALSE; - colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT); + colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT); } } else { @@ -257,7 +279,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); if (irbStencil && irbStencil->region) { - ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); } else { diff --git a/shared/intel_buffers.h b/shared/intel_buffers.h index 6069d38..d7800f2 100644 --- a/shared/intel_buffers.h +++ b/shared/intel_buffers.h @@ -45,6 +45,8 @@ extern struct intel_region *intel_readbuf_region(struct intel_context *intel); extern struct intel_region *intel_drawbuf_region(struct intel_context *intel); +extern void intel_check_front_buffer_rendering(struct intel_context *intel); + extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb); extern void intelInitBufferFuncs(struct dd_function_table *functions); diff --git a/shared/intel_clear.c b/shared/intel_clear.c index 1b0e221..f682ee3 100644 --- a/shared/intel_clear.c +++ b/shared/intel_clear.c @@ -75,6 +75,10 @@ intelClear(GLcontext *ctx, GLbitfield mask) struct gl_framebuffer *fb = ctx->DrawBuffer; GLuint i; + if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { + intel->front_buffer_dirty = GL_TRUE; + } + if (0) fprintf(stderr, "%s\n", __FUNCTION__); @@ -172,7 +176,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) DBG("\n"); } - _mesa_meta_clear(&intel->ctx, tri_mask); + _mesa_meta_Clear(&intel->ctx, tri_mask); } if (swrast_mask) { diff --git a/shared/intel_context.c b/shared/intel_context.c index fce42e9..dd24b71 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -28,7 +28,7 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/arrayobj.h" +/* #include "main/arrayobj.h" */ #include "main/extensions.h" #include "main/framebuffer.h" #include "main/imports.h" @@ -68,7 +68,7 @@ int INTEL_DEBUG = (0); #endif -#define DRIVER_DATE "20090712 2009Q2 RC3" +#define DRIVER_DATE "20091221 2009Q4" #define DRIVER_DATE_GEM "GEM " DRIVER_DATE @@ -189,19 +189,7 @@ intelGetString(GLcontext * ctx, GLenum name) static unsigned intel_bits_per_pixel(const struct intel_renderbuffer *rb) { - switch (rb->Base._ActualFormat) { - case GL_RGB5: - case GL_DEPTH_COMPONENT16: - return 16; - case GL_RGB8: - case GL_RGBA8: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH24_STENCIL8_EXT: - case GL_STENCIL_INDEX8_EXT: - return 32; - default: - return 0; - } + return _mesa_get_format_bytes(rb->Base.Format) * 8; } void @@ -489,14 +477,14 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->Fallback) _swrast_flush(ctx); - if (!IS_965(intel->intelScreen->deviceID)) + if (intel->gen < 4) INTEL_FIREVERTICES(intel); /* Emit a flush so that any frontbuffer rendering that might have occurred * lands onscreen in a timely manner, even if the X Server doesn't trigger * a flush for us. */ - if (needs_mi_flush) + if (!intel->driScreen->dri2.enabled && needs_mi_flush) intel_batchbuffer_emit_mi_flush(intel->batch); if (intel->batch->map != intel->batch->ptr) @@ -588,11 +576,6 @@ intelInitDriverFunctions(struct dd_function_table *functions) functions->GetString = intelGetString; functions->UpdateState = intelInvalidateState; - functions->CopyColorTable = _swrast_CopyColorTable; - functions->CopyColorSubTable = _swrast_CopyColorSubTable; - functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; - functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; - intelInitTextureFuncs(functions); intelInitTextureImageFuncs(functions); intelInitTextureSubImageFuncs(functions); @@ -631,6 +614,13 @@ intelInitContext(struct intel_context *intel, intel->sarea = intelScreen->sarea; intel->driContext = driContextPriv; + if (IS_965(intel->intelScreen->deviceID)) + intel->gen = 4; + else if (IS_9XX(intel->intelScreen->deviceID)) + intel->gen = 3; + else + intel->gen = 2; + /* Dri stuff */ intel->hHWContext = driContextPriv->hHWContext; intel->driFd = sPriv->fd; @@ -638,17 +628,13 @@ intelInitContext(struct intel_context *intel, driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, intel->driScreen->myNum, - IS_965(intelScreen->deviceID) ? "i965" : "i915"); + (intel->gen >= 4) ? "i965" : "i915"); if (intelScreen->deviceID == PCI_CHIP_I865_G) intel->maxBatchSize = 4096; else intel->maxBatchSize = BATCH_SZ; intel->bufmgr = intelScreen->bufmgr; - - if (0) /* for debug */ - drm_intel_bufmgr_set_debug(intel->bufmgr, 1); - intel->ttm = intelScreen->ttm; if (intel->ttm) { int bo_reuse_mode; @@ -704,7 +690,7 @@ intelInitContext(struct intel_context *intel, meta_init_metaops(ctx, &intel->meta); ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ - if (IS_965(intelScreen->deviceID)) { + if (intel->gen >= 4) { if (MAX_WIDTH > 8192) ctx->Const.MaxRenderbufferSize = 8192; } else { @@ -741,7 +727,7 @@ intelInitContext(struct intel_context *intel, break; } - if (IS_965(intelScreen->deviceID)) + if (intel->gen >= 4) intel->polygon_offset_scale /= 0xffff; intel->RenderIndex = ~0; @@ -754,12 +740,12 @@ intelInitContext(struct intel_context *intel, intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) { + if (intel->gen >= 4 && !intel->intelScreen->irq_active) { _mesa_printf("IRQs not active. Exiting\n"); exit(1); } - intelInitExtensions(ctx, GL_FALSE); + intelInitExtensions(ctx); INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); if (INTEL_DEBUG & DEBUG_BUFMGR) @@ -839,7 +825,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) _vbo_DestroyContext(&intel->ctx); _swrast_DestroyContext(&intel->ctx); - intel->Fallback = 0; /* don't call _swrast_Flush later */ + intel->Fallback = 0x0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); intel->batch = NULL; @@ -944,10 +930,23 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, __DRIdrawablePrivate * driReadPriv) { __DRIscreenPrivate *psp = driDrawPriv->driScreenPriv; + struct intel_context *intel; + GET_CURRENT_CONTEXT(curCtx); + + if (driContextPriv) + intel = (struct intel_context *) driContextPriv->driverPrivate; + else + intel = NULL; + + /* According to the glXMakeCurrent() man page: "Pending commands to + * the previous context, if any, are flushed before it is released." + * But only flush if we're actually changing contexts. + */ + if (intel_context(curCtx) && intel_context(curCtx) != intel) { + _mesa_flush(curCtx); + } if (driContextPriv) { - struct intel_context *intel = - (struct intel_context *) driContextPriv->driverPrivate; struct intel_framebuffer *intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate; @@ -993,41 +992,35 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, _mesa_make_current(&intel->ctx, &intel_fb->Base, readFb); - /* The drawbuffer won't always be updated by _mesa_make_current: - */ - if (intel->ctx.DrawBuffer == &intel_fb->Base) { - - if (intel->driReadDrawable != driReadPriv) - intel->driReadDrawable = driReadPriv; - - if (intel->driDrawable != driDrawPriv) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - int i; - - driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) - : VBLANK_FLAG_NO_IRQ; - - /* Prevent error printf if one crtc is disabled, this will - * be properly calculated in intelWindowMoved() next. - */ - driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv); - - (*psp->systemTime->getUST) (&intel_fb->swap_ust); - driDrawableInitVBlank(driDrawPriv); - intel_fb->vbl_waited = driDrawPriv->vblSeq; - - for (i = 0; i < 2; i++) { - if (intel_fb->color_rb[i]) - intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; - } - } - intel->driDrawable = driDrawPriv; - intelWindowMoved(intel); - } + intel->driReadDrawable = driReadPriv; + + if (intel->driDrawable != driDrawPriv) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + int i; - intel_draw_buffer(&intel->ctx, &intel_fb->Base); + driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0) + ? driGetDefaultVBlankFlags(&intel->optionCache) + : VBLANK_FLAG_NO_IRQ; + + /* Prevent error printf if one crtc is disabled, this will + * be properly calculated in intelWindowMoved() next. + */ + driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv); + + (*psp->systemTime->getUST) (&intel_fb->swap_ust); + driDrawableInitVBlank(driDrawPriv); + intel_fb->vbl_waited = driDrawPriv->vblSeq; + + for (i = 0; i < 2; i++) { + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; + } + } + intel->driDrawable = driDrawPriv; + intelWindowMoved(intel); } + + intel_draw_buffer(&intel->ctx, &intel_fb->Base); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/shared/intel_context.h b/shared/intel_context.h index 03e7cf3..eb7be7d 100644 --- a/shared/intel_context.h +++ b/shared/intel_context.h @@ -61,6 +61,10 @@ typedef void (*intel_line_func) (struct intel_context *, intelVertex *, intelVertex *); typedef void (*intel_point_func) (struct intel_context *, intelVertex *); +/** + * Bits for intel->Fallback field + */ +/*@{*/ #define INTEL_FALLBACK_DRAW_BUFFER 0x1 #define INTEL_FALLBACK_READ_BUFFER 0x2 #define INTEL_FALLBACK_DEPTH_BUFFER 0x4 @@ -68,8 +72,10 @@ typedef void (*intel_point_func) (struct intel_context *, intelVertex *); #define INTEL_FALLBACK_USER 0x10 #define INTEL_FALLBACK_RENDERMODE 0x20 #define INTEL_FALLBACK_TEXTURE 0x40 +#define INTEL_FALLBACK_DRIVER 0x1000 /**< first for drivers */ +/*@}*/ -extern void intelFallback(struct intel_context *intel, GLuint bit, +extern void intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode); #define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) @@ -111,8 +117,6 @@ struct intel_context struct intel_region * depth_region, GLuint num_regions); - GLuint (*flush_cmd) (void); - void (*reduced_primitive_state) (struct intel_context * intel, GLenum rprim); @@ -170,13 +174,17 @@ struct intel_context struct dri_metaops meta; - GLint refcount; - GLuint Fallback; + GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ GLuint NewGLState; dri_bufmgr *bufmgr; unsigned int maxBatchSize; + /** + * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965. + */ + int gen; + struct intel_region *front_region; struct intel_region *back_region; struct intel_region *depth_region; @@ -190,7 +198,6 @@ struct intel_context struct intel_batchbuffer *batch; drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; - unsigned batch_id; struct { @@ -254,9 +261,6 @@ struct intel_context intel_line_func draw_line; intel_tri_func draw_tri; - /* These refer to the current drawing buffer: - */ - struct gl_texture_object *frame_buffer_texobj; /** * Set to true if a single constant cliprect should be used in the * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer @@ -296,7 +300,6 @@ struct intel_context GLboolean use_texture_tiling; GLboolean use_early_z; - drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; @@ -351,6 +354,19 @@ extern char *__progname; #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) +static inline uint32_t +U_FIXED(float value, uint32_t frac_bits) +{ + value *= (1 << frac_bits); + return value < 0 ? 0 : value; +} + +static inline uint32_t +S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} + #define INTEL_FIREVERTICES(intel) \ do { \ if ((intel)->prim.flush) \ @@ -572,4 +588,25 @@ is_power_of_two(uint32_t value) return (value & (value - 1)) == 0; } +static inline void +intel_bo_map_gtt_preferred(struct intel_context *intel, + drm_intel_bo *bo, + GLboolean write) +{ + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_map_gtt(bo); + else + drm_intel_bo_map(bo, write); +} + +static inline void +intel_bo_unmap_gtt_preferred(struct intel_context *intel, + drm_intel_bo *bo) +{ + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_unmap_gtt(bo); + else + drm_intel_bo_unmap(bo); +} + #endif diff --git a/shared/intel_depthtmp.h b/shared/intel_depthtmp.h index 16d7708..a9c75d4 100644 --- a/shared/intel_depthtmp.h +++ b/shared/intel_depthtmp.h @@ -30,6 +30,16 @@ * all the tiling styles. */ +#define VALUE_TYPE INTEL_VALUE_TYPE +#define WRITE_DEPTH(_x, _y, d) \ + (*(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \ + NO_TILE(_x, _y)) = d) +#define READ_DEPTH(d, _x, _y) \ + d = *(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \ + NO_TILE(_x, _y)) +#define TAG(x) INTEL_TAG(intel_gttmap_##x) +#include "depthtmp.h" + #define VALUE_TYPE INTEL_VALUE_TYPE #define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d) #define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y)) diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c index 2e61c55..48cdae5 100644 --- a/shared/intel_extensions.c +++ b/shared/intel_extensions.c @@ -28,9 +28,11 @@ #include "intel_chipset.h" #include "intel_context.h" #include "intel_extensions.h" +#include "utils.h" #define need_GL_ARB_copy_buffer +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_framebuffer_object #define need_GL_ARB_map_buffer_range #define need_GL_ARB_occlusion_query @@ -62,7 +64,7 @@ #define need_GL_VERSION_2_0 #define need_GL_VERSION_2_1 -#include "extension_helper.h" +#include "main/remap_helper.h" /** @@ -73,11 +75,15 @@ */ static const struct dri_extension card_extensions[] = { { "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions }, + { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_sync", GL_ARB_sync_functions }, { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_cube_map", NULL }, @@ -89,6 +95,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_texture_rectangle", NULL }, { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions}, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, @@ -114,7 +121,6 @@ static const struct dri_extension card_extensions[] = { { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, - { "GL_NV_point_sprite", GL_NV_point_sprite_functions }, { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, { "GL_NV_vertex_program1_1", NULL }, { "GL_SGIS_generate_mipmap", NULL }, @@ -139,6 +145,7 @@ static const struct dri_extension i915_extensions[] = { /** i965-only extensions */ static const struct dri_extension brw_extensions[] = { + { "GL_ARB_depth_clamp", NULL }, { "GL_ARB_depth_texture", NULL }, { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, @@ -147,13 +154,9 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_shadow", NULL }, { "GL_MESA_texture_signed_rgba", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, @@ -168,6 +171,7 @@ static const struct dri_extension brw_extensions[] = { static const struct dri_extension arb_oq_extensions[] = { + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { NULL, NULL } }; @@ -179,29 +183,38 @@ static const struct dri_extension ttm_extensions[] = { { NULL, NULL } }; +static const struct dri_extension fragment_shader_extensions[] = { + { "GL_ARB_fragment_shader", NULL }, + { NULL, NULL } +}; /** * Initializes potential list of extensions if ctx == NULL, or actually enables * extensions for a context. */ void -intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) +intelInitExtensions(GLcontext *ctx) { - struct intel_context *intel = ctx?intel_context(ctx):NULL; + struct intel_context *intel = intel_context(ctx); /* Disable imaging extension until convolution is working in teximage paths. */ - enable_imaging = GL_FALSE; + driInitExtensions(ctx, card_extensions, GL_FALSE); - driInitExtensions(ctx, card_extensions, enable_imaging); - - if (intel == NULL || intel->ttm) + if (intel->ttm) driInitExtensions(ctx, ttm_extensions, GL_FALSE); - if (intel == NULL || IS_965(intel->intelScreen->deviceID)) + if (IS_965(intel->intelScreen->deviceID)) driInitExtensions(ctx, brw_extensions, GL_FALSE); - if (intel == NULL || IS_915(intel->intelScreen->deviceID) - || IS_945(intel->intelScreen->deviceID)) + if (IS_915(intel->intelScreen->deviceID) + || IS_945(intel->intelScreen->deviceID)) { driInitExtensions(ctx, i915_extensions, GL_FALSE); + + if (driQueryOptionb(&intel->optionCache, "fragment_shader")) + driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE); + + if (driQueryOptionb(&intel->optionCache, "stub_occlusion_query")) + driInitExtensions(ctx, arb_oq_extensions, GL_FALSE); + } } diff --git a/shared/intel_extensions.h b/shared/intel_extensions.h index 97147ec..1d1c97a 100644 --- a/shared/intel_extensions.h +++ b/shared/intel_extensions.h @@ -30,7 +30,7 @@ extern void -intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); +intelInitExtensions(GLcontext *ctx); #endif diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index 804c034..608f75b 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -33,11 +33,11 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/context.h" -#include "main/texformat.h" #include "main/texrender.h" #include "drivers/common/meta.h" #include "intel_context.h" +#include "intel_batchbuffer.h" #include "intel_buffers.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" @@ -106,8 +106,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, { struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); - GLboolean softwareBuffer = GL_FALSE; int cpp; + GLuint pitch; ASSERT(rb->Name != 0); @@ -115,27 +115,16 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_R3_G3_B2: case GL_RGB4: case GL_RGB5: - rb->_ActualFormat = GL_RGB5; + rb->Format = MESA_FORMAT_RGB565; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 5; - rb->GreenBits = 6; - rb->BlueBits = 5; - irb->texformat = &_mesa_texformat_rgb565; - cpp = 2; break; case GL_RGB: case GL_RGB8: case GL_RGB10: case GL_RGB12: case GL_RGB16: - rb->_ActualFormat = GL_RGB8; + rb->Format = MESA_FORMAT_XRGB8888; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 0; - irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ - cpp = 4; break; case GL_RGBA: case GL_RGBA2: @@ -145,14 +134,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10_A2: case GL_RGBA12: case GL_RGBA16: - rb->_ActualFormat = GL_RGBA8; + rb->Format = MESA_FORMAT_ARGB8888; rb->DataType = GL_UNSIGNED_BYTE; - rb->RedBits = 8; - rb->GreenBits = 8; - rb->BlueBits = 8; - rb->AlphaBits = 8; - irb->texformat = &_mesa_texformat_argb8888; - cpp = 4; break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: @@ -160,36 +143,23 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: /* alloc a depth+stencil buffer */ - rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - rb->StencilBits = 8; - cpp = 4; - irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: - rb->_ActualFormat = GL_DEPTH_COMPONENT16; + rb->Format = MESA_FORMAT_Z16; rb->DataType = GL_UNSIGNED_SHORT; - rb->DepthBits = 16; - cpp = 2; - irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: - rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - rb->DepthBits = 24; - cpp = 4; - irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->Format = MESA_FORMAT_S8_Z24; rb->DataType = GL_UNSIGNED_INT_24_8_EXT; - rb->DepthBits = 24; - rb->StencilBits = 8; - cpp = 4; - irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(ctx, @@ -197,6 +167,9 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, return GL_FALSE; } + rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + cpp = _mesa_get_format_bytes(rb->Format); + intelFlush(ctx); /* free old region */ @@ -205,32 +178,25 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, } /* allocate new memory region/renderbuffer */ - if (softwareBuffer) { - return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat, - width, height); - } - else { - /* Choose a pitch to match hardware requirements: - */ - GLuint pitch = ((cpp * width + 63) & ~63) / cpp; - /* alloc hardware renderbuffer */ - DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, - height, pitch); + /* Choose a pitch to match hardware requirements: + */ + pitch = ((cpp * width + 63) & ~63) / cpp; - irb->region = intel_region_alloc(intel, I915_TILING_NONE, - cpp, width, height, pitch, - GL_TRUE); - if (!irb->region) - return GL_FALSE; /* out of memory? */ + /* alloc hardware renderbuffer */ + DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - ASSERT(irb->region->buffer); + irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp, + width, height, pitch, GL_TRUE); + if (!irb->region) + return GL_FALSE; /* out of memory? */ - rb->Width = width; - rb->Height = height; + ASSERT(irb->region->buffer); - return GL_TRUE; - } + rb->Width = width; + rb->Height = height; + + return GL_TRUE; } @@ -246,7 +212,7 @@ intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, ASSERT(rb->Name == 0); rb->Width = width; rb->Height = height; - rb->_ActualFormat = internalFormat; + rb->InternalFormat = internalFormat; return GL_TRUE; } @@ -307,12 +273,11 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, * not a user-created renderbuffer. */ struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat) +intel_create_renderbuffer(gl_format format) { GET_CURRENT_CONTEXT(ctx); struct intel_renderbuffer *irb; - const GLuint name = 0; irb = CALLOC_STRUCT(intel_renderbuffer); if (!irb) { @@ -320,75 +285,43 @@ intel_create_renderbuffer(GLenum intFormat) return NULL; } - _mesa_init_renderbuffer(&irb->Base, name); + _mesa_init_renderbuffer(&irb->Base, 0); irb->Base.ClassID = INTEL_RB_CLASS; - switch (intFormat) { - case GL_RGB5: - irb->Base._ActualFormat = GL_RGB5; - irb->Base._BaseFormat = GL_RGBA; - irb->Base.RedBits = 5; - irb->Base.GreenBits = 6; - irb->Base.BlueBits = 5; + switch (format) { + case MESA_FORMAT_RGB565: + irb->Base._BaseFormat = GL_RGB; irb->Base.DataType = GL_UNSIGNED_BYTE; - irb->texformat = &_mesa_texformat_rgb565; break; - case GL_RGB8: - irb->Base._ActualFormat = GL_RGB8; + case MESA_FORMAT_XRGB8888: irb->Base._BaseFormat = GL_RGB; - irb->Base.RedBits = 8; - irb->Base.GreenBits = 8; - irb->Base.BlueBits = 8; - irb->Base.AlphaBits = 0; irb->Base.DataType = GL_UNSIGNED_BYTE; - irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ break; - case GL_RGBA8: - irb->Base._ActualFormat = GL_RGBA8; + case MESA_FORMAT_ARGB8888: irb->Base._BaseFormat = GL_RGBA; - irb->Base.RedBits = 8; - irb->Base.GreenBits = 8; - irb->Base.BlueBits = 8; - irb->Base.AlphaBits = 8; - irb->Base.DataType = GL_UNSIGNED_BYTE; - irb->texformat = &_mesa_texformat_argb8888; - break; - case GL_STENCIL_INDEX8_EXT: - irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT; - irb->Base._BaseFormat = GL_STENCIL_INDEX; - irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; - irb->texformat = &_mesa_texformat_s8_z24; break; - case GL_DEPTH_COMPONENT16: - irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; + case MESA_FORMAT_Z16: irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DepthBits = 16; irb->Base.DataType = GL_UNSIGNED_SHORT; - irb->texformat = &_mesa_texformat_z16; break; - case GL_DEPTH_COMPONENT24: - irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + case MESA_FORMAT_X8_Z24: irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DepthBits = 24; irb->Base.DataType = GL_UNSIGNED_INT; - irb->texformat = &_mesa_texformat_s8_z24; break; - case GL_DEPTH24_STENCIL8_EXT: - irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; - irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; - irb->Base.DepthBits = 24; - irb->Base.StencilBits = 8; + case MESA_FORMAT_S8_Z24: + irb->Base._BaseFormat = GL_DEPTH_STENCIL; irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(NULL, "Unexpected intFormat in intel_create_renderbuffer"); + _mesa_free(irb); return NULL; } - irb->Base.InternalFormat = intFormat; + irb->Base.Format = format; + irb->Base.InternalFormat = irb->Base._BaseFormat; /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; @@ -465,59 +398,49 @@ static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - irb->texformat = texImage->TexFormat; + gl_format texFormat; - if (texImage->TexFormat == &_mesa_texformat_argb8888) { - irb->Base._ActualFormat = GL_RGBA8; - irb->Base._BaseFormat = GL_RGBA; + if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGBA8 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_rgb565) { - irb->Base._ActualFormat = GL_RGB5; - irb->Base._BaseFormat = GL_RGB; + else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) { + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to XGBA8 texture OK\n"); + } + else if (texImage->TexFormat == MESA_FORMAT_RGB565) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_argb1555) { - irb->Base._ActualFormat = GL_RGB5_A1; - irb->Base._BaseFormat = GL_RGBA; + else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to ARGB1555 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_argb4444) { - irb->Base._ActualFormat = GL_RGBA4; - irb->Base._BaseFormat = GL_RGBA; + else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to ARGB4444 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_z16) { - irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; + else if (texImage->TexFormat == MESA_FORMAT_Z16) { irb->Base.DataType = GL_UNSIGNED_SHORT; DBG("Render to DEPTH16 texture OK\n"); } - else if (texImage->TexFormat == &_mesa_texformat_s8_z24) { - irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; - irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; + else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) { irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; DBG("Render to DEPTH_STENCIL texture OK\n"); } else { - DBG("Render to texture BAD FORMAT %d\n", - texImage->TexFormat->MesaFormat); + DBG("Render to texture BAD FORMAT %d\n", texImage->TexFormat); return GL_FALSE; } - irb->Base.InternalFormat = irb->Base._ActualFormat; + irb->Base.Format = texImage->TexFormat; + + texFormat = texImage->TexFormat; + + irb->Base.InternalFormat = texImage->InternalFormat; + irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat); irb->Base.Width = texImage->Width; irb->Base.Height = texImage->Height; - irb->Base.RedBits = texImage->TexFormat->RedBits; - irb->Base.GreenBits = texImage->TexFormat->GreenBits; - irb->Base.BlueBits = texImage->TexFormat->BlueBits; - irb->Base.AlphaBits = texImage->TexFormat->AlphaBits; - irb->Base.DepthBits = texImage->TexFormat->DepthBits; - irb->Base.StencilBits = texImage->TexFormat->StencilBits; irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; @@ -571,7 +494,7 @@ intel_render_texture(GLcontext * ctx, = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); struct intel_texture_image *intel_image; - GLuint imageOffset; + GLuint dst_x, dst_y; (void) fb; @@ -618,18 +541,17 @@ intel_render_texture(GLcontext * ctx, } /* compute offset of the particular 2D image within the texture region */ - imageOffset = intel_miptree_image_offset(intel_image->mt, - att->CubeMapFace, - att->TextureLevel); - - if (att->Texture->Target == GL_TEXTURE_3D) { - const GLuint *offsets = intel_miptree_depth_offsets(intel_image->mt, - att->TextureLevel); - imageOffset += offsets[att->Zoffset]; - } - - /* store that offset in the region */ - intel_image->mt->region->draw_offset = imageOffset; + intel_miptree_get_image_offset(intel_image->mt, + att->TextureLevel, + att->CubeMapFace, + att->Zoffset, + &dst_x, &dst_y); + + intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->pitch + + dst_x) * intel_image->mt->cpp; + intel_image->mt->region->draw_x = dst_x; + intel_image->mt->region->draw_y = dst_y; + intel_image->used_as_render_target = GL_TRUE; /* update drawing region, etc */ intel_draw_buffer(ctx, fb); @@ -643,19 +565,23 @@ static void intel_finish_render_texture(GLcontext * ctx, struct gl_renderbuffer_attachment *att) { - /* no-op - * Previously we released the renderbuffer's intel_region but - * that's not necessary and actually caused problems when trying - * to do a glRead/CopyPixels from the renderbuffer later. - * The region will be released later if the texture is replaced - * or the renderbuffer deleted. - * - * The intention of this driver hook is more of a "done rendering - * to texture, please re-twiddle/etc if necessary". + struct intel_context *intel = intel_context(ctx); + struct gl_texture_object *tex_obj = att->Texture; + struct gl_texture_image *image = + tex_obj->Image[att->CubeMapFace][att->TextureLevel]; + struct intel_texture_image *intel_image = intel_texture_image(image); + + /* Flag that this image may now be validated into the object's miptree. */ + intel_image->used_as_render_target = GL_FALSE; + + /* Since we've (probably) rendered to the texture and will (likely) use + * it in the texture domain later on in this batchbuffer, flush the + * batch. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer like GEM does in the kernel. */ + intel_batchbuffer_emit_mi_flush(intel->batch); } - /** * Do additional "completeness" testing of a framebuffer object. */ @@ -687,8 +613,9 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) continue; } - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: case MESA_FORMAT_RGB565: case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB4444: @@ -715,5 +642,5 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer; + intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; } diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h index f0665af..fa43077 100644 --- a/shared/intel_fbo.h +++ b/shared/intel_fbo.h @@ -28,6 +28,7 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H +#include "main/formats.h" #include "intel_screen.h" struct intel_context; @@ -61,8 +62,6 @@ struct intel_renderbuffer struct gl_renderbuffer Base; struct intel_region *region; - const struct gl_texture_format *texformat; - GLuint vbl_pending; /**< vblank sequence number of pending flip */ uint8_t *span_cache; @@ -114,7 +113,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb, extern struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat); +intel_create_renderbuffer(gl_format format); extern void diff --git a/shared/intel_generatemipmap.c b/shared/intel_generatemipmap.c deleted file mode 100644 index 12059e1..0000000 --- a/shared/intel_generatemipmap.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/bufferobj.h" -#include "main/teximage.h" -#include "main/texenv.h" -#include "main/texobj.h" -#include "main/texstate.h" -#include "main/texparam.h" -#include "main/varray.h" -#include "main/attrib.h" -#include "main/enable.h" -#include "main/buffers.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/renderbuffer.h" -#include "main/depth.h" -#include "main/hash.h" -#include "main/mipmap.h" -#include "main/blend.h" -#include "glapi/dispatch.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_pixel.h" -#include "intel_tex.h" -#include "intel_mipmap_tree.h" - -static const char *intel_fp_tex2d = - "!!ARBfp1.0\n" - "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" - "END\n"; - -static GLboolean -intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, - int level, int width, int height) -{ - struct intel_context *intel = intel_context(ctx); - GLfloat vertices[4][2]; - GLint status; - - /* Set to source from the previous level */ - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); - - /* Set to draw into the current level */ - _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, - GL_COLOR_ATTACHMENT0_EXT, - GL_TEXTURE_2D, - tex_name, - level); - /* Choose to render to the color attachment. */ - _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); - - status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); - if (status != GL_FRAMEBUFFER_COMPLETE_EXT) - return GL_FALSE; - - meta_set_passthrough_transform(&intel->meta); - - /* XXX: Doing it right would involve setting up the transformation to do - * 0-1 mapping or something, and not changing the vertex data. - */ - vertices[0][0] = 0; - vertices[0][1] = 0; - vertices[1][0] = width; - vertices[1][1] = 0; - vertices[2][0] = width; - vertices[2][1] = height; - vertices[3][0] = 0; - vertices[3][1] = height; - - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_VERTEX_ARRAY); - meta_set_default_texrect(&intel->meta); - - _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); - - meta_restore_texcoords(&intel->meta); - meta_restore_transform(&intel->meta); - - return GL_TRUE; -} - -static GLboolean -intel_generate_mipmap_2d(GLcontext *ctx, - GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - GLint old_active_texture; - int level, max_levels, start_level, end_level; - GLuint fb_name; - GLboolean success = GL_FALSE; - struct gl_framebuffer *saved_fbo = NULL; - struct gl_buffer_object *saved_array_buffer = NULL; - struct gl_buffer_object *saved_element_buffer = NULL; - - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | - GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | - GL_DEPTH_BUFFER_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); - old_active_texture = ctx->Texture.CurrentUnit; - _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); - - /* use default array/index buffers */ - _mesa_reference_buffer_object(ctx, &saved_array_buffer, - ctx->Array.ArrayBufferObj); - _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, - ctx->Shared->NullBufferObj); - _mesa_reference_buffer_object(ctx, &saved_element_buffer, - ctx->Array.ElementArrayBufferObj); - _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, - ctx->Shared->NullBufferObj); - - _mesa_Disable(GL_POLYGON_STIPPLE); - _mesa_Disable(GL_DEPTH_TEST); - _mesa_Disable(GL_STENCIL_TEST); - _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - _mesa_DepthMask(GL_FALSE); - - /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our - * minification. - */ - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); - _mesa_Enable(GL_TEXTURE_2D); - _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_LINEAR_MIPMAP_NEAREST); - _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - - /* Bind the new renderbuffer to the color attachment point. */ - _mesa_GenFramebuffersEXT(1, &fb_name); - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); - - meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp, - intel_fp_tex2d); - meta_set_passthrough_vertex_program(&intel->meta); - - max_levels = _mesa_max_texture_levels(ctx, texObj->Target); - start_level = texObj->BaseLevel; - end_level = texObj->MaxLevel; - - /* Loop generating level+1 from level. */ - for (level = start_level; level < end_level && level < max_levels - 1; level++) { - const struct gl_texture_image *srcImage; - int width, height; - - srcImage = _mesa_select_tex_image(ctx, texObj, target, level); - if (srcImage->Border != 0) - goto fail; - - width = srcImage->Width / 2; - if (width < 1) - width = 1; - height = srcImage->Height / 2; - if (height < 1) - height = 1; - - if (width == srcImage->Width && - height == srcImage->Height) { - /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the - * maximum texture level for the object, so break out when we've gone - * over the edge. - */ - break; - } - - /* Make sure that there's space allocated for the target level. - * We could skip this if there's already space allocated and save some - * time. - */ - _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, - width, height, 0, - GL_RGBA, GL_UNSIGNED_INT, NULL); - - if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, - width, height)) - goto fail; - } - - success = GL_TRUE; - -fail: - meta_restore_fragment_program(&intel->meta); - meta_restore_vertex_program(&intel->meta); - - /* restore array/index buffers */ - _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj, - saved_array_buffer); - _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL); - _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj, - saved_element_buffer); - _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL); - - - _mesa_DeleteFramebuffersEXT(1, &fb_name); - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); - if (saved_fbo) - _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); - _mesa_reference_framebuffer(&saved_fbo, NULL); - _mesa_PopClientAttrib(); - _mesa_PopAttrib(); - - return success; -} - - -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - /* HW path */ - if (target == GL_TEXTURE_2D && - ctx->Extensions.EXT_framebuffer_object && - ctx->Extensions.ARB_fragment_program && - ctx->Extensions.ARB_vertex_program) { - GLboolean success; - - /* We'll be accessing this texture using GL entrypoints, which should - * be resilient against other access to this texture. - */ - _mesa_unlock_texture(ctx, texObj); - success = intel_generate_mipmap_2d(ctx, target, texObj); - _mesa_lock_texture(ctx, texObj); - - if (success) - return; - } - - /* SW path */ - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - _mesa_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c index c985da5..abb3024 100644 --- a/shared/intel_mipmap_tree.c +++ b/shared/intel_mipmap_tree.c @@ -28,11 +28,16 @@ #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" +#include "intel_tex_layout.h" #include "intel_chipset.h" +#ifndef I915 +#include "brw_state.h" +#endif #include "main/enums.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE + static GLenum target_to_target(GLenum target) { @@ -49,6 +54,7 @@ target_to_target(GLenum target) } } + static struct intel_mipmap_tree * intel_miptree_create_internal(struct intel_context *intel, GLenum target, @@ -98,6 +104,7 @@ intel_miptree_create_internal(struct intel_context *intel, return mt; } + struct intel_mipmap_tree * intel_miptree_create(struct intel_context *intel, GLenum target, @@ -115,7 +122,7 @@ intel_miptree_create(struct intel_context *intel, if (intel->use_texture_tiling && compress_byte == 0 && intel->intelScreen->kernel_exec_fencing) { - if (IS_965(intel->intelScreen->deviceID) && + if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) tiling = I915_TILING_Y; @@ -131,8 +138,10 @@ intel_miptree_create(struct intel_context *intel, /* * pitch == 0 || height == 0 indicates the null texture */ - if (!mt || !mt->pitch || !mt->total_height) + if (!mt || !mt->pitch || !mt->total_height) { + free(mt); return NULL; + } mt->region = intel_region_alloc(intel, tiling, @@ -150,6 +159,7 @@ intel_miptree_create(struct intel_context *intel, return mt; } + struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, @@ -187,7 +197,8 @@ intel_miptree_create_for_region(struct intel_context *intel, intel_region_reference(&mt->region, region); return mt; - } +} + /** * intel_miptree_pitch_align: @@ -201,7 +212,6 @@ intel_miptree_create_for_region(struct intel_context *intel, * Given @pitch, compute a larger value which accounts for * any necessary alignment required by the device */ - int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t tiling, @@ -247,6 +257,7 @@ int intel_miptree_pitch_align (struct intel_context *intel, return pitch; } + void intel_miptree_reference(struct intel_mipmap_tree **dst, struct intel_mipmap_tree *src) @@ -256,6 +267,7 @@ intel_miptree_reference(struct intel_mipmap_tree **dst, DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); } + void intel_miptree_release(struct intel_context *intel, struct intel_mipmap_tree **mt) @@ -269,11 +281,25 @@ intel_miptree_release(struct intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); +#ifndef I915 + /* Free up cached binding tables holding a reference on our buffer, to + * avoid excessive memory consumption. + * + * This isn't as aggressive as we could be, as we'd like to do + * it from any time we free the last ref on a region. But intel_region.c + * is context-agnostic. Perhaps our constant state cache should be, as + * well. + */ + brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, + (*mt)->region->buffer); +#endif + intel_region_release(&((*mt)->region)); - for (i = 0; i < MAX_TEXTURE_LEVELS; i++) - if ((*mt)->level[i].image_offset) - free((*mt)->level[i].image_offset); + for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { + free((*mt)->level[i].x_offset); + free((*mt)->level[i].y_offset); + } free(*mt); } @@ -281,33 +307,31 @@ intel_miptree_release(struct intel_context *intel, } - - -/* Can the image be pulled into a unified mipmap tree. This mirrors +/** + * Can the image be pulled into a unified mipmap tree? This mirrors * the completeness test in a lot of ways. * * Not sure whether I want to pass gl_texture_image here. */ GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt, - struct gl_texture_image *image, - GLuint face, GLuint level) + struct gl_texture_image *image) { - /* Images with borders are never pulled into mipmap trees. - */ - if (image->Border || - ((image->_BaseFormat == GL_DEPTH_COMPONENT) && - ((image->TexObject->WrapS == GL_CLAMP_TO_BORDER) || - (image->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) + GLboolean isCompressed = _mesa_is_format_compressed(image->TexFormat); + struct intel_texture_image *intelImage = intel_texture_image(image); + GLuint level = intelImage->level; + + /* Images with borders are never pulled into mipmap trees. */ + if (image->Border) return GL_FALSE; if (image->InternalFormat != mt->internal_format || - image->IsCompressed != mt->compressed) + isCompressed != mt->compressed) return GL_FALSE; - if (!image->IsCompressed && + if (!isCompressed && !mt->compressed && - image->TexFormat->TexelBytes != mt->cpp) + _mesa_get_format_bytes(image->TexFormat) != mt->cpp) return GL_FALSE; /* Test image dimensions against the base level image adjusted for @@ -334,82 +358,59 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt, mt->level[level].height = h; mt->level[level].depth = d; mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp; + mt->level[level].level_x = x; + mt->level[level].level_y = y; mt->level[level].nr_images = nr_images; DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, level, w, h, d, x, y, mt->level[level].level_offset); - /* Not sure when this would happen, but anyway: - */ - if (mt->level[level].image_offset) { - free(mt->level[level].image_offset); - mt->level[level].image_offset = NULL; - } - assert(nr_images); + assert(!mt->level[level].x_offset); - mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint)); - mt->level[level].image_offset[0] = 0; + mt->level[level].x_offset = malloc(nr_images * sizeof(GLuint)); + mt->level[level].x_offset[0] = mt->level[level].level_x; + mt->level[level].y_offset = malloc(nr_images * sizeof(GLuint)); + mt->level[level].y_offset[0] = mt->level[level].level_y; } -void -intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt, - GLuint level, GLuint img, - GLuint x, GLuint y, - GLuint offset) -{ - if (img == 0 && level == 0) - assert(x == 0 && y == 0); - - assert(img < mt->level[level].nr_images); - - mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp + offset; - - DBG("%s level %d img %d pos %d,%d image_offset %x\n", - __FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]); -} - void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, GLuint level, GLuint img, GLuint x, GLuint y) { - intel_miptree_set_image_offset_ex(mt, level, img, x, y, 0); -} + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + assert(img < mt->level[level].nr_images); -/* Although we use the image_offset[] array to store relative offsets - * to cube faces, Mesa doesn't know anything about this and expects - * each cube face to be treated as a separate image. - * - * These functions present that view to mesa: - */ -const GLuint * -intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level) -{ - static const GLuint zero = 0; + mt->level[level].x_offset[img] = mt->level[level].level_x + x; + mt->level[level].y_offset[img] = mt->level[level].level_y + y; - if (mt->target != GL_TEXTURE_3D || mt->level[level].nr_images == 1) - return &zero; - else - return mt->level[level].image_offset; + DBG("%s level %d img %d pos %d,%d\n", + __FUNCTION__, level, img, + mt->level[level].x_offset[img], mt->level[level].y_offset[img]); } -GLuint -intel_miptree_image_offset(struct intel_mipmap_tree *mt, - GLuint face, GLuint level) +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint face, GLuint depth, + GLuint *x, GLuint *y) { - if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) - return (mt->level[level].level_offset + - mt->level[level].image_offset[face]); - else - return mt->level[level].level_offset; + if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) { + *x = mt->level[level].x_offset[face]; + *y = mt->level[level].y_offset[face]; + } else if (mt->target == GL_TEXTURE_3D) { + *x = mt->level[level].x_offset[depth]; + *y = mt->level[level].y_offset[depth]; + } else { + *x = mt->level[level].x_offset[0]; + *y = mt->level[level].y_offset[0]; + } } - - /** * Map a teximage in a mipmap tree. * \param row_stride returns row stride in bytes @@ -425,6 +426,7 @@ intel_miptree_image_map(struct intel_context * intel, GLuint level, GLuint * row_stride, GLuint * image_offsets) { + GLuint x, y; DBG("%s \n", __FUNCTION__); if (row_stride) @@ -433,19 +435,26 @@ intel_miptree_image_map(struct intel_context * intel, if (mt->target == GL_TEXTURE_3D) { int i; - for (i = 0; i < mt->level[level].depth; i++) - image_offsets[i] = mt->level[level].image_offset[i] / mt->cpp; + for (i = 0; i < mt->level[level].depth; i++) { + + intel_miptree_get_image_offset(mt, level, face, i, + &x, &y); + image_offsets[i] = x + y * mt->pitch; + } + + return intel_region_map(intel, mt->region); } else { assert(mt->level[level].depth == 1); - assert(mt->target == GL_TEXTURE_CUBE_MAP || - mt->level[level].image_offset[0] == 0); + intel_miptree_get_image_offset(mt, level, face, 0, + &x, &y); image_offsets[0] = 0; - } - return (intel_region_map(intel, mt->region) + - intel_miptree_image_offset(mt, face, level)); + return intel_region_map(intel, mt->region) + + (x + y * mt->pitch) * mt->cpp; + } } + void intel_miptree_image_unmap(struct intel_context *intel, struct intel_mipmap_tree *mt) @@ -455,8 +464,8 @@ intel_miptree_image_unmap(struct intel_context *intel, } - -/* Upload data for a particular image. +/** + * Upload data for a particular image. */ void intel_miptree_image_data(struct intel_context *intel, @@ -467,21 +476,21 @@ intel_miptree_image_data(struct intel_context *intel, GLuint src_row_pitch, GLuint src_image_pitch) { - GLuint depth = dst->level[level].depth; - GLuint dst_offset = intel_miptree_image_offset(dst, face, level); - const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); + const GLuint depth = dst->level[level].depth; GLuint i; - GLuint height = 0; DBG("%s: %d/%d\n", __FUNCTION__, face, level); for (i = 0; i < depth; i++) { + GLuint dst_x, dst_y, height; + + intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y); + height = dst->level[level].height; if(dst->compressed) height = (height + 3) / 4; + intel_region_data(intel, - dst->region, - dst_offset + dst_depth_offset[i], /* dst_offset */ - 0, 0, /* dstx, dsty */ + dst->region, 0, dst_x, dst_y, src, src_row_pitch, 0, 0, /* source x, y */ @@ -491,8 +500,9 @@ intel_miptree_image_data(struct intel_context *intel, } } -extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *); -/* Copy mipmap image between trees + +/** + * Copy mipmap image between trees */ void intel_miptree_image_copy(struct intel_context *intel, @@ -503,38 +513,37 @@ intel_miptree_image_copy(struct intel_context *intel, GLuint width = src->level[level].width; GLuint height = src->level[level].height; GLuint depth = src->level[level].depth; - GLuint dst_offset = intel_miptree_image_offset(dst, face, level); - GLuint src_offset = intel_miptree_image_offset(src, face, level); - const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); - const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level); + GLuint src_x, src_y, dst_x, dst_y; GLuint i; GLboolean success; if (dst->compressed) { GLuint align_w, align_h; - intel_get_texture_alignment_unit(dst->internal_format, &align_w, &align_h); + intel_get_texture_alignment_unit(dst->internal_format, + &align_w, &align_h); height = (height + 3) / 4; width = ALIGN(width, align_w); } for (i = 0; i < depth; i++) { + intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y); + intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y); success = intel_region_copy(intel, - dst->region, dst_offset + dst_depth_offset[i], - 0, 0, - src->region, src_offset + src_depth_offset[i], - 0, 0, width, height, GL_COPY); + dst->region, 0, dst_x, dst_y, + src->region, 0, src_x, src_y, width, height, + GL_COPY); if (!success) { GLubyte *src_ptr, *dst_ptr; src_ptr = intel_region_map(intel, src->region); dst_ptr = intel_region_map(intel, dst->region); - _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i], + _mesa_copy_rect(dst_ptr + dst->cpp * (dst_x + dst_y * dst->pitch), dst->cpp, dst->pitch, 0, 0, width, height, - src_ptr + src_offset + src_depth_offset[i], + src_ptr + src->cpp * (src_x + src_y * src->pitch), src->pitch, 0, 0); intel_region_unmap(intel, src->region); diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h index c890b2a..b19c548 100644 --- a/shared/intel_mipmap_tree.h +++ b/shared/intel_mipmap_tree.h @@ -70,6 +70,10 @@ struct intel_mipmap_level * always zero in that case. */ GLuint level_offset; + /** Offset to this miptree level, used in computing x_offset. */ + GLuint level_x; + /** Offset to this miptree level, used in computing y_offset. */ + GLuint level_y; GLuint width; GLuint height; /** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */ @@ -86,7 +90,7 @@ struct intel_mipmap_level * compute the offsets of depth/cube images within a mipmap level, * so have to store them as a lookup table. */ - GLuint *image_offset; + GLuint *x_offset, *y_offset; }; struct intel_mipmap_tree @@ -161,8 +165,7 @@ void intel_miptree_release(struct intel_context *intel, /* Check if an image fits an existing mipmap tree layout */ GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt, - struct gl_texture_image *image, - GLuint face, GLuint level); + struct gl_texture_image *image); /* Return a pointer to an image within a tree. Return image stride as * well. @@ -176,19 +179,10 @@ GLubyte *intel_miptree_image_map(struct intel_context *intel, void intel_miptree_image_unmap(struct intel_context *intel, struct intel_mipmap_tree *mt); - -/* Return the linear offset of an image relative to the start of the - * tree: - */ -GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt, - GLuint face, GLuint level); - -/* Return pointers to each 2d slice within an image. Indexed by depth - * value. - */ -const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, - GLuint level); - +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint face, GLuint depth, + GLuint *x, GLuint *y); void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, GLuint level, @@ -196,16 +190,10 @@ void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, GLuint x, GLuint y, GLuint w, GLuint h, GLuint d); -void intel_miptree_set_image_offset_ex(struct intel_mipmap_tree *mt, - GLuint level, - GLuint img, GLuint x, GLuint y, - GLuint offset); - void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, GLuint level, GLuint img, GLuint x, GLuint y); - /* Upload an image into a tree */ void intel_miptree_image_data(struct intel_context *intel, diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c index a300141..993e427 100644 --- a/shared/intel_pixel.c +++ b/shared/intel_pixel.c @@ -129,20 +129,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_TRUE; } - -GLboolean -intel_check_meta_tex_fragment_ops(GLcontext * ctx) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - /* Some of _ImageTransferState (scale, bias) could be done with - * fragment programs on i915. - */ - return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */ - ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled); -} - /* The intel_region struct doesn't really do enough to capture the * format of the pixels in the region. For now this code assumes that * the region is a display surface and hence is either ARGB8888 or diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h index 96a6dd1..743b649 100644 --- a/shared/intel_pixel.h +++ b/shared/intel_pixel.h @@ -34,8 +34,6 @@ void intelInitPixelFuncs(struct dd_function_table *functions); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); -GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx); - GLboolean intel_check_blit_format(struct intel_region *region, GLenum format, GLenum type); diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index b543a0b..204a233 100644 --- a/shared/intel_pixel_bitmap.c +++ b/shared/intel_pixel_bitmap.c @@ -33,6 +33,7 @@ #include "main/macros.h" #include "main/bufferobj.h" #include "main/pixelstore.h" +#include "main/polygon.h" #include "main/state.h" #include "main/teximage.h" #include "main/texenv.h" @@ -209,7 +210,7 @@ do_blit_bitmap( GLcontext *ctx, if (!dst) return GL_FALSE; - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { bitmap = map_pbo(ctx, width, height, unpack, bitmap); if (bitmap == NULL) return GL_TRUE; /* even though this is an error, we're done */ @@ -329,12 +330,14 @@ out: if (INTEL_DEBUG & DEBUG_SYNC) intel_batchbuffer_flush(intel->batch); - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, unpack->BufferObj); } + intel_check_front_buffer_rendering(intel); + return GL_TRUE; } @@ -418,7 +421,7 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { bitmap = map_pbo(ctx, width, height, unpack, bitmap); if (bitmap == NULL) return GL_TRUE; /* even though this is an error, we're done */ @@ -428,20 +431,21 @@ intel_texture_bitmap(GLcontext * ctx, a8_bitmap = _mesa_calloc(width * height); _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff); - if (unpack->BufferObj->Name) { + if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, unpack->BufferObj); } /* Save GL state before we start setting up our drawing */ - _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT | - GL_VIEWPORT_BIT); + _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT | GL_POLYGON_BIT | + GL_TEXTURE_BIT | GL_VIEWPORT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT | GL_CLIENT_PIXEL_STORE_BIT); old_active_texture = ctx->Texture.CurrentUnit; _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); /* Upload our bitmap data to an alpha texture */ _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); @@ -501,8 +505,6 @@ intel_texture_bitmap(GLcontext * ctx, meta_restore_vertex_program(&intel->meta); _mesa_PopClientAttrib(); - _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */ - _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); _mesa_PopAttrib(); _mesa_DeleteTextures(1, &texname); diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c index 07ca8f7..622aaa2 100644 --- a/shared/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -222,6 +222,8 @@ do_blit_copypixels(GLcontext * ctx, out: UNLOCK_HARDWARE(intel); + intel_check_front_buffer_rendering(intel); + DBG("%s: success\n", __FUNCTION__); return GL_TRUE; } @@ -240,5 +242,5 @@ intelCopyPixels(GLcontext * ctx, return; /* this will use swrast if needed */ - _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type); + _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); } diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c index 7fbb89f..9b382e3 100644 --- a/shared/intel_pixel_draw.c +++ b/shared/intel_pixel_draw.c @@ -54,7 +54,7 @@ #include "intel_fbo.h" -/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */ +/** XXX compare perf of this vs. _mesa_meta_DrawPixels(STENCIL) */ static GLboolean intel_stencil_drawpixels(GLcontext * ctx, GLint x, GLint y, @@ -169,7 +169,7 @@ intel_stencil_drawpixels(GLcontext * ctx, * buffer. */ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - irb = intel_create_renderbuffer(GL_RGBA8); + irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888); rb = &irb->Base; irb->Base.Width = depth_irb->Base.Width; irb->Base.Height = depth_irb->Base.Height; @@ -265,7 +265,7 @@ intelDrawPixels(GLcontext * ctx, /* XXX this function doesn't seem to work reliably even when all * the pre-requisite conditions are met. * Note that this function is never hit with conform. - * Fall back to swrast because even the _mesa_meta_draw_pixels() approach + * Fall back to swrast because even the _mesa_meta_DrawPixels() approach * isn't working because of an apparent stencil bug. */ if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type, @@ -280,6 +280,6 @@ intelDrawPixels(GLcontext * ctx, } #endif - _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); + _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); } diff --git a/shared/intel_pixel_read.c b/shared/intel_pixel_read.c index 8713463..20424e2 100644 --- a/shared/intel_pixel_read.c +++ b/shared/intel_pixel_read.c @@ -180,16 +180,7 @@ do_blit_readpixels(GLcontext * ctx, if (!src) return GL_FALSE; - if (pack->BufferObj->Name) { - /* XXX This validation should be done by core mesa: - */ - if (!_mesa_validate_pbo_access(2, pack, width, height, 1, - format, type, pixels)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); - return GL_TRUE; - } - } - else { + if (!_mesa_is_bufferobj(pack->BufferObj)) { /* PBO only for now: */ if (INTEL_DEBUG & DEBUG_PIXEL) @@ -225,9 +216,8 @@ do_blit_readpixels(GLcontext * ctx, rowLength = -rowLength; } - /* XXX 64-bit cast? */ - dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height, - format, type, 0, 0, 0); + dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height, + format, type, 0, 0, 0); /* Although the blits go on the command buffer, need to do this and @@ -236,14 +226,14 @@ do_blit_readpixels(GLcontext * ctx, intelFlush(&intel->ctx); LOCK_HARDWARE(intel); - if (intel->driDrawable->numClipRects) { + if (intel->driReadDrawable->numClipRects) { GLboolean all = (width * height * src->cpp == dst->Base.Size && x == 0 && dst_offset == 0); dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst, all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); - __DRIdrawablePrivate *dPriv = intel->driDrawable; + __DRIdrawablePrivate *dPriv = intel->driReadDrawable; int nbox = dPriv->numClipRects; drm_clip_rect_t *box = dPriv->pClipRects; drm_clip_rect_t rect; @@ -295,11 +285,11 @@ intelReadPixels(GLcontext * ctx, intelFlush(ctx); -#ifdef I915 if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; +#ifdef I915 if (do_texture_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; diff --git a/shared/intel_regions.c b/shared/intel_regions.c index a86c66a..8097516 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -582,8 +582,7 @@ intel_recreate_static(struct intel_context *intel, * instead of which tiling mode it is. Guess. */ if (region_desc->tiled) { - if (IS_965(intel->intelScreen->deviceID) && - region_desc == &intelScreen->depth) + if (intel->gen >= 4 && region_desc == &intelScreen->depth) region->tiling = I915_TILING_Y; else region->tiling = I915_TILING_X; diff --git a/shared/intel_regions.h b/shared/intel_regions.h index 0d379bd..535fcd7 100644 --- a/shared/intel_regions.h +++ b/shared/intel_regions.h @@ -62,6 +62,8 @@ struct intel_region GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ + GLuint draw_x, draw_y; /**< Offset of drawing within the region */ + uint32_t tiling; /**< Which tiling mode the region is in */ uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ diff --git a/shared/intel_screen.c b/shared/intel_screen.c index 1b8c56e..789135b 100644 --- a/shared/intel_screen.c +++ b/shared/intel_screen.c @@ -79,6 +79,10 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN(fragment_shader, bool, false) + DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -88,10 +92,14 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_NO_RAST(false) DRI_CONF_ALWAYS_FLUSH_BATCH(false) DRI_CONF_ALWAYS_FLUSH_CACHE(false) + + DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false) + DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.") + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 10; +const GLuint __driNConfigOptions = 12; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -341,7 +349,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else { GLboolean swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24); - GLenum rgbFormat; + gl_format rgbFormat; struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); @@ -351,11 +359,11 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); if (mesaVis->redBits == 5) - rgbFormat = GL_RGB5; + rgbFormat = MESA_FORMAT_RGB565; else if (mesaVis->alphaBits == 0) - rgbFormat = GL_RGB8; + rgbFormat = MESA_FORMAT_XRGB8888; else - rgbFormat = GL_RGBA8; + rgbFormat = MESA_FORMAT_ARGB8888; /* setup the hardware-based renderbuffers */ intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); @@ -374,7 +382,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, if (mesaVis->stencilBits == 8) { /* combined depth/stencil buffer */ struct intel_renderbuffer *depthStencilRb - = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT); + = intel_create_renderbuffer(MESA_FORMAT_S8_Z24); /* note: bind RB to two attachment points */ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthStencilRb->Base); @@ -382,7 +390,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT24); + = intel_create_renderbuffer(MESA_FORMAT_X8_Z24); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -390,7 +398,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(MESA_FORMAT_Z16); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -688,18 +696,6 @@ static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp) return NULL; } - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create is - * called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - intelInitExtensions(NULL, GL_TRUE); - if (!intelInitDriver(psp)) return NULL; @@ -752,18 +748,6 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) int color; __DRIconfig **configs = NULL; - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create is - * called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - intelInitExtensions(NULL, GL_TRUE); - /* Allocate the private area */ intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate)); if (!intelScreen) { diff --git a/shared/intel_span.c b/shared/intel_span.c index 8df4990..34c3d9d 100644 --- a/shared/intel_span.c +++ b/shared/intel_span.c @@ -29,7 +29,6 @@ #include "main/macros.h" #include "main/mtypes.h" #include "main/colormac.h" -#include "main/texformat.h" #include "intel_buffers.h" #include "intel_fbo.h" @@ -132,18 +131,6 @@ pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) dri_bo_subdata(irb->region->buffer, offset, 1, &val); } -static uint32_t -z24s8_to_s8z24(uint32_t val) -{ - return (val << 24) | (val >> 8); -} - -static uint32_t -s8z24_to_z24s8(uint32_t val) -{ - return (val >> 24) | (val << 8); -} - static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, int x, int y) { @@ -163,6 +150,9 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, int x_tile_number, y_tile_number; int tile_off, tile_base; + x += irb->region->draw_x; + y += irb->region->draw_y; + tile_stride = (irb->region->pitch * irb->region->cpp) << 3; xbyte = x * irb->region->cpp; @@ -218,6 +208,9 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, int x_tile_number, y_tile_number; int tile_off, tile_base; + x += irb->region->draw_x; + y += irb->region->draw_y; + tile_stride = (irb->region->pitch * irb->region->cpp) << 5; xbyte = x * irb->region->cpp; @@ -273,8 +266,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, unsigned int num_cliprects; \ struct drm_clip_rect *cliprects; \ int x_off, y_off; \ + int pitch = irb->region->pitch * irb->region->cpp; \ + void *buf = irb->region->buffer->virtual; \ GLuint p; \ (void) p; \ + (void)buf; (void)pitch; /* unused for non-gttmap. */ \ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); /* XXX FBO: this is identical to the macro in spantmp2.h except we get @@ -296,7 +292,6 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define Y_FLIP(_y) ((_y) * yScale + yBias) -/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -339,7 +334,7 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #include "intel_spantmp.h" /* x8r8g8b8 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_FMT GL_BGR #define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV #define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset) #define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v) @@ -354,6 +349,9 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, unsigned int num_cliprects; \ struct drm_clip_rect *cliprects; \ int x_off, y_off; \ + int pitch = irb->region->pitch * irb->region->cpp; \ + void *buf = irb->region->buffer->virtual; \ + (void)buf; (void)pitch; /* unused for non-gttmap. */ \ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); @@ -366,20 +364,22 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define INTEL_TAG(name) name##_z16 #include "intel_depthtmp.h" -/* z24 depthbuffer functions. */ +/* z24x8 depthbuffer functions. */ #define INTEL_VALUE_TYPE GLuint #define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d) #define INTEL_READ_DEPTH(offset) pread_32(irb, offset) -#define INTEL_TAG(name) name##_z24 +#define INTEL_TAG(name) name##_z24_x8 #include "intel_depthtmp.h" -/* z24s8 depthbuffer functions. */ -#define INTEL_VALUE_TYPE GLuint -#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, z24s8_to_s8z24(d)) -#define INTEL_READ_DEPTH(offset) s8z24_to_z24s8(pread_32(irb, offset)) -#define INTEL_TAG(name) name##_z24_s8 -#include "intel_depthtmp.h" +/** + ** 8-bit stencil function (XXX FBO: This is obsolete) + **/ +/* XXX */ +#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d) +#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3); +#define TAG(x) intel_gttmap_##x##_z24_s8 +#include "stenciltmp.h" /** ** 8-bit stencil function (XXX FBO: This is obsolete) @@ -413,6 +413,9 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) if (irb == NULL || irb->region == NULL) return; + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_map_gtt(irb->region->buffer); + intel_set_span_functions(intel, rb); } @@ -425,7 +428,10 @@ intel_renderbuffer_unmap(struct intel_context *intel, if (irb == NULL || irb->region == NULL) return; - clear_span_cache(irb); + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_unmap_gtt(irb->region->buffer); + else + clear_span_cache(irb); rb->GetRow = NULL; rb->PutRow = NULL; @@ -444,23 +450,30 @@ intel_renderbuffer_unmap(struct intel_context *intel, * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields. */ static void -intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) +intel_map_unmap_framebuffer(struct intel_context *intel, + struct gl_framebuffer *fb, + GLboolean map) { - GLcontext *ctx = &intel->ctx; - GLuint i, j; + GLuint i; /* color draw buffers */ - for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + intel_renderbuffer_map(intel, fb->_ColorDrawBuffers[i]); else - intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + intel_renderbuffer_unmap(intel, fb->_ColorDrawBuffers[i]); } + /* color read buffer */ + if (map) + intel_renderbuffer_map(intel, fb->_ColorReadBuffer); + else + intel_renderbuffer_unmap(intel, fb->_ColorReadBuffer); + /* check for render to textures */ for (i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer_attachment *att = - ctx->DrawBuffer->Attachment + i; + fb->Attachment + i; struct gl_texture_object *tex = att->Texture; if (tex) { /* render to texture */ @@ -472,33 +485,24 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } } - /* color read buffers */ - if (map) - intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer); - else - intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); - /* depth buffer (Note wrapper!) */ - if (ctx->DrawBuffer->_DepthBuffer) { + if (fb->_DepthBuffer) { if (map) - intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); + intel_renderbuffer_map(intel, fb->_DepthBuffer->Wrapped); else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_DepthBuffer->Wrapped); + intel_renderbuffer_unmap(intel, fb->_DepthBuffer->Wrapped); } /* stencil buffer (Note wrapper!) */ - if (ctx->DrawBuffer->_StencilBuffer) { + if (fb->_StencilBuffer) { if (map) - intel_renderbuffer_map(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + intel_renderbuffer_map(intel, fb->_StencilBuffer->Wrapped); else - intel_renderbuffer_unmap(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); + intel_renderbuffer_unmap(intel, fb->_StencilBuffer->Wrapped); } -} - + intel_check_front_buffer_rendering(intel); +} /** * Prepare for software rendering. Map current read/draw framebuffers' @@ -522,7 +526,9 @@ intelSpanRenderStart(GLcontext * ctx) } } - intel_map_unmap_buffers(intel, GL_TRUE); + intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE); } /** @@ -544,7 +550,9 @@ intelSpanRenderFinish(GLcontext * ctx) } } - intel_map_unmap_buffers(intel, GL_FALSE); + intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE); + if (ctx->ReadBuffer != ctx->DrawBuffer) + intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE); UNLOCK_HARDWARE(intel); } @@ -558,6 +566,43 @@ intelInitSpanFuncs(GLcontext * ctx) swdd->SpanRenderFinish = intelSpanRenderFinish; } +void +intel_map_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_map_images(intel, intel_texture_object(texObj)); + } + } +} + +void +intel_unmap_vertex_shader_textures(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + int i; + + if (ctx->VertexProgram._Current == NULL) + return; + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && + ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + + intel_tex_unmap_images(intel, intel_texture_object(texObj)); + } + } +} /** * Plug in appropriate span read/write functions for the given renderbuffer. @@ -578,7 +623,51 @@ intel_set_span_functions(struct intel_context *intel, else tiling = I915_TILING_NONE; - switch (irb->texformat->MesaFormat) { + if (intel->intelScreen->kernel_exec_fencing) { + switch (irb->Base.Format) { + case MESA_FORMAT_RGB565: + intel_gttmap_InitPointers_RGB565(rb); + break; + case MESA_FORMAT_ARGB4444: + intel_gttmap_InitPointers_ARGB4444(rb); + break; + case MESA_FORMAT_ARGB1555: + intel_gttmap_InitPointers_ARGB1555(rb); + break; + case MESA_FORMAT_XRGB8888: + intel_gttmap_InitPointers_xRGB8888(rb); + break; + case MESA_FORMAT_ARGB8888: + intel_gttmap_InitPointers_ARGB8888(rb); + break; + case MESA_FORMAT_Z16: + intel_gttmap_InitDepthPointers_z16(rb); + break; + case MESA_FORMAT_X8_Z24: + intel_gttmap_InitDepthPointers_z24_x8(rb); + break; + case MESA_FORMAT_S8_Z24: + /* There are a few different ways SW asks us to access the S8Z24 data: + * Z24 depth-only depth reads + * S8Z24 depth reads + * S8Z24 stencil reads. + */ + if (rb->Format == MESA_FORMAT_S8_Z24) { + intel_gttmap_InitDepthPointers_z24_x8(rb); + } else if (rb->Format == MESA_FORMAT_S8) { + intel_gttmap_InitStencilPointers_z24_s8(rb); + } + break; + default: + _mesa_problem(NULL, + "Unexpected MesaFormat %d in intelSetSpanFunctions", + irb->Base.Format); + break; + } + return; + } + + switch (irb->Base.Format) { case MESA_FORMAT_RGB565: switch (tiling) { case I915_TILING_NONE: @@ -621,35 +710,33 @@ intel_set_span_functions(struct intel_context *intel, break; } break; + case MESA_FORMAT_XRGB8888: + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } + break; case MESA_FORMAT_ARGB8888: - if (rb->AlphaBits == 0) { /* XXX: Need xRGB8888 Mesa format */ - /* 8888 RGBx */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_xRGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); - break; - } - } else { - /* 8888 RGBA */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); - break; - } + /* 8888 RGBA */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; } break; case MESA_FORMAT_Z16: @@ -666,39 +753,27 @@ intel_set_span_functions(struct intel_context *intel, break; } break; + case MESA_FORMAT_X8_Z24: case MESA_FORMAT_S8_Z24: /* There are a few different ways SW asks us to access the S8Z24 data: * Z24 depth-only depth reads * S8Z24 depth reads * S8Z24 stencil reads. */ - if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24(rb); - break; - } - } else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + if (rb->Format == MESA_FORMAT_S8_Z24) { switch (tiling) { case I915_TILING_NONE: default: - intelInitDepthPointers_z24_s8(rb); + intelInitDepthPointers_z24_x8(rb); break; case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); + intel_XTile_InitDepthPointers_z24_x8(rb); break; case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); + intel_YTile_InitDepthPointers_z24_x8(rb); break; } - } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + } else if (rb->Format == MESA_FORMAT_S8) { switch (tiling) { case I915_TILING_NONE: default: @@ -711,6 +786,9 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitStencilPointers_z24_s8(rb); break; } + } else { + _mesa_problem(NULL, + "Unexpected ActualFormat in intelSetSpanFunctions"); } break; default: diff --git a/shared/intel_span.h b/shared/intel_span.h index acbeb4a..bffe109 100644 --- a/shared/intel_span.h +++ b/shared/intel_span.h @@ -36,5 +36,7 @@ void intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); +void intel_map_vertex_shader_textures(GLcontext *ctx); +void intel_unmap_vertex_shader_textures(GLcontext *ctx); #endif diff --git a/shared/intel_spantmp.h b/shared/intel_spantmp.h index ead0b1c..bad0339 100644 --- a/shared/intel_spantmp.h +++ b/shared/intel_spantmp.h @@ -30,6 +30,12 @@ * all the tiling styles. */ +#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT +#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE +#define TAG(x) INTEL_TAG(intel_gttmap_##x) +#define TAG2(x, y) INTEL_TAG(intel_gttmap_##x##y) +#include "spantmp2.h" + #define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT #define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE #define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v) @@ -48,8 +54,8 @@ #define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT #define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE -#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) -#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) +#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(Y_TILE(_x, _y), v) +#define GET_VALUE(_x, _y) INTEL_READ_VALUE(Y_TILE(_x, _y)) #define TAG(x) INTEL_TAG(intel_YTile_##x) #define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y #include "spantmp2.h" diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c index 1286fe9..0d7889d 100644 --- a/shared/intel_syncobj.c +++ b/shared/intel_syncobj.c @@ -114,7 +114,7 @@ static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s) { struct intel_sync_object *sync = (struct intel_sync_object *)s; - if (sync->bo && drm_intel_bo_busy(sync->bo)) { + if (sync->bo && !drm_intel_bo_busy(sync->bo)) { drm_intel_bo_unreference(sync->bo); sync->bo = NULL; s->StatusFlag = 1; diff --git a/shared/intel_tex.c b/shared/intel_tex.c index df63f29..215a534 100644 --- a/shared/intel_tex.c +++ b/shared/intel_tex.c @@ -2,6 +2,7 @@ #include "main/texobj.h" #include "main/teximage.h" #include "main/mipmap.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -158,11 +159,58 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ + +/** + * Called via ctx->Driver.GenerateMipmap() + * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks + * if we'll be using software mipmap generation. In that case, we need to + * map/unmap the base level texture image. + */ +static void +intelGenerateMipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) { + /* sw path: need to map texture images */ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + + { + GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + GLuint face, i; + /* Update the level information in our private data in the new images, + * since it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage = + intel_texture_image(texObj->Image[face][i]); + if (!intelImage) + break; + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a + * bare pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } + } + } + else { + _mesa_meta_GenerateMipmap(ctx, target, texObj); + } +} + + void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->GenerateMipmap = intel_generate_mipmap; + functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/shared/intel_tex.h b/shared/intel_tex.h index 471aa2a..f3cc0ff 100644 --- a/shared/intel_tex.h +++ b/shared/intel_tex.h @@ -29,6 +29,7 @@ #define INTELTEX_INC #include "main/mtypes.h" +#include "main/formats.h" #include "intel_context.h" #include "texmem.h" @@ -41,10 +42,8 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions); void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); -const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx, - GLint internalFormat, - GLenum format, - GLenum type); +gl_format intelChooseTextureFormat(GLcontext *ctx, GLint internalFormat, + GLenum format, GLenum type); void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); @@ -71,7 +70,4 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); -void intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj); - #endif diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c index 74f7f58..767d04d 100644 --- a/shared/intel_tex_copy.c +++ b/shared/intel_tex_copy.c @@ -29,8 +29,10 @@ #include "main/enums.h" #include "main/image.h" #include "main/teximage.h" +#include "main/texstate.h" #include "main/mipmap.h" -#include "swrast/swrast.h" + +#include "drivers/common/meta.h" #include "intel_screen.h" #include "intel_context.h" @@ -91,9 +93,7 @@ do_copy_texsubimage(struct intel_context *intel, GLint x, GLint y, GLsizei width, GLsizei height) { GLcontext *ctx = &intel->ctx; - struct gl_texture_object *texObj = intelImage->base.TexObject; - const struct intel_region *src = - get_teximage_source(intel, internalFormat); + const struct intel_region *src = get_teximage_source(intel, internalFormat); if (!intelImage->mt || !src) { if (INTEL_DEBUG & DEBUG_FALLBACKS) @@ -109,33 +109,36 @@ do_copy_texsubimage(struct intel_context *intel, return GL_FALSE; } - intelFlush(ctx); + // intelFlush(ctx); LOCK_HARDWARE(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, intelImage->mt->region, INTEL_WRITE_PART); - GLuint image_offset = intel_miptree_image_offset(intelImage->mt, - intelImage->face, - intelImage->level); const GLint orig_x = x; const GLint orig_y = y; + GLuint image_x, image_y; GLshort src_pitch; + /* get dest x/y in destination texture */ + intel_miptree_get_image_offset(intelImage->mt, + intelImage->level, + intelImage->face, + 0, + &image_x, &image_y); /* Update dst for clipped src. Need to also clip the source rect. */ dstx += x - orig_x; dsty += y - orig_y; /* Can't blit to tiled buffers with non-tile-aligned offset. */ - if (intelImage->mt->region->tiling != I915_TILING_NONE && - (image_offset & 4095) != 0) { + if (intelImage->mt->region->tiling == I915_TILING_Y) { UNLOCK_HARDWARE(intel); return GL_FALSE; } if (ctx->ReadBuffer->Name == 0) { /* reading from a window, adjust x, y */ - __DRIdrawablePrivate *dPriv = intel->driDrawable; + const __DRIdrawablePrivate *dPriv = intel->driReadDrawable; y = dPriv->y + (dPriv->h - (y + height)); x += dPriv->x; @@ -152,17 +155,19 @@ do_copy_texsubimage(struct intel_context *intel, src_pitch = src->pitch; } + /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, src->buffer, - 0, + src->draw_offset, src->tiling, intelImage->mt->pitch, dst_bo, - image_offset, + 0, intelImage->mt->region->tiling, - x, y, dstx, dsty, width, height, + x, y, image_x + dstx, image_y + dsty, + width, height, GL_COPY)) { UNLOCK_HARDWARE(intel); return GL_FALSE; @@ -171,11 +176,6 @@ do_copy_texsubimage(struct intel_context *intel, UNLOCK_HARDWARE(intel); - /* GL_SGIS_generate_mipmap */ - if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } - return GL_TRUE; } @@ -185,8 +185,7 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border) { - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = @@ -222,8 +221,10 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, return; fail: - _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y, - width, border); + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, + width, border); } @@ -233,8 +234,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLint x, GLint y, GLsizei width, GLsizei height, GLint border) { - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = @@ -249,7 +249,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, */ ctx->Driver.TexImage2D(ctx, target, level, internalFormat, width, height, border, - GL_RGBA, CHAN_TYPE, NULL, + GL_RGBA, GL_UNSIGNED_BYTE, NULL, &ctx->DefaultPacking, texObj, texImage); srcx = x; @@ -270,8 +270,10 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, return; fail: - _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y, - width, height, border); + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, + width, height, border); } @@ -279,8 +281,7 @@ static void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = @@ -295,7 +296,9 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, if (!do_copy_texsubimage(intel_context(ctx), target, intel_texture_image(texImage), internalFormat, xoffset, 0, x, y, width, 1)) { - _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width); + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width); } } @@ -305,8 +308,7 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height) { - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = @@ -321,10 +323,10 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, internalFormat, xoffset, yoffset, x, y, width, height)) { - DBG("%s - fallback to swrast\n", __FUNCTION__); - - _swrast_copy_texsubimage2d(ctx, target, level, - xoffset, yoffset, x, y, width, height); + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, x, y, width, height); } } diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c index 3322a71..87efb72 100644 --- a/shared/intel_tex_format.c +++ b/shared/intel_tex_format.c @@ -1,7 +1,6 @@ #include "intel_context.h" #include "intel_tex.h" #include "intel_chipset.h" -#include "main/texformat.h" #include "main/enums.h" @@ -16,7 +15,7 @@ * these if we take the step of simply swizzling the colors * immediately after sampling... */ -const struct gl_texture_format * +gl_format intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type) { @@ -34,48 +33,48 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_COMPRESSED_RGBA: if (format == GL_BGRA) { if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { - return &_mesa_texformat_argb8888; + return MESA_FORMAT_ARGB8888; } else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { - return &_mesa_texformat_argb4444; + return MESA_FORMAT_ARGB4444; } else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { - return &_mesa_texformat_argb1555; + return MESA_FORMAT_ARGB1555; } } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; case 3: case GL_RGB: case GL_COMPRESSED_RGB: if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - return &_mesa_texformat_rgb565; + return MESA_FORMAT_RGB565; } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; + return do32bpt ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_RGB565; case GL_RGBA8: case GL_RGB10_A2: case GL_RGBA12: case GL_RGBA16: - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; case GL_RGBA4: case GL_RGBA2: - return &_mesa_texformat_argb4444; + return MESA_FORMAT_ARGB4444; case GL_RGB5_A1: - return &_mesa_texformat_argb1555; + return MESA_FORMAT_ARGB1555; case GL_RGB8: case GL_RGB10: case GL_RGB12: case GL_RGB16: - return &_mesa_texformat_argb8888; + return MESA_FORMAT_XRGB8888; case GL_RGB5: case GL_RGB4: case GL_R3_G3_B2: - return &_mesa_texformat_rgb565; + return MESA_FORMAT_RGB565; case GL_ALPHA: case GL_ALPHA4: @@ -83,7 +82,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - return &_mesa_texformat_a8; + return MESA_FORMAT_A8; case 1: case GL_LUMINANCE: @@ -92,18 +91,24 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - return &_mesa_texformat_l8; + return MESA_FORMAT_L8; + + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: +#ifndef I915 + return MESA_FORMAT_AL1616; +#else + /* FALLTHROUGH */ +#endif case 2: case GL_LUMINANCE_ALPHA: case GL_LUMINANCE4_ALPHA4: case GL_LUMINANCE6_ALPHA2: case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - return &_mesa_texformat_al88; + return MESA_FORMAT_AL88; case GL_INTENSITY: case GL_INTENSITY4: @@ -111,41 +116,41 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - return &_mesa_texformat_i8; + return MESA_FORMAT_I8; case GL_YCBCR_MESA: if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; + return MESA_FORMAT_YCBCR; else - return &_mesa_texformat_ycbcr_rev; + return MESA_FORMAT_YCBCR_REV; case GL_COMPRESSED_RGB_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; + return MESA_FORMAT_RGB_FXT1; case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgba_fxt1; + return MESA_FORMAT_RGBA_FXT1; case GL_RGB_S3TC: case GL_RGB4_S3TC: case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; + return MESA_FORMAT_RGB_DXT1; case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; + return MESA_FORMAT_RGBA_DXT1; case GL_RGBA_S3TC: case GL_RGBA4_S3TC: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; + return MESA_FORMAT_RGBA_DXT3; case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; + return MESA_FORMAT_RGBA_DXT5; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: #if 0 - return &_mesa_texformat_z16; + return MESA_FORMAT_Z16; #else /* fall-through. * 16bpp depth texture can't be paired with a stencil buffer so @@ -154,7 +159,7 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, #endif case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - return &_mesa_texformat_s8_z24; + return MESA_FORMAT_S8_Z24; #ifndef I915 case GL_SRGB_EXT: @@ -165,41 +170,41 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_COMPRESSED_SRGB_ALPHA_EXT: case GL_COMPRESSED_SLUMINANCE_EXT: case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return &_mesa_texformat_sargb8; + return MESA_FORMAT_SARGB8; case GL_SLUMINANCE_EXT: case GL_SLUMINANCE8_EXT: if (IS_G4X(intel->intelScreen->deviceID)) - return &_mesa_texformat_sl8; + return MESA_FORMAT_SL8; else - return &_mesa_texformat_sargb8; + return MESA_FORMAT_SARGB8; case GL_SLUMINANCE_ALPHA_EXT: case GL_SLUMINANCE8_ALPHA8_EXT: if (IS_G4X(intel->intelScreen->deviceID)) - return &_mesa_texformat_sla8; + return MESA_FORMAT_SLA8; else - return &_mesa_texformat_sargb8; + return MESA_FORMAT_SARGB8; case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return &_mesa_texformat_srgb_dxt1; + return MESA_FORMAT_SRGB_DXT1; /* i915 could also do this */ case GL_DUDV_ATI: case GL_DU8DV8_ATI: - return &_mesa_texformat_dudv8; + return MESA_FORMAT_DUDV8; case GL_RGBA_SNORM: case GL_RGBA8_SNORM: - return &_mesa_texformat_signed_rgba8888_rev; + return MESA_FORMAT_SIGNED_RGBA8888_REV; #endif default: fprintf(stderr, "unexpected texture format %s in %s\n", _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); - return NULL; + return MESA_FORMAT_NONE; } - return NULL; /* never get here */ + return MESA_FORMAT_NONE; /* never get here */ } int intel_compressed_num_bytes(GLuint mesaFormat) diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c index c5f5220..66d61f9 100644 --- a/shared/intel_tex_image.c +++ b/shared/intel_tex_image.c @@ -1,17 +1,15 @@ -#include -#include - #include "main/glheader.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/enums.h" -#include "main/colortab.h" +#include "main/bufferobj.h" #include "main/convolve.h" #include "main/context.h" -#include "main/simple_list.h" +#include "main/formats.h" +#include "main/image.h" #include "main/texcompress.h" -#include "main/texformat.h" +#include "main/texstore.h" #include "main/texgetimage.h" #include "main/texobj.h" #include "main/texstore.h" @@ -73,6 +71,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, GLuint depth = intelImage->base.Depth; GLuint l2width, l2height, l2depth; GLuint i, comp_byte = 0; + GLuint texelBytes; DBG("%s\n", __FUNCTION__); @@ -116,7 +115,8 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, */ if ((intelObj->base.MinFilter == GL_NEAREST || intelObj->base.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel) { + intelImage->level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { lastLevel = firstLevel; } else { @@ -127,8 +127,11 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, } assert(!intelObj->mt); - if (intelImage->base.IsCompressed) - comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); + if (_mesa_is_format_compressed(intelImage->base.TexFormat)) + comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat); + + texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); + intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, intelImage->base._BaseFormat, @@ -138,7 +141,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, width, height, depth, - intelImage->base.TexFormat->TexelBytes, + texelBytes, comp_byte, expect_accelerated_upload); @@ -170,7 +173,7 @@ target_to_face(GLenum target) static GLboolean check_pbo_format(GLint internalFormat, GLenum format, GLenum type, - const struct gl_texture_format *mesa_format) + gl_format mesa_format) { switch (internalFormat) { case 4: @@ -178,12 +181,12 @@ check_pbo_format(GLint internalFormat, return (format == GL_BGRA && (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) && - mesa_format == &_mesa_texformat_argb8888); + mesa_format == MESA_FORMAT_ARGB8888); case 3: case GL_RGB: return (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 && - mesa_format == &_mesa_texformat_rgb565); + mesa_format == MESA_FORMAT_RGB565); case GL_YCBCR_MESA: return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE); default: @@ -204,9 +207,12 @@ try_pbo_upload(struct intel_context *intel, { struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); GLuint src_offset, src_stride; - GLuint dst_offset, dst_stride; + GLuint dst_x, dst_y, dst_stride; + dri_bo *dst_buffer = intel_region_buffer(intel, + intelImage->mt->region, + INTEL_WRITE_FULL); - if (unpack->BufferObj->Name == 0 || + if (!_mesa_is_bufferobj(unpack->BufferObj) || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -221,26 +227,23 @@ try_pbo_upload(struct intel_context *intel, else src_stride = width; - dst_offset = intel_miptree_image_offset(intelImage->mt, - intelImage->face, - intelImage->level); + intel_miptree_get_image_offset(intelImage->mt, intelImage->level, + intelImage->face, 0, + &dst_x, &dst_y); dst_stride = intelImage->mt->pitch; - intelFlush(&intel->ctx); + if (drm_intel_bo_references(intel->batch->buf, dst_buffer)) + intelFlush(&intel->ctx); LOCK_HARDWARE(intel); { dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); - dri_bo *dst_buffer = intel_region_buffer(intel, - intelImage->mt->region, - INTEL_WRITE_FULL); - if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_stride, src_buffer, src_offset, GL_FALSE, - dst_stride, dst_buffer, dst_offset, GL_FALSE, - 0, 0, 0, 0, width, height, + dst_stride, dst_buffer, 0, GL_FALSE, + 0, 0, dst_x, dst_y, width, height, GL_COPY)) { UNLOCK_HARDWARE(intel); return GL_FALSE; @@ -262,9 +265,9 @@ try_pbo_zcopy(struct intel_context *intel, { struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); GLuint src_offset, src_stride; - GLuint dst_offset, dst_stride; + GLuint dst_x, dst_y, dst_stride; - if (unpack->BufferObj->Name == 0 || + if (!_mesa_is_bufferobj(unpack->BufferObj) || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -279,13 +282,14 @@ try_pbo_zcopy(struct intel_context *intel, else src_stride = width; - dst_offset = intel_miptree_image_offset(intelImage->mt, - intelImage->face, - intelImage->level); + intel_miptree_get_image_offset(intelImage->mt, intelImage->level, + intelImage->face, 0, + &dst_x, &dst_y); dst_stride = intelImage->mt->pitch; - if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) { + if (src_stride != dst_stride || dst_x != 0 || dst_y != 0 || + src_offset != 0) { DBG("%s: failure 2\n", __FUNCTION__); return GL_FALSE; } @@ -320,8 +324,6 @@ intelTexImage(GLcontext * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelFlush(ctx); - intelImage->face = target_to_face(target); intelImage->level = level; @@ -330,22 +332,11 @@ intelTexImage(GLcontext * ctx, &postConvHeight); } - /* choose the texture format */ - texImage->TexFormat = intelChooseTextureFormat(ctx, internalFormat, - format, type); - - _mesa_set_fetch_functions(texImage, dims); - - if (texImage->TexFormat->TexelBytes == 0) { - /* must be a compressed format */ + if (_mesa_is_format_compressed(texImage->TexFormat)) { texelBytes = 0; - texImage->IsCompressed = GL_TRUE; - texImage->CompressedSize = - ctx->Driver.CompressedTextureSize(ctx, texImage->Width, - texImage->Height, texImage->Depth, - texImage->TexFormat->MesaFormat); - } else { - texelBytes = texImage->TexFormat->TexelBytes; + } + else { + texelBytes = _mesa_get_format_bytes(texImage->TexFormat); /* Minimum pitch of 32 bytes */ if (postConvWidth * texelBytes < 32) { @@ -378,8 +369,7 @@ intelTexImage(GLcontext * ctx, intelObj->mt->first_level == level && intelObj->mt->last_level == level && intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB && - !intel_miptree_match_image(intelObj->mt, &intelImage->base, - intelImage->face, intelImage->level)) { + !intel_miptree_match_image(intelObj->mt, &intelImage->base)) { DBG("release it\n"); intel_miptree_release(intel, &intelObj->mt); @@ -396,17 +386,17 @@ intelTexImage(GLcontext * ctx, assert(!intelImage->mt); if (intelObj->mt && - intel_miptree_match_image(intelObj->mt, &intelImage->base, - intelImage->face, intelImage->level)) { + intel_miptree_match_image(intelObj->mt, &intelImage->base)) { intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); } else if (intelImage->base.Border == 0) { int comp_byte = 0; - - if (intelImage->base.IsCompressed) { + GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); + GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat); + if (_mesa_is_format_compressed(intelImage->base.TexFormat)) { comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); + intel_compressed_num_bytes(intelImage->base.TexFormat); } /* Didn't fit in the object miptree, but it's suitable for inclusion in @@ -414,11 +404,11 @@ intelTexImage(GLcontext * ctx, * It'll get moved into the object miptree at validate time. */ intelImage->mt = intel_miptree_create(intel, target, - intelImage->base.TexFormat->BaseFormat, + baseFormat, internalFormat, level, level, width, height, depth, - intelImage->base.TexFormat->TexelBytes, + texelBytes, comp_byte, pixels == NULL); } @@ -427,7 +417,7 @@ intelTexImage(GLcontext * ctx, */ if (dims <= 2 && intelImage->mt && - unpack->BufferObj->Name != 0 && + _mesa_is_bufferobj(unpack->BufferObj) && check_pbo_format(internalFormat, format, type, intelImage->base.TexFormat)) { @@ -482,21 +472,31 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); if (intelImage->mt) { - if (pixels != NULL) + if (pixels != NULL) { + /* Flush any queued rendering with the texture before mapping. */ + if (drm_intel_bo_references(intel->batch->buf, + intelImage->mt->region->buffer)) { + intelFlush(ctx); + } texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, intelImage->level, &dstRowStride, intelImage->base.ImageOffsets); + } + texImage->RowStride = dstRowStride / intelImage->mt->cpp; } else { /* Allocate regular memory and store the image there temporarily. */ - if (texImage->IsCompressed) { - sizeInBytes = texImage->CompressedSize; + if (_mesa_is_format_compressed(texImage->TexFormat)) { + sizeInBytes = _mesa_format_image_size(texImage->TexFormat, + texImage->Width, + texImage->Height, + texImage->Depth); dstRowStride = - _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + _mesa_format_row_stride(texImage->TexFormat, width); assert(dims != 3); } else { @@ -527,17 +527,20 @@ intelTexImage(GLcontext * ctx, pixels, srcRowStride, 0, 0); - } else + } + else { memcpy(texImage->Data, pixels, imageSize); - } else if (!texImage->TexFormat->StoreImage(ctx, dims, - texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */ - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, unpack)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + } + } + else if (!_mesa_texstore(ctx, dims, + texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */ + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, unpack)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } } @@ -550,11 +553,6 @@ intelTexImage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } @@ -670,9 +668,10 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, if (compressed) { _mesa_get_compressed_teximage(ctx, target, level, pixels, texObj, texImage); - } else { + } + else { _mesa_get_teximage(ctx, target, level, format, type, pixels, - texObj, texImage); + texObj, texImage); } @@ -735,17 +734,16 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; + GLcontext *ctx = &intel->ctx; struct intel_texture_object *intelObj; struct intel_texture_image *intelImage; struct intel_mipmap_tree *mt; struct intel_renderbuffer *rb; - struct gl_texture_unit *texUnit; struct gl_texture_object *texObj; struct gl_texture_image *texImage; - int level = 0, type, format, internalFormat; + int level = 0, internalFormat; - texUnit = &intel->ctx.Texture.Unit[intel->ctx.Texture.CurrentUnit]; - texObj = _mesa_select_tex_object(&intel->ctx, texUnit, target); + texObj = _mesa_get_current_tex_object(ctx, target); intelObj = intel_texture_object(texObj); if (!intelObj) @@ -760,8 +758,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, if (rb->region == NULL) return; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) internalFormat = GL_RGB; else @@ -792,14 +788,14 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, intelImage->face = target_to_face(target); intelImage->level = level; - texImage->TexFormat = intelChooseTextureFormat(&intel->ctx, internalFormat, - type, format); - _mesa_set_fetch_functions(texImage, 2); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + texImage->TexFormat = MESA_FORMAT_XRGB8888; + else + texImage->TexFormat = MESA_FORMAT_ARGB8888; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); - if (!intel_miptree_match_image(intelObj->mt, &intelImage->base, - intelImage->face, intelImage->level)) { + if (!intel_miptree_match_image(intelObj->mt, &intelImage->base)) { fprintf(stderr, "miptree doesn't match image\n"); } diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h index c9de9b5..a9ac9e7 100644 --- a/shared/intel_tex_layout.h +++ b/shared/intel_tex_layout.h @@ -33,7 +33,7 @@ #include "main/macros.h" -static GLuint minify( GLuint d ) +static INLINE GLuint minify( GLuint d ) { return MAX2(1, d>>1); } diff --git a/shared/intel_tex_obj.h b/shared/intel_tex_obj.h index 5a93461..3ad10d3 100644 --- a/shared/intel_tex_obj.h +++ b/shared/intel_tex_obj.h @@ -66,6 +66,7 @@ struct intel_texture_image * Else there is no image data. */ struct intel_mipmap_tree *mt; + GLboolean used_as_render_target; }; static INLINE struct intel_texture_object * diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c index 8903707..1f68208 100644 --- a/shared/intel_tex_subimage.c +++ b/shared/intel_tex_subimage.c @@ -85,13 +85,13 @@ intelTexSubimage(GLcontext * ctx, &dstRowStride, texImage->ImageOffsets); else { - if (texImage->IsCompressed) { + if (_mesa_is_format_compressed(texImage->TexFormat)) { dstRowStride = - _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + _mesa_format_row_stride(texImage->TexFormat, width); assert(dims != 3); } else { - dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes; + dstRowStride = texImage->RowStride * _mesa_get_format_bytes(texImage->TexFormat); } } @@ -105,18 +105,20 @@ intelTexSubimage(GLcontext * ctx, xoffset, yoffset / 4, (width + 3) & ~3, (height + 3) / 4, pixels, (width + 3) & ~3, 0, 0); - } else + } + else { memcpy(texImage->Data, pixels, imageSize); + } } else { - if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, - xoffset, yoffset, zoffset, - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, packing)) { + if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); } } @@ -129,11 +131,6 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c index a284d54..c9a24ac 100644 --- a/shared/intel_tex_validate.c +++ b/shared/intel_tex_validate.c @@ -5,6 +5,7 @@ #include "intel_batchbuffer.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_TEXTURE @@ -14,7 +15,8 @@ * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. */ static void -intel_calculate_first_last_level(struct intel_texture_object *intelObj) +intel_calculate_first_last_level(struct intel_context *intel, + struct intel_texture_object *intelObj) { struct gl_texture_object *tObj = &intelObj->base; const struct gl_texture_image *const baseImage = @@ -40,27 +42,27 @@ intel_calculate_first_last_level(struct intel_texture_object *intelObj) firstLevel = lastLevel = tObj->BaseLevel; } else { -#ifdef I915 - firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ -#else - /* Currently not taking min/max lod into account here, those - * values are programmed as sampler state elsewhere and we - * upload the same mipmap levels regardless. Not sure if - * this makes sense as it means it isn't possible for the app - * to use min/max lod to reduce texture memory pressure: - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ -#endif + if (intel->gen == 2) { + firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); + firstLevel = MAX2(firstLevel, tObj->BaseLevel); + firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); + lastLevel = MAX2(lastLevel, tObj->BaseLevel); + lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = MIN2(lastLevel, tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + } else { + /* Min/max LOD are taken into account in sampler state. We don't + * want to re-layout textures just because clamping has been applied + * since it means a bunch of blitting around and probably no memory + * savings (since we have to keep the other levels around anyway). + */ + firstLevel = tObj->BaseLevel; + lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, + tObj->MaxLevel); + /* need at least one level */ + lastLevel = MAX2(firstLevel, lastLevel); + } } break; case GL_TEXTURE_RECTANGLE_NV: @@ -135,9 +137,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* What levels must the tree include at a minimum? */ - intel_calculate_first_last_level(intelObj); - firstImage = - intel_texture_image(intelObj->base.Image[0][intelObj->firstLevel]); + intel_calculate_first_last_level(intel, intelObj); + firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]); /* Fallback case: */ @@ -165,11 +166,12 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intel_miptree_reference(&intelObj->mt, firstImage->mt); } - if (firstImage->base.IsCompressed) { - comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat->MesaFormat); + if (_mesa_is_format_compressed(firstImage->base.TexFormat)) { + comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat); cpp = comp_byte; } - else cpp = firstImage->base.TexFormat->TexelBytes; + else + cpp = _mesa_get_format_bytes(firstImage->base.TexFormat); /* Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. @@ -189,7 +191,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intelObj->mt->height0 != firstImage->base.Height || intelObj->mt->depth0 != firstImage->base.Depth || intelObj->mt->cpp != cpp || - intelObj->mt->compressed != firstImage->base.IsCompressed)) { + intelObj->mt->compressed != _mesa_is_format_compressed(firstImage->base.TexFormat))) { intel_miptree_release(intel, &intelObj->mt); } @@ -220,8 +222,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intel_texture_image(intelObj->base.Image[face][i]); /* Need to import images in main memory or held in other trees. + * If it's a render target, then its data isn't needed to be in + * the object tree (otherwise we'd be FBO incomplete), and we need + * to keep track of the image's MT as needing to be pulled in still, + * or we'll lose the rendering that's done to it. */ - if (intelObj->mt != intelImage->mt) { + if (intelObj->mt != intelImage->mt && + !intelImage->used_as_render_target) { copy_image_data_to_tree(intel, intelObj, intelImage); } } -- cgit v1.2.3