From 54a8e9cc3988d908b5b846a752679127cacefd3b Mon Sep 17 00:00:00 2001 From: Luc Verhaegen Date: Tue, 16 Mar 2010 20:19:03 +0100 Subject: Import i915 and i965 dri drivers from mesa 7.8-rc1. --- configure.ac | 6 +- i915/Makefile.am | 3 - i915/i830_context.c | 6 +- i915/i830_context.h | 11 +- i915/i830_metaops.c | 456 --------------------------- i915/i830_state.c | 7 +- i915/i830_texstate.c | 69 ++-- i915/i830_vtbl.c | 146 ++++----- i915/i915_context.c | 13 +- i915/i915_context.h | 11 +- i915/i915_debug.c | 178 ++++++----- i915/i915_debug_fp.c | 83 +++-- i915/i915_fragprog.c | 11 +- i915/i915_metaops.c | 507 ------------------------------ i915/i915_program.c | 2 +- i915/i915_state.c | 7 +- i915/i915_tex_layout.c | 4 +- i915/i915_texstate.c | 68 ++-- i915/i915_vtbl.c | 155 ++++----- i915/intel_render.c | 4 +- i915/intel_tris.c | 103 +----- i965/Makefile.am | 14 +- i965/brw_cc.c | 5 +- i965/brw_clip.c | 26 +- i965/brw_clip.h | 1 - i965/brw_clip_line.c | 13 +- i965/brw_clip_point.c | 1 - i965/brw_clip_state.c | 9 +- i965/brw_clip_tri.c | 9 +- i965/brw_clip_unfilled.c | 1 - i965/brw_clip_util.c | 16 +- i965/brw_context.c | 49 ++- i965/brw_context.h | 46 ++- i965/brw_curbe.c | 68 ++-- i965/brw_defines.h | 260 ++++++++++++++- i965/brw_disasm.c | 3 +- i965/brw_draw.c | 23 +- i965/brw_draw_upload.c | 144 ++++----- i965/brw_eu.c | 4 +- i965/brw_eu_debug.c | 14 +- i965/brw_eu_emit.c | 117 ++++--- i965/brw_fallback.c | 8 +- i965/brw_gs.c | 17 +- i965/brw_gs.h | 1 - i965/brw_gs_emit.c | 21 +- i965/brw_gs_state.c | 7 +- i965/brw_misc_state.c | 140 +++++++-- i965/brw_program.c | 11 +- i965/brw_queryobj.c | 8 +- i965/brw_sf.c | 13 +- i965/brw_sf_emit.c | 6 +- i965/brw_sf_state.c | 19 +- i965/brw_state.h | 51 ++- i965/brw_state_batch.c | 10 +- i965/brw_state_cache.c | 275 ++++++++-------- i965/brw_state_dump.c | 1 - i965/brw_state_upload.c | 91 +++++- i965/brw_structs.h | 114 ++++++- i965/brw_tex_layout.c | 3 +- i965/brw_urb.c | 18 +- i965/brw_vs.c | 31 +- i965/brw_vs.h | 3 + 
i965/brw_vs_emit.c | 300 ++++++++++++------ i965/brw_vs_state.c | 22 +- i965/brw_vs_surface_state.c | 16 +- i965/brw_vtbl.c | 32 +- i965/brw_wm.c | 28 +- i965/brw_wm.h | 3 +- i965/brw_wm_debug.c | 68 ++-- i965/brw_wm_emit.c | 74 +++-- i965/brw_wm_fp.c | 15 +- i965/brw_wm_glsl.c | 23 +- i965/brw_wm_pass0.c | 4 +- i965/brw_wm_sampler_state.c | 29 +- i965/brw_wm_state.c | 26 +- i965/brw_wm_surface_state.c | 198 ++++++------ i965/gen6_cc.c | 296 ++++++++++++++++++ i965/gen6_clip_state.c | 75 +++++ i965/gen6_depthstencil.c | 165 ++++++++++ i965/gen6_gs_state.c | 91 ++++++ i965/gen6_sampler_state.c | 71 +++++ i965/gen6_scissor_state.c | 105 +++++++ i965/gen6_sf_state.c | 187 +++++++++++ i965/gen6_urb.c | 83 +++++ i965/gen6_viewport_state.c | 173 ++++++++++ i965/gen6_vs_state.c | 119 +++++++ i965/gen6_wm_state.c | 160 ++++++++++ shared/intel_batchbuffer.c | 135 +++----- shared/intel_batchbuffer.h | 131 ++++---- shared/intel_blit.c | 519 +++++++++++------------------- shared/intel_blit.h | 2 +- shared/intel_buffer_objects.c | 154 ++++++++- shared/intel_buffers.c | 120 ++----- shared/intel_buffers.h | 6 - shared/intel_chipset.h | 14 +- shared/intel_clear.c | 11 +- shared/intel_context.c | 509 +++++++----------------------- shared/intel_context.h | 172 ++-------- shared/intel_decode.c | 10 +- shared/intel_depthtmp.h | 64 ---- shared/intel_extensions.c | 24 +- shared/intel_extensions.h | 3 + shared/intel_fbo.c | 82 +++-- shared/intel_fbo.h | 28 +- shared/intel_mipmap_tree.c | 30 +- shared/intel_pixel.c | 15 +- shared/intel_pixel_bitmap.c | 162 ++++------ shared/intel_pixel_copy.c | 170 ++++------ shared/intel_pixel_draw.c | 10 +- shared/intel_pixel_read.c | 194 +++--------- shared/intel_regions.c | 203 +++--------- shared/intel_regions.h | 14 +- shared/intel_screen.c | 713 +++++++++++++----------------------------- shared/intel_screen.h | 59 +--- shared/intel_span.c | 553 ++++---------------------------- shared/intel_spantmp.h | 67 ---- shared/intel_state.c | 22 -- 
shared/intel_swapbuffers.c | 248 --------------- shared/intel_swapbuffers.h | 52 --- shared/intel_syncobj.c | 4 +- shared/intel_tex.c | 4 +- shared/intel_tex.h | 2 - shared/intel_tex_copy.c | 21 +- shared/intel_tex_format.c | 5 +- shared/intel_tex_image.c | 108 ++++--- shared/intel_tex_obj.h | 4 - shared/intel_tex_subimage.c | 4 +- shared/intel_tex_validate.c | 2 - 128 files changed, 4787 insertions(+), 5747 deletions(-) delete mode 100644 i915/i830_metaops.c delete mode 100644 i915/i915_metaops.c create mode 100644 i965/gen6_cc.c create mode 100644 i965/gen6_clip_state.c create mode 100644 i965/gen6_depthstencil.c create mode 100644 i965/gen6_gs_state.c create mode 100644 i965/gen6_sampler_state.c create mode 100644 i965/gen6_scissor_state.c create mode 100644 i965/gen6_sf_state.c create mode 100644 i965/gen6_urb.c create mode 100644 i965/gen6_viewport_state.c create mode 100644 i965/gen6_vs_state.c create mode 100644 i965/gen6_wm_state.c delete mode 100644 shared/intel_depthtmp.h delete mode 100644 shared/intel_spantmp.h delete mode 100644 shared/intel_swapbuffers.c delete mode 100644 shared/intel_swapbuffers.h diff --git a/configure.ac b/configure.ac index d339ca1..93954e0 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.7.0, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.8.0, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,8 +16,8 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.15 libdrm_intel]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0 - libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.8.0 libmesadri < 7.9.0 + libmesadricommon >= 7.8.0 libmesadricommon < 7.9.0]) AC_OUTPUT([ Makefile diff --git a/i915/Makefile.am b/i915/Makefile.am index c994f96..609d661 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -9,7 +9,6 @@ i915_dri_la_LDFLAGS = 
-module -noprefix -avoid-version -lm -ldl \ i915_dri_ladir = @libdir@/dri i915_dri_la_SOURCES = \ i830_context.c \ - i830_metaops.c \ i830_state.c \ i830_texblend.c \ i830_texstate.c \ @@ -35,14 +34,12 @@ i915_dri_la_SOURCES = \ ../shared/intel_pixel_read.c \ ../shared/intel_buffers.c \ ../shared/intel_blit.c \ - ../shared/intel_swapbuffers.c \ i915_tex_layout.c \ i915_texstate.c \ i915_context.c \ i915_debug.c \ i915_debug_fp.c \ i915_fragprog.c \ - i915_metaops.c \ i915_program.c \ i915_state.c \ i915_vtbl.c \ diff --git a/i915/i830_context.c b/i915/i830_context.c index 840946f..ebe8b15 100644 --- a/i915/i830_context.c +++ b/i915/i830_context.c @@ -28,14 +28,11 @@ #include "i830_context.h" #include "main/imports.h" #include "texmem.h" -#include "intel_tex.h" #include "tnl/tnl.h" #include "tnl/t_vertex.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" -#include "utils.h" #include "intel_span.h" -#include "intel_pixel.h" #include "intel_tris.h" /*************************************** @@ -53,7 +50,7 @@ extern const struct tnl_pipeline_stage *intel_pipeline[]; GLboolean i830CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { struct dd_function_table functions; @@ -108,7 +105,6 @@ i830CreateContext(const __GLcontextModes * mesaVis, intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf; i830InitState(i830); - i830InitMetaFuncs(i830); _tnl_allow_vertex_fog(ctx, 1); _tnl_allow_pixel_fog(ctx, 0); diff --git a/i915/i830_context.h b/i915/i830_context.h index f73cbbf..d7eb9c2 100644 --- a/i915/i830_context.h +++ b/i915/i830_context.h @@ -34,7 +34,8 @@ #define I830_FALLBACK_COLORMASK 0x2000 #define I830_FALLBACK_STENCIL 0x4000 #define I830_FALLBACK_STIPPLE 0x8000 -#define I830_FALLBACK_LOGICOP 0x10000 +#define I830_FALLBACK_LOGICOP 0x20000 +#define I830_FALLBACK_DRAW_OFFSET 0x200000 #define I830_UPLOAD_CTX 0x1 #define I830_UPLOAD_BUFFERS 0x2 @@ -144,7 +145,7 @@ struct 
i830_context GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS]; DECLARE_RENDERINPUTS(last_index_bitset); - struct i830_hw_state meta, initial, state, *current; + struct i830_hw_state state; }; @@ -178,7 +179,7 @@ i830_state_draw_region(struct intel_context *intel, */ extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); /* i830_tex.c, i830_texstate.c @@ -206,10 +207,6 @@ extern void i830EmitState(struct i830_context *i830); extern void i830InitState(struct i830_context *i830); extern void i830_update_provoking_vertex(GLcontext *ctx); -/* i830_metaops.c - */ -extern void i830InitMetaFuncs(struct i830_context *i830); - /*====================================================================== * Inline conversion functions. These are better-typed than the * macros used previously: diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c deleted file mode 100644 index 2cce661..0000000 --- a/i915/i830_metaops.c +++ /dev/null @@ -1,456 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "utils.h" - -#include "intel_screen.h" -#include "intel_batchbuffer.h" -#include "intel_regions.h" - -#include "i830_context.h" -#include "i830_reg.h" - -/* A large amount of state doesn't need to be uploaded. - */ -#define ACTIVE (I830_UPLOAD_INVARIENT | \ - I830_UPLOAD_CTX | \ - I830_UPLOAD_BUFFERS | \ - I830_UPLOAD_STIPPLE | \ - I830_UPLOAD_TEXBLEND(0) | \ - I830_UPLOAD_TEX(0)) - - -#define SET_STATE( i830, STATE ) \ -do { \ - i830->current->emitted &= ~ACTIVE; \ - i830->current = &i830->STATE; \ - i830->current->emitted &= ~ACTIVE; \ -} while (0) - - -static void -set_no_stencil_write(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ) - */ - i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE; - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE; - - i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - -static void -set_no_depth_write(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) - */ - i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= 
~ENABLE_DIS_DEPTH_TEST_MASK; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK; - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE; - - i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - -/* Set depth unit to replace. - */ -static void -set_depth_replace(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) - * ctx->Driver.DepthMask( ctx, GL_TRUE ) - */ - i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK; - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DEPTH_WRITE; - - /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) - */ - i830->meta.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK; - i830->meta.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC | - DEPTH_TEST_FUNC - (COMPAREFUNC_ALWAYS)); - - i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - - -/* Set stencil unit to replace always with the reference value. 
- */ -static void -set_stencil_replace(struct intel_context *intel, - GLuint s_mask, GLuint s_clear) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ) - */ - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE; - - /* ctx->Driver.StencilMask( ctx, s_mask ) - */ - i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; - i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK((s_mask & - 0xff))); - - /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE ) - */ - i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK); - i830->meta.Ctx[I830_CTXREG_STENCILTST] |= - (ENABLE_STENCIL_PARMS | - STENCIL_FAIL_OP(STENCILOP_REPLACE) | - STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_REPLACE) | - STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_REPLACE)); - - /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_clear, ~0 ) - */ - i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; - i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff)); - - i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK | - ENABLE_STENCIL_TEST_FUNC_MASK); - i830->meta.Ctx[I830_CTXREG_STENCILTST] |= - (ENABLE_STENCIL_REF_VALUE | - ENABLE_STENCIL_TEST_FUNC | - STENCIL_REF_VALUE((s_clear & 0xff)) | - STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS)); - - - - i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - - -static void -set_color_mask(struct intel_context *intel, GLboolean state) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - const GLuint mask = ((1 << WRITEMASK_RED_SHIFT) | - (1 << WRITEMASK_GREEN_SHIFT) | - (1 << WRITEMASK_BLUE_SHIFT) | - (1 << WRITEMASK_ALPHA_SHIFT)); - - i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~mask; - - if (state) { - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= - (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask); - } - - 
i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - -/* Installs a one-stage passthrough texture blend pipeline. Is there - * more that can be done to turn off texturing? - */ -static void -set_no_texture(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - static const struct gl_tex_env_combine_state comb = { - GL_NONE, GL_NONE, - {GL_TEXTURE, 0, 0,}, {GL_TEXTURE, 0, 0,}, - {GL_SRC_COLOR, 0, 0}, {GL_SRC_ALPHA, 0, 0}, - 0, 0, 0, 0 - }; - - i830->meta.TexBlendWordsUsed[0] = - i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0, - i830->meta.TexBlend[0], NULL); - - i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE; - i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0); -} - -/* Set up a single element blend stage for 'replace' texturing with no - * funny ops. - */ -static void -set_texture_blend_replace(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - static const struct gl_tex_env_combine_state comb = { - GL_REPLACE, GL_REPLACE, - {GL_TEXTURE, GL_TEXTURE, GL_TEXTURE,}, {GL_TEXTURE, GL_TEXTURE, - GL_TEXTURE,}, - {GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR}, {GL_SRC_ALPHA, GL_SRC_ALPHA, - GL_SRC_ALPHA}, - 0, 0, 1, 1 - }; - - i830->meta.TexBlendWordsUsed[0] = - i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0, - i830->meta.TexBlend[0], NULL); - - i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE; - i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0); - -/* fprintf(stderr, "%s: TexBlendWordsUsed[0]: %d\n", */ -/* __FUNCTION__, i830->meta.TexBlendWordsUsed[0]); */ -} - - - -/* Set up an arbitary piece of memory as a rectangular texture - * (including the front or back buffer). 
- */ -static GLboolean -set_tex_rect_source(struct intel_context *intel, - dri_bo *buffer, - GLuint offset, - GLuint pitch, GLuint height, GLenum format, GLenum type) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - GLuint *setup = i830->meta.Tex[0]; - GLint numLevels = 1; - GLuint textureFormat; - GLuint cpp; - - /* A full implementation of this would do the upload through - * glTexImage2d, and get all the conversion operations at that - * point. We are restricted, but still at least have access to the - * fragment program swizzle. - */ - switch (format) { - case GL_BGRA: - switch (type) { - case GL_UNSIGNED_INT_8_8_8_8_REV: - case GL_UNSIGNED_BYTE: - textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888); - cpp = 4; - break; - default: - return GL_FALSE; - } - break; - case GL_RGBA: - switch (type) { - case GL_UNSIGNED_INT_8_8_8_8_REV: - case GL_UNSIGNED_BYTE: - textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888); - cpp = 4; - break; - default: - return GL_FALSE; - } - break; - case GL_BGR: - switch (type) { - case GL_UNSIGNED_SHORT_5_6_5_REV: - textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); - cpp = 2; - break; - default: - return GL_FALSE; - } - break; - case GL_RGB: - switch (type) { - case GL_UNSIGNED_SHORT_5_6_5: - textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); - cpp = 2; - break; - default: - return GL_FALSE; - } - break; - - default: - return GL_FALSE; - } - - i830->meta.tex_buffer[0] = buffer; - i830->meta.tex_offset[0] = offset; - - setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | - (LOAD_TEXTURE_MAP0 << 0) | 4); - setup[I830_TEXREG_TM0S1] = (((height - 1) << TM0S1_HEIGHT_SHIFT) | - ((pitch - 1) << TM0S1_WIDTH_SHIFT) | - textureFormat); - setup[I830_TEXREG_TM0S2] = - (((((pitch * cpp) / 4) - - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); - - setup[I830_TEXREG_TM0S3] = - ((((numLevels - - 1) * - 4) << TM0S3_MIN_MIP_SHIFT) | (FILTER_NEAREST << - TM0S3_MIN_FILTER_SHIFT) | - (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT) | 
(FILTER_NEAREST << - TM0S3_MAG_FILTER_SHIFT)); - - setup[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(0)); - - setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD | - MAP_UNIT(0) | - ENABLE_TEXCOORD_PARAMS | - TEXCOORDS_ARE_IN_TEXELUNITS | - TEXCOORDTYPE_CARTESIAN | - ENABLE_ADDR_V_CNTL | - TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) | - ENABLE_ADDR_U_CNTL | - TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP)); - - i830->meta.emitted &= ~I830_UPLOAD_TEX(0); - return GL_TRUE; -} - - -static void -set_vertex_format(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - i830->meta.Ctx[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD | - VFT0_TEX_COUNT(1) | - VFT0_DIFFUSE | VFT0_XYZ); - i830->meta.Ctx[I830_CTXREG_VF2] = (_3DSTATE_VFT1_CMD | - VFT1_TEX0_FMT(TEXCOORDFMT_2D) | - VFT1_TEX1_FMT(TEXCOORDFMT_2D) | - VFT1_TEX2_FMT(TEXCOORDFMT_2D) | - VFT1_TEX3_FMT(TEXCOORDFMT_2D)); - i830->meta.emitted &= ~I830_UPLOAD_CTX; -} - - -static void -meta_import_pixel_state(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - i830->meta.Ctx[I830_CTXREG_STATE1] = i830->state.Ctx[I830_CTXREG_STATE1]; - i830->meta.Ctx[I830_CTXREG_STATE2] = i830->state.Ctx[I830_CTXREG_STATE2]; - i830->meta.Ctx[I830_CTXREG_STATE3] = i830->state.Ctx[I830_CTXREG_STATE3]; - i830->meta.Ctx[I830_CTXREG_STATE4] = i830->state.Ctx[I830_CTXREG_STATE4]; - i830->meta.Ctx[I830_CTXREG_STATE5] = i830->state.Ctx[I830_CTXREG_STATE5]; - i830->meta.Ctx[I830_CTXREG_IALPHAB] = i830->state.Ctx[I830_CTXREG_IALPHAB]; - i830->meta.Ctx[I830_CTXREG_STENCILTST] = - i830->state.Ctx[I830_CTXREG_STENCILTST]; - i830->meta.Ctx[I830_CTXREG_ENABLES_1] = - i830->state.Ctx[I830_CTXREG_ENABLES_1]; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] = - i830->state.Ctx[I830_CTXREG_ENABLES_2]; - i830->meta.Ctx[I830_CTXREG_AA] = i830->state.Ctx[I830_CTXREG_AA]; - i830->meta.Ctx[I830_CTXREG_FOGCOLOR] = - i830->state.Ctx[I830_CTXREG_FOGCOLOR]; - i830->meta.Ctx[I830_CTXREG_BLENDCOLOR0] = - 
i830->state.Ctx[I830_CTXREG_BLENDCOLOR0]; - i830->meta.Ctx[I830_CTXREG_BLENDCOLOR1] = - i830->state.Ctx[I830_CTXREG_BLENDCOLOR1]; - i830->meta.Ctx[I830_CTXREG_MCSB0] = i830->state.Ctx[I830_CTXREG_MCSB0]; - i830->meta.Ctx[I830_CTXREG_MCSB1] = i830->state.Ctx[I830_CTXREG_MCSB1]; - - - i830->meta.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK; - i830->meta.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE; - i830->meta.emitted &= ~I830_UPLOAD_CTX; - - - i830->meta.Buffer[I830_DESTREG_SENABLE] = - i830->state.Buffer[I830_DESTREG_SENABLE]; - i830->meta.Buffer[I830_DESTREG_SR1] = i830->state.Buffer[I830_DESTREG_SR1]; - i830->meta.Buffer[I830_DESTREG_SR2] = i830->state.Buffer[I830_DESTREG_SR2]; - i830->meta.emitted &= ~I830_UPLOAD_BUFFERS; -} - - - -/* Select between front and back draw buffers. - */ -static void -meta_draw_region(struct intel_context *intel, - struct intel_region *color_region, - struct intel_region *depth_region) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - - i830_state_draw_region(intel, &i830->meta, color_region, depth_region); -} - - -/* Operations where the 3D engine is decoupled temporarily from the - * current GL state and used for other purposes than simply rendering - * incoming triangles. 
- */ -static void -install_meta_state(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - memcpy(&i830->meta, &i830->initial, sizeof(i830->meta)); - - i830->meta.active = ACTIVE; - i830->meta.emitted = 0; - - SET_STATE(i830, meta); - set_vertex_format(intel); - set_no_texture(intel); -} - -static void -leave_meta_state(struct intel_context *intel) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - intel_region_release(&i830->meta.draw_region); - intel_region_release(&i830->meta.depth_region); -/* intel_region_release(intel, &i830->meta.tex_region[0]); */ - SET_STATE(i830, state); -} - - - -void -i830InitMetaFuncs(struct i830_context *i830) -{ - i830->intel.vtbl.install_meta_state = install_meta_state; - i830->intel.vtbl.leave_meta_state = leave_meta_state; - i830->intel.vtbl.meta_no_depth_write = set_no_depth_write; - i830->intel.vtbl.meta_no_stencil_write = set_no_stencil_write; - i830->intel.vtbl.meta_stencil_replace = set_stencil_replace; - i830->intel.vtbl.meta_depth_replace = set_depth_replace; - i830->intel.vtbl.meta_color_mask = set_color_mask; - i830->intel.vtbl.meta_no_texture = set_no_texture; - i830->intel.vtbl.meta_texture_blend_replace = set_texture_blend_replace; - i830->intel.vtbl.meta_tex_rect_source = set_tex_rect_source; - i830->intel.vtbl.meta_draw_region = meta_draw_region; - i830->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; -} diff --git a/i915/i830_state.c b/i915/i830_state.c index 645ebe3..3b9b3ae 100644 --- a/i915/i830_state.c +++ b/i915/i830_state.c @@ -620,7 +620,7 @@ i830LineWidth(GLcontext * ctx, GLfloat widthf) DBG("%s\n", __FUNCTION__); width = (int) (widthf * 2); - CLAMP_SELF(width, 1, 15); + width = CLAMP(width, 1, 15); state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK; state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(width)); @@ -639,7 +639,7 @@ i830PointSize(GLcontext * ctx, GLfloat size) DBG("%s\n", __FUNCTION__); - CLAMP_SELF(point_size, 
1, 256); + point_size = CLAMP(point_size, 1, 256); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE5] &= ~FIXED_POINT_WIDTH_MASK; i830->state.Ctx[I830_CTXREG_STATE5] |= (ENABLE_FIXED_POINT_WIDTH | @@ -1127,9 +1127,6 @@ i830InitState(struct i830_context *i830) _mesa_init_driver_state(ctx); - memcpy(&i830->initial, &i830->state, sizeof(i830->state)); - - i830->current = &i830->state; i830->state.emitted = 0; i830->state.active = (I830_UPLOAD_INVARIENT | I830_UPLOAD_RASTER_RULES | diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c index ce409b3..e8f7e37 100644 --- a/i915/i830_texstate.c +++ b/i915/i830_texstate.c @@ -27,6 +27,7 @@ #include "main/mtypes.h" #include "main/enums.h" +#include "main/colormac.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -121,6 +122,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; GLubyte border[4]; + GLuint dst_x, dst_y; memset(state, 0, sizeof(state)); @@ -131,7 +133,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) i830->state.tex_buffer[unit] = NULL; } - if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit)) + if (!intel_finalize_mipmap_tree(intel, unit)) return GL_FALSE; /* Get first image here, since intelObj->firstLevel will get set in @@ -139,42 +141,20 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) */ firstImage = tObj->Image[0][intelObj->firstLevel]; - if (intelObj->imageOverride) { - i830->state.tex_buffer[unit] = NULL; - i830->state.tex_offset[unit] = intelObj->textureOffset; + intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, + &dst_x, &dst_y); - switch (intelObj->depthOverride) { - case 32: - format = MAPSURF_32BIT | MT_32BIT_ARGB8888; - break; - case 24: - default: - format = MAPSURF_32BIT | MT_32BIT_XRGB8888; - break; - case 16: - format = MAPSURF_16BIT | MT_16BIT_RGB565; - break; 
- } - - pitch = intelObj->pitchOverride; - } else { - GLuint dst_x, dst_y; - - intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, - &dst_x, &dst_y); - - dri_bo_reference(intelObj->mt->region->buffer); - i830->state.tex_buffer[unit] = intelObj->mt->region->buffer; - /* XXX: This calculation is probably broken for tiled images with - * a non-page-aligned offset. - */ - i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) * - intelObj->mt->cpp; + dri_bo_reference(intelObj->mt->region->buffer); + i830->state.tex_buffer[unit] = intelObj->mt->region->buffer; + /* XXX: This calculation is probably broken for tiled images with + * a non-page-aligned offset. + */ + i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) * + intelObj->mt->cpp; - format = translate_texture_format(firstImage->TexFormat, - firstImage->InternalFormat); - pitch = intelObj->mt->pitch * intelObj->mt->cpp; - } + format = translate_texture_format(firstImage->TexFormat, + firstImage->InternalFormat); + pitch = intelObj->mt->pitch * intelObj->mt->cpp; state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); @@ -303,16 +283,15 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } /* convert border color from float to ubyte */ - CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); - CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); - CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); - CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); - - state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(border[0], - border[1], - border[2], - border[3]); - + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]); + + state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3], + border[0], + border[1], + 
border[2]); I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); /* memcmp was already disabled, but definitely won't work as the diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c index e8c8d5a..be96419 100644 --- a/i915/i830_vtbl.c +++ b/i915/i830_vtbl.c @@ -25,8 +25,6 @@ * **************************************************************************/ -#include "glapi/glapi.h" - #include "i830_context.h" #include "i830_reg.h" #include "intel_batchbuffer.h" @@ -126,7 +124,7 @@ i830_render_start(struct intel_context *intel) for (i = 0; i < I830_TEX_UNITS; i++) { if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_TEX(i))) { - GLuint sz = VB->TexCoordPtr[i]->size; + GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; GLuint emit; GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] & ~TEXCOORDTYPE_MASK); @@ -237,8 +235,8 @@ static GLboolean i830_check_vertex_size(struct intel_context *intel, GLuint expected) { struct i830_context *i830 = i830_context(&intel->ctx); - int vft0 = i830->current->Ctx[I830_CTXREG_VF]; - int vft1 = i830->current->Ctx[I830_CTXREG_VF2]; + int vft0 = i830->state.Ctx[I830_CTXREG_VF]; + int vft1 = i830->state.Ctx[I830_CTXREG_VF2]; int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT; int i, sz = 0; @@ -298,7 +296,7 @@ i830_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(29, IGNORE_CLIPRECTS); + BEGIN_BATCH(29); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -366,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel) #define emit( intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS ) + intel_batchbuffer_data(intel->batch, state, size ) static GLuint get_dirty(struct i830_hw_state *state) @@ -414,7 +412,7 @@ static void i830_emit_state(struct intel_context *intel) { struct i830_context *i830 = i830_context(&intel->ctx); - struct i830_hw_state *state = i830->current; + struct i830_hw_state *state = &i830->state; int i, count; GLuint dirty; dri_bo 
*aper_array[3 + I830_TEX_UNITS]; @@ -429,13 +427,9 @@ i830_emit_state(struct intel_context *intel) * It might be better to talk about explicit places where * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. - * - * Set the space as LOOP_CLIPRECTS now, since that's what our primitives - * will be emitted under. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE, - LOOP_CLIPRECTS); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE); count = 0; again: aper_count = 0; @@ -491,29 +485,24 @@ i830_emit_state(struct intel_context *intel) } if (dirty & I830_UPLOAD_BUFFERS) { - GLuint count = 9; + GLuint count = 15; DBG("I830_UPLOAD_BUFFERS:\n"); if (state->depth_region) count += 3; - if (intel->constant_cliprect) - count += 6; - - BEGIN_BATCH(count, IGNORE_CLIPRECTS); + BEGIN_BATCH(count); OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - state->draw_region->draw_offset); + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); if (state->depth_region) { OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - state->depth_region->draw_offset); + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); } OUT_BATCH(state->Buffer[I830_DESTREG_DV0]); @@ -523,15 +512,13 @@ i830_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I830_DESTREG_SR1]); OUT_BATCH(state->Buffer[I830_DESTREG_SR2]); - if (intel->constant_cliprect) { - assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP); - OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]); - OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]); - OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]); - OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]); - 
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]); - OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]); - } + assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]); ADVANCE_BATCH(); } @@ -544,7 +531,7 @@ i830_emit_state(struct intel_context *intel) if ((dirty & I830_UPLOAD_TEX(i))) { DBG("I830_UPLOAD_TEX(%d):\n", i); - BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1, IGNORE_CLIPRECTS); + BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]); if (state->tex_buffer[i]) { @@ -552,10 +539,6 @@ i830_emit_state(struct intel_context *intel) I915_GEM_DOMAIN_SAMPLER, 0, state->tex_offset[i]); } - else if (state == &i830->meta) { - assert(i == 0); - OUT_BATCH(0); - } else { OUT_BATCH(state->tex_offset[i]); } @@ -590,10 +573,6 @@ i830_destroy_context(struct intel_context *intel) intel_region_release(&i830->state.draw_region); intel_region_release(&i830->state.depth_region); - intel_region_release(&i830->meta.draw_region); - intel_region_release(&i830->meta.depth_region); - intel_region_release(&i830->initial.draw_region); - intel_region_release(&i830->initial.depth_region); for (i = 0; i < I830_TEX_UNITS; i++) { if (i830->state.tex_buffer[i] != NULL) { @@ -605,24 +584,23 @@ i830_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } - -void -i830_state_draw_region(struct intel_context *intel, - struct i830_hw_state *state, - struct intel_region *color_region, - struct intel_region *depth_region) +static void +i830_set_draw_region(struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_regions) { struct i830_context *i830 = i830_context(&intel->ctx); GLcontext *ctx = 
&intel->ctx; struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; + struct i830_hw_state *state = &i830->state; + uint32_t draw_x, draw_y; - ASSERT(state == &i830->state || state == &i830->meta); - - if (state->draw_region != color_region) { + if (state->draw_region != color_regions[0]) { intel_region_release(&state->draw_region); - intel_region_reference(&state->draw_region, color_region); + intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { intel_region_release(&state->depth_region); @@ -633,7 +611,7 @@ i830_state_draw_region(struct intel_context *intel, * Set stride/cpp values */ i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0], - color_region, BUF_3D_ID_COLOR_BACK); + color_regions[0], BUF_3D_ID_COLOR_BACK); i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0], depth_region, BUF_3D_ID_DEPTH); @@ -673,38 +651,42 @@ i830_state_draw_region(struct intel_context *intel, } state->Buffer[I830_DESTREG_DV1] = value; - if (intel->constant_cliprect) { - state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO; - state->Buffer[I830_DESTREG_DRAWRECT1] = 0; - state->Buffer[I830_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */ - state->Buffer[I830_DESTREG_DRAWRECT3] = - (ctx->DrawBuffer->Width & 0xffff) | - (ctx->DrawBuffer->Height << 16); - state->Buffer[I830_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */ - state->Buffer[I830_DESTREG_DRAWRECT5] = 0; + /* We set up the drawing rectangle to be offset into the color + * region's location in the miptree. If it doesn't match with + * depth's offsets, we can't render to it. + * + * (Well, not actually true -- the hw grew a bit to let depth's + * offset get forced to 0,0. We may want to use that if people are + * hitting that case. 
Also, some configurations may be supportable + * by tweaking the start offset of the buffers around, which we + * can't do in general due to tiling) + */ + FALLBACK(intel, I830_FALLBACK_DRAW_OFFSET, + (depth_region && color_regions[0]) && + (depth_region->draw_x != color_regions[0]->draw_x || + depth_region->draw_y != color_regions[0]->draw_y)); + + if (color_regions[0]) { + draw_x = color_regions[0]->draw_x; + draw_y = color_regions[0]->draw_y; + } else if (depth_region) { + draw_x = depth_region->draw_x; + draw_y = depth_region->draw_y; } else { - state->Buffer[I830_DESTREG_DRAWRECT0] = MI_NOOP; - state->Buffer[I830_DESTREG_DRAWRECT1] = MI_NOOP; - state->Buffer[I830_DESTREG_DRAWRECT2] = MI_NOOP; - state->Buffer[I830_DESTREG_DRAWRECT3] = MI_NOOP; - state->Buffer[I830_DESTREG_DRAWRECT4] = MI_NOOP; - state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP; + draw_x = 0; + draw_y = 0; } - I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); - - -} + state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO; + state->Buffer[I830_DESTREG_DRAWRECT1] = 0; + state->Buffer[I830_DESTREG_DRAWRECT2] = (draw_y << 16) | draw_x; + state->Buffer[I830_DESTREG_DRAWRECT3] = + ((ctx->DrawBuffer->Width + draw_x) & 0xffff) | + ((ctx->DrawBuffer->Height + draw_y) << 16); + state->Buffer[I830_DESTREG_DRAWRECT4] = (draw_y << 16) | draw_x; + state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP; - -static void -i830_set_draw_region(struct intel_context *intel, - struct intel_region *color_regions[], - struct intel_region *depth_region, - GLuint num_regions) -{ - struct i830_context *i830 = i830_context(&intel->ctx); - i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region); + I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); } /* This isn't really handled at the moment. @@ -714,17 +696,13 @@ i830_new_batch(struct intel_context *intel) { struct i830_context *i830 = i830_context(&intel->ctx); i830->state.emitted = 0; - - /* Check that we didn't just wrap our batchbuffer at a bad time. 
*/ - assert(!intel->no_batch_wrap); } static void i830_assert_not_dirty( struct intel_context *intel ) { struct i830_context *i830 = i830_context(&intel->ctx); - struct i830_hw_state *state = i830->current; - assert(!get_dirty(state)); + assert(!get_dirty(&i830->state)); } static void diff --git a/i915/i915_context.c b/i915/i915_context.c index 7d4c7cf..4d86aae 100644 --- a/i915/i915_context.c +++ b/i915/i915_context.c @@ -28,7 +28,6 @@ #include "i915_context.h" #include "main/imports.h" #include "main/macros.h" -#include "intel_tex.h" #include "intel_tris.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" @@ -38,15 +37,11 @@ #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" -#include "utils.h" #include "i915_reg.h" #include "i915_program.h" -#include "intel_regions.h" -#include "intel_batchbuffer.h" #include "intel_tris.h" #include "intel_span.h" -#include "intel_pixel.h" /*************************************** * Mesa's Driver Functions @@ -100,7 +95,7 @@ extern const struct tnl_pipeline_stage *intel_pipeline[]; GLboolean i915CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { struct dd_function_table functions; @@ -113,10 +108,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; if (0) - _mesa_printf("\ntexmem-0-3 branch\n\n"); + printf("\ntexmem-0-3 branch\n\n"); i915InitVtbl(i915); - i915InitMetaFuncs(i915); i915InitDriverFunctions(&functions); @@ -143,6 +137,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; ctx->Const.MaxVarying = I915_TEX_UNITS; + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Advertise the full hardware capabilities. 
The new memory * manager should cope much better with overload situations: diff --git a/i915/i915_context.h b/i915/i915_context.h index 25418d5..b516928 100644 --- a/i915/i915_context.h +++ b/i915/i915_context.h @@ -40,6 +40,7 @@ #define I915_FALLBACK_POLYGON_SMOOTH 0x40000 #define I915_FALLBACK_POINT_SMOOTH 0x80000 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000 +#define I915_FALLBACK_DRAW_OFFSET 0x200000 #define I915_UPLOAD_CTX 0x1 #define I915_UPLOAD_BUFFERS 0x2 @@ -259,7 +260,7 @@ struct i915_context struct i915_fragment_program *current_program; - struct i915_hw_state meta, initial, state, *current; + struct i915_hw_state state; }; @@ -318,7 +319,7 @@ do { \ * i915_context.c */ extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); @@ -345,12 +346,6 @@ extern void i915_update_provoking_vertex(GLcontext *ctx); extern void i915UpdateTextureState(struct intel_context *intel); extern void i915InitTextureFuncs(struct dd_function_table *functions); -/*====================================================================== - * i915_metaops.c - */ -void i915InitMetaFuncs(struct i915_context *i915); - - /*====================================================================== * i915_fragprog.c */ diff --git a/i915/i915_debug.c b/i915/i915_debug.c index fecfac3..4569fb9 100644 --- a/i915/i915_debug.c +++ b/i915/i915_debug.c @@ -31,27 +31,25 @@ #include "i915_context.h" #include "i915_debug.h" -#define PRINTF( ... 
) _mesa_printf( __VA_ARGS__ ) - static GLboolean debug( struct debug_stream *stream, const char *name, GLuint len ) { GLuint i; GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); if (len == 0) { - PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]); + printf("Error - zero length packet (0x%08x)\n", stream->ptr[0]); assert(0); return GL_FALSE; } if (stream->print_addresses) - PRINTF("%08x: ", stream->offset); + printf("%08x: ", stream->offset); - PRINTF("%s (%d dwords):\n", name, len); + printf("%s (%d dwords):\n", name, len); for (i = 0; i < len; i++) - PRINTF("\t0x%08x\n", ptr[i]); - PRINTF("\n"); + printf("\t0x%08x\n", ptr[i]); + printf("\n"); stream->offset += len * sizeof(GLuint); @@ -88,17 +86,17 @@ static GLboolean debug_prim( struct debug_stream *stream, const char *name, - PRINTF("%s %s (%d dwords):\n", name, prim, len); - PRINTF("\t0x%08x\n", ptr[0]); + printf("%s %s (%d dwords):\n", name, prim, len); + printf("\t0x%08x\n", ptr[0]); for (i = 1; i < len; i++) { if (dump_floats) - PRINTF("\t0x%08x // %f\n", ptr[i], *(GLfloat *)&ptr[i]); + printf("\t0x%08x // %f\n", ptr[i], *(GLfloat *)&ptr[i]); else - PRINTF("\t0x%08x\n", ptr[i]); + printf("\t0x%08x\n", ptr[i]); } - PRINTF("\n"); + printf("\n"); stream->offset += len * sizeof(GLuint); @@ -113,15 +111,15 @@ static GLboolean debug_program( struct debug_stream *stream, const char *name, G GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); if (len == 0) { - PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]); + printf("Error - zero length packet (0x%08x)\n", stream->ptr[0]); assert(0); return GL_FALSE; } if (stream->print_addresses) - PRINTF("%08x: ", stream->offset); + printf("%08x: ", stream->offset); - PRINTF("%s (%d dwords):\n", name, len); + printf("%s (%d dwords):\n", name, len); i915_disassemble_program( ptr, len ); stream->offset += len * sizeof(GLuint); @@ -135,17 +133,17 @@ static GLboolean debug_chain( struct debug_stream *stream, const char *name, GLu GLuint 
old_offset = stream->offset + len * sizeof(GLuint); GLuint i; - PRINTF("%s (%d dwords):\n", name, len); + printf("%s (%d dwords):\n", name, len); for (i = 0; i < len; i++) - PRINTF("\t0x%08x\n", ptr[i]); + printf("\t0x%08x\n", ptr[i]); stream->offset = ptr[1] & ~0x3; if (stream->offset < old_offset) - PRINTF("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + printf("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", old_offset, stream->offset ); else - PRINTF("\n... skipping from 0x%x --> 0x%x ...\n\n", + printf("\n... skipping from 0x%x --> 0x%x ...\n\n", old_offset, stream->offset ); @@ -165,10 +163,10 @@ static GLboolean debug_variable_length_prim( struct debug_stream *stream ) len = 1+(i+2)/2; - PRINTF("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + printf("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); for (i = 0; i < len; i++) - PRINTF("\t0x%08x\n", ptr[i]); - PRINTF("\n"); + printf("\t0x%08x\n", ptr[i]); + printf("\n"); stream->offset += len * sizeof(GLuint); return GL_TRUE; @@ -178,9 +176,9 @@ static GLboolean debug_variable_length_prim( struct debug_stream *stream ) #define BITS( dw, hi, lo, ... ) \ do { \ unsigned himask = 0xffffffffU >> (31 - (hi)); \ - PRINTF("\t\t "); \ - PRINTF(__VA_ARGS__); \ - PRINTF(": 0x%x\n", ((dw) & himask) >> (lo)); \ + printf("\t\t "); \ + printf(__VA_ARGS__); \ + printf(": 0x%x\n", ((dw) & himask) >> (lo)); \ } while (0) #define MBZ( dw, hi, lo) do { \ @@ -194,9 +192,9 @@ do { \ #define FLAG( dw, bit, ... 
) \ do { \ if (((dw) >> (bit)) & 1) { \ - PRINTF("\t\t "); \ - PRINTF(__VA_ARGS__); \ - PRINTF("\n"); \ + printf("\t\t "); \ + printf(__VA_ARGS__); \ + printf("\n"); \ } \ } while (0) @@ -208,17 +206,17 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, GLuint bits = (ptr[0] >> 4) & 0xff; GLuint j = 0; - PRINTF("%s (%d dwords, flags: %x):\n", name, len, bits); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords, flags: %x):\n", name, len, bits); + printf("\t0x%08x\n", ptr[j++]); if (bits & (1<<0)) { - PRINTF("\t LIS0: 0x%08x\n", ptr[j]); - PRINTF("\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + printf("\t LIS0: 0x%08x\n", ptr[j]); + printf("\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); BITS(ptr[j], 0, 0, "vb invalidate disable"); j++; } if (bits & (1<<1)) { - PRINTF("\t LIS1: 0x%08x\n", ptr[j]); + printf("\t LIS1: 0x%08x\n", ptr[j]); BITS(ptr[j], 29, 24, "vb dword width"); BITS(ptr[j], 21, 16, "vb dword pitch"); BITS(ptr[j], 15, 0, "vb max index"); @@ -226,7 +224,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, } if (bits & (1<<2)) { int i; - PRINTF("\t LIS2: 0x%08x\n", ptr[j]); + printf("\t LIS2: 0x%08x\n", ptr[j]); for (i = 0; i < 8; i++) { unsigned tc = (ptr[j] >> (i * 4)) & 0xf; if (tc != 0xf) @@ -235,11 +233,11 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, j++; } if (bits & (1<<3)) { - PRINTF("\t LIS3: 0x%08x\n", ptr[j]); + printf("\t LIS3: 0x%08x\n", ptr[j]); j++; } if (bits & (1<<4)) { - PRINTF("\t LIS4: 0x%08x\n", ptr[j]); + printf("\t LIS4: 0x%08x\n", ptr[j]); BITS(ptr[j], 31, 23, "point width"); BITS(ptr[j], 22, 19, "line width"); FLAG(ptr[j], 18, "alpha flatshade"); @@ -261,7 +259,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, j++; } if (bits & (1<<5)) { - PRINTF("\t LIS5: 0x%08x\n", ptr[j]); + printf("\t LIS5: 0x%08x\n", ptr[j]); BITS(ptr[j], 31, 28, "rgba write disables"); FLAG(ptr[j], 27, "force dflt point width"); FLAG(ptr[j], 26, "last pixel enable"); @@ 
-279,7 +277,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, j++; } if (bits & (1<<6)) { - PRINTF("\t LIS6: 0x%08x\n", ptr[j]); + printf("\t LIS6: 0x%08x\n", ptr[j]); FLAG(ptr[j], 31, "alpha test enable"); BITS(ptr[j], 30, 28, "alpha func"); BITS(ptr[j], 27, 20, "alpha ref"); @@ -296,7 +294,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream, } - PRINTF("\n"); + printf("\n"); assert(j == len); @@ -315,34 +313,34 @@ static GLboolean debug_load_indirect( struct debug_stream *stream, GLuint bits = (ptr[0] >> 8) & 0x3f; GLuint i, j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); for (i = 0; i < 6; i++) { if (bits & (1<ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); BR13(stream, ptr[j++]); BR2223(stream, ptr[j], ptr[j+1]); @@ -460,8 +458,8 @@ static GLboolean debug_color_blit( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); BR13(stream, ptr[j++]); BR2223(stream, ptr[j], ptr[j+1]); @@ -481,8 +479,8 @@ static GLboolean debug_modes4( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j]); BITS(ptr[j], 21, 18, "logicop func"); FLAG(ptr[j], 17, "stencil test mask modify-enable"); FLAG(ptr[j], 16, "stencil write mask modify-enable"); @@ -502,26 +500,26 @@ static GLboolean debug_map_state( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d 
dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); { - PRINTF("\t0x%08x\n", ptr[j]); + printf("\t0x%08x\n", ptr[j]); BITS(ptr[j], 15, 0, "map mask"); j++; } while (j < len) { { - PRINTF("\t TMn.0: 0x%08x\n", ptr[j]); - PRINTF("\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + printf("\t TMn.0: 0x%08x\n", ptr[j]); + printf("\t map address: 0x%08x\n", (ptr[j] & ~0x3)); FLAG(ptr[j], 1, "vertical line stride"); FLAG(ptr[j], 0, "vertical line stride offset"); j++; } { - PRINTF("\t TMn.1: 0x%08x\n", ptr[j]); + printf("\t TMn.1: 0x%08x\n", ptr[j]); BITS(ptr[j], 31, 21, "height"); BITS(ptr[j], 20, 10, "width"); BITS(ptr[j], 9, 7, "surface format"); @@ -532,7 +530,7 @@ static GLboolean debug_map_state( struct debug_stream *stream, j++; } { - PRINTF("\t TMn.2: 0x%08x\n", ptr[j]); + printf("\t TMn.2: 0x%08x\n", ptr[j]); BITS(ptr[j], 31, 21, "dword pitch"); BITS(ptr[j], 20, 15, "cube face enables"); BITS(ptr[j], 14, 9, "max lod"); @@ -554,18 +552,18 @@ static GLboolean debug_sampler_state( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); { - PRINTF("\t0x%08x\n", ptr[j]); + printf("\t0x%08x\n", ptr[j]); BITS(ptr[j], 15, 0, "sampler mask"); j++; } while (j < len) { { - PRINTF("\t TSn.0: 0x%08x\n", ptr[j]); + printf("\t TSn.0: 0x%08x\n", ptr[j]); FLAG(ptr[j], 31, "reverse gamma"); FLAG(ptr[j], 30, "planar to packed"); FLAG(ptr[j], 29, "yuv->rgb"); @@ -582,7 +580,7 @@ static GLboolean debug_sampler_state( struct debug_stream *stream, } { - PRINTF("\t TSn.1: 0x%08x\n", ptr[j]); + printf("\t TSn.1: 0x%08x\n", ptr[j]); BITS(ptr[j], 31, 24, "min lod"); MBZ( ptr[j], 23, 18 ); FLAG(ptr[j], 17, "kill pixel enable"); @@ -597,7 +595,7 @@ static GLboolean debug_sampler_state( struct debug_stream *stream, j++; } { - 
PRINTF("\t TSn.2: 0x%08x (default color)\n", ptr[j]); + printf("\t TSn.2: 0x%08x (default color)\n", ptr[j]); j++; } } @@ -614,11 +612,11 @@ static GLboolean debug_dest_vars( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); { - PRINTF("\t0x%08x\n", ptr[j]); + printf("\t0x%08x\n", ptr[j]); FLAG(ptr[j], 31, "early classic ztest"); FLAG(ptr[j], 30, "opengl tex default color"); FLAG(ptr[j], 29, "bypass iz"); @@ -649,11 +647,11 @@ static GLboolean debug_buf_info( struct debug_stream *stream, GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); int j = 0; - PRINTF("%s (%d dwords):\n", name, len); - PRINTF("\t0x%08x\n", ptr[j++]); + printf("%s (%d dwords):\n", name, len); + printf("\t0x%08x\n", ptr[j++]); { - PRINTF("\t0x%08x\n", ptr[j]); + printf("\t0x%08x\n", ptr[j]); BITS(ptr[j], 28, 28, "aux buffer id"); BITS(ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); FLAG(ptr[j], 23, "use fence regs"); @@ -665,7 +663,7 @@ static GLboolean debug_buf_info( struct debug_stream *stream, j++; } - PRINTF("\t0x%08x -- buffer base address\n", ptr[j++]); + printf("\t0x%08x -- buffer base address\n", ptr[j++]); stream->offset += len * sizeof(GLuint); assert(j == len); @@ -826,7 +824,7 @@ i915_dump_batchbuffer( GLuint *start, GLuint bytes = (end - start) * 4; GLboolean done = GL_FALSE; - PRINTF("\n\nBATCH: (%d)\n", bytes / 4); + printf("\n\nBATCH: (%d)\n", bytes / 4); stream.offset = 0; stream.ptr = (char *)start; @@ -843,7 +841,7 @@ i915_dump_batchbuffer( GLuint *start, stream.offset >= 0); } - PRINTF("END-BATCH\n\n\n"); + printf("END-BATCH\n\n\n"); } diff --git a/i915/i915_debug_fp.c b/i915/i915_debug_fp.c index 84347a0..adfc9e8 100644 --- a/i915/i915_debug_fp.c +++ b/i915/i915_debug_fp.c @@ -30,11 +30,6 @@ #include "i915_reg.h" #include "i915_debug.h" #include "main/imports.h" -#include 
"shader/program.h" -#include "shader/prog_instruction.h" -#include "shader/prog_print.h" - -#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ ) static const char *opcodes[0x20] = { "NOP", @@ -126,27 +121,27 @@ print_reg_type_nr(GLuint type, GLuint nr) case REG_TYPE_T: switch (nr) { case T_DIFFUSE: - PRINTF("T_DIFFUSE"); + printf("T_DIFFUSE"); return; case T_SPECULAR: - PRINTF("T_SPECULAR"); + printf("T_SPECULAR"); return; case T_FOG_W: - PRINTF("T_FOG_W"); + printf("T_FOG_W"); return; default: - PRINTF("T_TEX%d", nr); + printf("T_TEX%d", nr); return; } case REG_TYPE_OC: if (nr == 0) { - PRINTF("oC"); + printf("oC"); return; } break; case REG_TYPE_OD: if (nr == 0) { - PRINTF("oD"); + printf("oD"); return; } break; @@ -154,7 +149,7 @@ print_reg_type_nr(GLuint type, GLuint nr) break; } - PRINTF("%s[%d]", regname[type], nr); + printf("%s[%d]", regname[type], nr); } #define REG_SWIZZLE_MASK 0x7777 @@ -175,33 +170,33 @@ print_reg_neg_swizzle(GLuint reg) (reg & REG_NEGATE_MASK) == 0) return; - PRINTF("."); + printf("."); for (i = 3; i >= 0; i--) { if (reg & (1 << ((i * 4) + 3))) - PRINTF("-"); + printf("-"); switch ((reg >> (i * 4)) & 0x7) { case 0: - PRINTF("x"); + printf("x"); break; case 1: - PRINTF("y"); + printf("y"); break; case 2: - PRINTF("z"); + printf("z"); break; case 3: - PRINTF("w"); + printf("w"); break; case 4: - PRINTF("0"); + printf("0"); break; case 5: - PRINTF("1"); + printf("1"); break; default: - PRINTF("?"); + printf("?"); break; } } @@ -226,15 +221,15 @@ print_dest_reg(GLuint dword) print_reg_type_nr(type, nr); if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) return; - PRINTF("."); + printf("."); if (dword & A0_DEST_CHANNEL_X) - PRINTF("x"); + printf("x"); if (dword & A0_DEST_CHANNEL_Y) - PRINTF("y"); + printf("y"); if (dword & A0_DEST_CHANNEL_Z) - PRINTF("z"); + printf("z"); if (dword & A0_DEST_CHANNEL_W) - PRINTF("w"); + printf("w"); } @@ -249,29 +244,29 @@ print_arith_op(GLuint opcode, const GLuint * program) if (opcode != A0_NOP) { 
print_dest_reg(program[0]); if (program[0] & A0_DEST_SATURATE) - PRINTF(" = SATURATE "); + printf(" = SATURATE "); else - PRINTF(" = "); + printf(" = "); } - PRINTF("%s ", opcodes[opcode]); + printf("%s ", opcodes[opcode]); print_src_reg(GET_SRC0_REG(program[0], program[1])); if (args[opcode] == 1) { - PRINTF("\n"); + printf("\n"); return; } - PRINTF(", "); + printf(", "); print_src_reg(GET_SRC1_REG(program[1], program[2])); if (args[opcode] == 2) { - PRINTF("\n"); + printf("\n"); return; } - PRINTF(", "); + printf(", "); print_src_reg(GET_SRC2_REG(program[2])); - PRINTF("\n"); + printf("\n"); return; } @@ -280,24 +275,24 @@ static void print_tex_op(GLuint opcode, const GLuint * program) { print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); - PRINTF(" = "); + printf(" = "); - PRINTF("%s ", opcodes[opcode]); + printf("%s ", opcodes[opcode]); - PRINTF("S[%d],", program[0] & T0_SAMPLER_NR_MASK); + printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK); print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK, (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); - PRINTF("\n"); + printf("\n"); } static void print_dcl_op(GLuint opcode, const GLuint * program) { - PRINTF("%s ", opcodes[opcode]); + printf("%s ", opcodes[opcode]); print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); - PRINTF("\n"); + printf("\n"); } @@ -307,7 +302,7 @@ i915_disassemble_program(const GLuint * program, GLuint sz) GLuint size = program[0] & 0x1ff; GLint i; - PRINTF("\t\tBEGIN\n"); + printf("\t\tBEGIN\n"); assert(size + 2 == sz); @@ -315,7 +310,7 @@ i915_disassemble_program(const GLuint * program, GLuint sz) for (i = 1; i < sz; i += 3, program += 3) { GLuint opcode = program[0] & (0x1f << 24); - PRINTF("\t\t"); + printf("\t\t"); if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT) print_arith_op(opcode >> 24, program); @@ -324,10 +319,10 @@ i915_disassemble_program(const GLuint * program, GLuint sz) else if (opcode == D0_DCL) print_dcl_op(opcode >> 24, program); else - PRINTF("Unknown 
opcode 0x%x\n", opcode); + printf("Unknown opcode 0x%x\n", opcode); } - PRINTF("\t\tEND\n\n"); + printf("\t\tEND\n\n"); } diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c index d9c6144..15e3b87 100644 --- a/i915/i915_fragprog.c +++ b/i915/i915_fragprog.c @@ -663,7 +663,7 @@ upload_program(struct i915_fragment_program *p) A0_MOV, get_result_vector(p, inst), get_result_flags(inst), 0, - swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); + swizzle(tmp, ZERO, ZERO, ZERO, ZERO), 0, 0); case OPCODE_POW: src0 = src_vector(p, &inst->SrcReg[0], program); @@ -1205,7 +1205,7 @@ i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) return GL_TRUE; } -static void +static GLboolean i915ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { @@ -1223,7 +1223,10 @@ i915ProgramStringNotify(GLcontext * ctx, } } - _tnl_program_string(ctx, target, prog); + (void) _tnl_program_string(ctx, target, prog); + + /* XXX check if program is legal, within limits */ + return GL_TRUE; } void @@ -1301,7 +1304,7 @@ i915ValidateFragmentProgram(struct i915_context *i915) for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { if (inputsRead & FRAG_BIT_TEX(i)) { - int sz = VB->TexCoordPtr[i]->size; + int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c deleted file mode 100644 index 90a78c6..0000000 --- a/i915/i915_metaops.c +++ /dev/null @@ -1,507 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "utils.h" - -#include "intel_screen.h" -#include "intel_batchbuffer.h" -#include "intel_regions.h" - -#include "i915_context.h" -#include "i915_reg.h" - -/* We touch almost everything: - */ -#define ACTIVE (I915_UPLOAD_INVARIENT | \ - I915_UPLOAD_CTX | \ - I915_UPLOAD_BUFFERS | \ - I915_UPLOAD_STIPPLE | \ - I915_UPLOAD_PROGRAM | \ - I915_UPLOAD_FOG | \ - I915_UPLOAD_TEX(0)) - -#define SET_STATE( i915, STATE ) \ -do { \ - i915->current->emitted &= ~ACTIVE; \ - i915->current = &i915->STATE; \ - i915->current->emitted &= ~ACTIVE; \ -} while (0) - - -static void -meta_no_stencil_write(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - -static void -meta_no_depth_write(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE | - S6_DEPTH_WRITE_ENABLE); - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - -static void -meta_depth_replace(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE ) - * ctx->Driver.DepthMask( ctx, GL_TRUE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS6] |= (S6_DEPTH_TEST_ENABLE | - S6_DEPTH_WRITE_ENABLE); - - /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) - */ - i915->meta.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK; - i915->meta.Ctx[I915_CTXREG_LIS6] |= - COMPAREFUNC_ALWAYS << S6_DEPTH_TEST_FUNC_SHIFT; - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - - -/* Set stencil unit to 
replace always with the reference value. - */ -static void -meta_stencil_replace(struct intel_context *intel, - GLuint s_mask, GLuint s_clear) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - GLuint op = STENCILOP_REPLACE; - GLuint func = COMPAREFUNC_ALWAYS; - - /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); - - /* ctx->Driver.StencilMask( ctx, s_mask ) - */ - i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; - - i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(s_mask)); - - /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); - - i915->meta.Ctx[I915_CTXREG_LIS5] |= ((op << S5_STENCIL_FAIL_SHIFT) | - (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (op << S5_STENCIL_PASS_Z_PASS_SHIFT)); - - - /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_ref, ~0 ) - */ - i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; - i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff)); - - i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK); - - i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) | - (func << S5_STENCIL_TEST_FUNC_SHIFT)); - - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - - -static void -meta_color_mask(struct intel_context *intel, GLboolean state) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - const GLuint mask = (S5_WRITEDISABLE_RED | - S5_WRITEDISABLE_GREEN | - S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA); - - /* Copy colormask state from "regular" hw context. 
- */ - if (state) { - i915->meta.Ctx[I915_CTXREG_LIS5] &= ~mask; - i915->meta.Ctx[I915_CTXREG_LIS5] |= - (i915->state.Ctx[I915_CTXREG_LIS5] & mask); - } - else - i915->meta.Ctx[I915_CTXREG_LIS5] |= mask; - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - - - -static void -meta_import_pixel_state(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - memcpy(i915->meta.Fog, i915->state.Fog, I915_FOG_SETUP_SIZE * 4); - - i915->meta.Ctx[I915_CTXREG_LIS5] = i915->state.Ctx[I915_CTXREG_LIS5]; - i915->meta.Ctx[I915_CTXREG_LIS6] = i915->state.Ctx[I915_CTXREG_LIS6]; - i915->meta.Ctx[I915_CTXREG_STATE4] = i915->state.Ctx[I915_CTXREG_STATE4]; - i915->meta.Ctx[I915_CTXREG_BLENDCOLOR1] = - i915->state.Ctx[I915_CTXREG_BLENDCOLOR1]; - i915->meta.Ctx[I915_CTXREG_IAB] = i915->state.Ctx[I915_CTXREG_IAB]; - - i915->meta.Buffer[I915_DESTREG_SENABLE] = - i915->state.Buffer[I915_DESTREG_SENABLE]; - i915->meta.Buffer[I915_DESTREG_SR1] = i915->state.Buffer[I915_DESTREG_SR1]; - i915->meta.Buffer[I915_DESTREG_SR2] = i915->state.Buffer[I915_DESTREG_SR2]; - - i915->meta.emitted &= ~I915_UPLOAD_FOG; - i915->meta.emitted &= ~I915_UPLOAD_BUFFERS; - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - - - - -#define REG( type, nr ) (((type)<<5)|(nr)) - -#define REG_R(x) REG(REG_TYPE_R, x) -#define REG_T(x) REG(REG_TYPE_T, x) -#define REG_CONST(x) REG(REG_TYPE_CONST, x) -#define REG_S(x) REG(REG_TYPE_S, x) -#define REG_OC REG(REG_TYPE_OC, 0) -#define REG_OD REG(REG_TYPE_OD, 0) -#define REG_U(x) REG(REG_TYPE_U, x) - -#define REG_T_DIFFUSE REG(REG_TYPE_T, T_DIFFUSE) -#define REG_T_SPECULAR REG(REG_TYPE_T, T_SPECULAR) -#define REG_T_FOG_W REG(REG_TYPE_T, T_FOG_W) -#define REG_T_TEX(x) REG(REG_TYPE_T, x) - - -#define A0_DEST_REG( reg ) ( (reg) << A0_DEST_NR_SHIFT ) -#define A0_SRC0_REG( reg ) ( (reg) << A0_SRC0_NR_SHIFT ) -#define A1_SRC1_REG( reg ) ( (reg) << A1_SRC1_NR_SHIFT ) -#define A1_SRC2_REG( reg ) ( (reg) << A1_SRC2_NR_SHIFT ) -#define A2_SRC2_REG( reg ) ( (reg) << 
A2_SRC2_NR_SHIFT ) -#define D0_DECL_REG( reg ) ( (reg) << D0_NR_SHIFT ) -#define T0_DEST_REG( reg ) ( (reg) << T0_DEST_NR_SHIFT ) - -#define T0_SAMPLER( unit ) ((unit)<ctx); - - static const GLuint prog[] = { - _3DSTATE_PIXEL_SHADER_PROGRAM, - - /* Declare incoming diffuse color: - */ - (D0_DCL | D0_DECL_REG(REG_T_DIFFUSE) | D0_CHANNEL_ALL), - D1_MBZ, - D2_MBZ, - - /* output-color = mov(t_diffuse) - */ - (A0_MOV | - A0_DEST_REG(REG_OC) | - A0_DEST_CHANNEL_ALL | A0_SRC0_REG(REG_T_DIFFUSE)), - (A1_SRC0_XYZW), - 0, - }; - - - memcpy(i915->meta.Program, prog, sizeof(prog)); - i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog); - i915->meta.Program[0] |= i915->meta.ProgramSize - 2; - i915->meta.emitted &= ~I915_UPLOAD_PROGRAM; -} - -static void -meta_texture_blend_replace(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - - static const GLuint prog[] = { - _3DSTATE_PIXEL_SHADER_PROGRAM, - - /* Declare the sampler: - */ - (D0_DCL | D0_DECL_REG(REG_S(0)) | D0_SAMPLE_TYPE_2D | D0_CHANNEL_NONE), - D1_MBZ, - D2_MBZ, - - /* Declare the interpolated texture coordinate: - */ - (D0_DCL | D0_DECL_REG(REG_T_TEX(0)) | D0_CHANNEL_ALL), - D1_MBZ, - D2_MBZ, - - /* output-color = texld(sample0, texcoord0) - */ - (T0_TEXLD | T0_DEST_REG(REG_OC) | T0_SAMPLER(0)), - T1_ADDRESS_REG(REG_TYPE_T, 0), - T2_MBZ - }; - - memcpy(i915->meta.Program, prog, sizeof(prog)); - i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog); - i915->meta.Program[0] |= i915->meta.ProgramSize - 2; - i915->meta.emitted &= ~I915_UPLOAD_PROGRAM; -} - - - - - -/* Set up an arbitary piece of memory as a rectangular texture - * (including the front or back buffer). 
- */ -static GLboolean -meta_tex_rect_source(struct intel_context *intel, - dri_bo *buffer, - GLuint offset, - GLuint pitch, GLuint height, GLenum format, GLenum type) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - GLuint unit = 0; - GLint numLevels = 1; - GLuint *state = i915->meta.Tex[0]; - GLuint textureFormat; - GLuint cpp; - - /* A full implementation of this would do the upload through - * glTexImage2d, and get all the conversion operations at that - * point. We are restricted, but still at least have access to the - * fragment program swizzle. - */ - switch (format) { - case GL_BGRA: - switch (type) { - case GL_UNSIGNED_INT_8_8_8_8_REV: - case GL_UNSIGNED_BYTE: - textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888); - cpp = 4; - break; - default: - return GL_FALSE; - } - break; - case GL_RGBA: - switch (type) { - case GL_UNSIGNED_INT_8_8_8_8_REV: - case GL_UNSIGNED_BYTE: - textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888); - cpp = 4; - break; - default: - return GL_FALSE; - } - break; - case GL_BGR: - switch (type) { - case GL_UNSIGNED_SHORT_5_6_5_REV: - textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); - cpp = 2; - break; - default: - return GL_FALSE; - } - break; - case GL_RGB: - switch (type) { - case GL_UNSIGNED_SHORT_5_6_5: - textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); - cpp = 2; - break; - default: - return GL_FALSE; - } - break; - - default: - return GL_FALSE; - } - - - if ((pitch * cpp) & 3) { - _mesa_printf("%s: texture is not dword pitch\n", __FUNCTION__); - return GL_FALSE; - } - -/* intel_region_release(&i915->meta.tex_region[0]); */ -/* intel_region_reference(&i915->meta.tex_region[0], region); */ - i915->meta.tex_buffer[0] = buffer; - i915->meta.tex_offset[0] = offset; - - state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) | - ((pitch - 1) << MS3_WIDTH_SHIFT) | - textureFormat | MS3_USE_FENCE_REGS); - - state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) | - MS4_CUBE_FACE_ENA_MASK | - 
((((numLevels - 1) * 4)) << MS4_MAX_LOD_SHIFT)); - - state[I915_TEXREG_SS2] = ((FILTER_NEAREST << SS2_MIN_FILTER_SHIFT) | - (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | - (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT)); - - state[I915_TEXREG_SS3] = ((TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) | - (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) | - (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) | - (unit << SS3_TEXTUREMAP_INDEX_SHIFT)); - - state[I915_TEXREG_SS4] = 0; - - i915->meta.emitted &= ~I915_UPLOAD_TEX(0); - return GL_TRUE; -} - - -/** - * Set the color and depth drawing region for meta ops. - */ -static void -meta_draw_region(struct intel_context *intel, - struct intel_region *color_region, - struct intel_region *depth_region) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - i915_state_draw_region(intel, &i915->meta, color_region, depth_region); -} - - -static void -set_vertex_format(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - - i915->meta.Ctx[I915_CTXREG_LIS2] = - (S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | - S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); - - i915->meta.Ctx[I915_CTXREG_LIS4] &= ~S4_VFMT_MASK; - - i915->meta.Ctx[I915_CTXREG_LIS4] |= (S4_VFMT_COLOR | S4_VFMT_XYZ); - - i915->meta.emitted &= ~I915_UPLOAD_CTX; -} - - - -/* Operations where the 3D engine is decoupled temporarily from the - * current GL state and used for other purposes than simply rendering - * incoming triangles. 
- */ -static void -install_meta_state(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - memcpy(&i915->meta, &i915->initial, sizeof(i915->meta)); - i915->meta.active = ACTIVE; - i915->meta.emitted = 0; - - SET_STATE(i915, meta); - set_vertex_format(intel); - meta_no_texture(intel); -} - -static void -leave_meta_state(struct intel_context *intel) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - intel_region_release(&i915->meta.draw_region); - intel_region_release(&i915->meta.depth_region); -/* intel_region_release(&i915->meta.tex_region[0]); */ - SET_STATE(i915, state); -} - - - -void -i915InitMetaFuncs(struct i915_context *i915) -{ - i915->intel.vtbl.install_meta_state = install_meta_state; - i915->intel.vtbl.leave_meta_state = leave_meta_state; - i915->intel.vtbl.meta_no_depth_write = meta_no_depth_write; - i915->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write; - i915->intel.vtbl.meta_stencil_replace = meta_stencil_replace; - i915->intel.vtbl.meta_depth_replace = meta_depth_replace; - i915->intel.vtbl.meta_color_mask = meta_color_mask; - i915->intel.vtbl.meta_no_texture = meta_no_texture; - i915->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; - i915->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; - i915->intel.vtbl.meta_draw_region = meta_draw_region; - i915->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; -} diff --git a/i915/i915_program.c b/i915/i915_program.c index e7908bd..3902c69 100644 --- a/i915/i915_program.c +++ b/i915/i915_program.c @@ -245,7 +245,7 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, } else { assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); - assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); /* Can't use unsaved temps for coords, as the phase boundary would result * in the contents becoming undefined. 
*/ diff --git a/i915/i915_state.c b/i915/i915_state.c index cc98d12..7275617 100644 --- a/i915/i915_state.c +++ b/i915/i915_state.c @@ -571,7 +571,7 @@ i915LineWidth(GLcontext * ctx, GLfloat widthf) DBG("%s\n", __FUNCTION__); width = (int) (widthf * 2); - CLAMP_SELF(width, 1, 0xf); + width = CLAMP(width, 1, 0xf); lis4 |= width << S4_LINE_WIDTH_SHIFT; if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) { @@ -589,7 +589,7 @@ i915PointSize(GLcontext * ctx, GLfloat size) DBG("%s\n", __FUNCTION__); - CLAMP_SELF(point_size, 1, 255); + point_size = CLAMP(point_size, 1, 255); lis4 |= point_size << S4_POINT_WIDTH_SHIFT; if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) { @@ -1157,7 +1157,4 @@ i915InitState(struct i915_context *i915) i915_init_packets(i915); _mesa_init_driver_state(ctx); - - memcpy(&i915->initial, &i915->state, sizeof(i915->state)); - i915->current = &i915->state; } diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c index d9588e5..fe3908f 100644 --- a/i915/i915_tex_layout.c +++ b/i915/i915_tex_layout.c @@ -145,8 +145,8 @@ i915_miptree_layout_cube(struct intel_context *intel, intel_miptree_set_image_offset(mt, level, face, x, y); if (d == 0) - _mesa_printf("cube mipmap %d/%d (%d..%d) is 0x0\n", - face, level, mt->first_level, mt->last_level); + printf("cube mipmap %d/%d (%d..%d) is 0x0\n", + face, level, mt->first_level, mt->last_level); d >>= 1; x += step_offsets[face][0] * d; diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c index de25848..a1ab8f8 100644 --- a/i915/i915_texstate.c +++ b/i915/i915_texstate.c @@ -28,6 +28,7 @@ #include "main/mtypes.h" #include "main/enums.h" #include "main/macros.h" +#include "main/colormac.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" @@ -149,7 +150,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) i915->state.tex_buffer[unit] = NULL; } - if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit)) + if (!intel_finalize_mipmap_tree(intel, unit)) return GL_FALSE; 
/* Get first image here, since intelObj->firstLevel will get set in @@ -157,34 +158,14 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) */ firstImage = tObj->Image[0][intelObj->firstLevel]; - if (intelObj->imageOverride) { - i915->state.tex_buffer[unit] = NULL; - i915->state.tex_offset[unit] = intelObj->textureOffset; + dri_bo_reference(intelObj->mt->region->buffer); + i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; + i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */ - switch (intelObj->depthOverride) { - case 32: - format = MAPSURF_32BIT | MT_32BIT_ARGB8888; - break; - case 24: - default: - format = MAPSURF_32BIT | MT_32BIT_XRGB8888; - break; - case 16: - format = MAPSURF_16BIT | MT_16BIT_RGB565; - break; - } - - pitch = intelObj->pitchOverride; - } else { - dri_bo_reference(intelObj->mt->region->buffer); - i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; - i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */ - - format = translate_texture_format(firstImage->TexFormat, - firstImage->InternalFormat, - tObj->DepthMode); - pitch = intelObj->mt->pitch * intelObj->mt->cpp; - } + format = translate_texture_format(firstImage->TexFormat, + firstImage->InternalFormat, + tObj->DepthMode); + pitch = intelObj->mt->pitch * intelObj->mt->cpp; state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | @@ -196,10 +177,11 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_MS3] |= MS3_TILE_WALK; } - /* We get one field with fraction bits to cover the maximum addressable (smallest - * resolution) LOD. Use it to cover both MAX_LEVEL and MAX_LOD. + /* We get one field with fraction bits for the maximum addressable + * (lowest resolution) LOD. Use it to cover both MAX_LEVEL and + * MAX_LOD. 
*/ - maxlod = MIN2(tObj->MaxLod, tObj->MaxLevel - tObj->BaseLevel); + maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | @@ -346,25 +328,25 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) } /* convert border color from float to ubyte */ - CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); - CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); - CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); - CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the channels * for safety. 
*/ - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], - border[0], - border[0], - border[0]); + state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0], + border[0], + border[0], + border[0]); } else { - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], - border[1], - border[2], - border[3]); + state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3], + border[0], + border[1], + border[2]); } diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c index ff97e5a..0a93e64 100644 --- a/i915/i915_vtbl.c +++ b/i915/i915_vtbl.c @@ -37,17 +37,13 @@ #include "tnl/t_vertex.h" #include "intel_batchbuffer.h" -#include "intel_tex.h" #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" -#include "intel_chipset.h" #include "i915_reg.h" #include "i915_context.h" -#include "glapi/glapi.h" - static void i915_render_prevalidate(struct intel_context *intel) { @@ -59,6 +55,7 @@ i915_render_prevalidate(struct intel_context *intel) static void i915_render_start(struct intel_context *intel) { + intel_prepare_render(intel); } @@ -100,8 +97,8 @@ static GLboolean i915_check_vertex_size(struct intel_context *intel, GLuint expected) { struct i915_context *i915 = i915_context(&intel->ctx); - int lis2 = i915->current->Ctx[I915_CTXREG_LIS2]; - int lis4 = i915->current->Ctx[I915_CTXREG_LIS4]; + int lis2 = i915->state.Ctx[I915_CTXREG_LIS2]; + int lis4 = i915->state.Ctx[I915_CTXREG_LIS4]; int i, sz = 0; switch (lis4 & S4_VFMT_XYZW_MASK) { @@ -174,7 +171,7 @@ i915_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(17, IGNORE_CLIPRECTS); + BEGIN_BATCH(17); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -220,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel) #define emit(intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS ) + intel_batchbuffer_data(intel->batch, state, size) static GLuint get_dirty(struct i915_hw_state *state) @@ -287,7 +284,7 @@ static void i915_emit_state(struct 
intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); - struct i915_hw_state *state = i915->current; + struct i915_hw_state *state = &i915->state; int i, count, aper_count; GLuint dirty; dri_bo *aper_array[3 + I915_TEX_UNITS]; @@ -301,13 +298,9 @@ i915_emit_state(struct intel_context *intel) * It might be better to talk about explicit places where * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. - * - * Set the space as LOOP_CLIPRECTS now, since that's what our primitives - * will be emitted under. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE, - LOOP_CLIPRECTS); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE); count = 0; again: aper_count = 0; @@ -373,7 +366,7 @@ i915_emit_state(struct intel_context *intel) } if (dirty & I915_UPLOAD_BUFFERS) { - GLuint count = 9; + GLuint count = 15; if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); @@ -381,22 +374,17 @@ i915_emit_state(struct intel_context *intel) if (state->depth_region) count += 3; - if (intel->constant_cliprect) - count += 6; - - BEGIN_BATCH(count, IGNORE_CLIPRECTS); + BEGIN_BATCH(count); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - state->draw_region->draw_offset); + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); if (state->depth_region) { OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - state->depth_region->draw_offset); + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); } OUT_BATCH(state->Buffer[I915_DESTREG_DV0]); @@ -406,15 +394,13 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I915_DESTREG_SR1]); OUT_BATCH(state->Buffer[I915_DESTREG_SR2]); - if 
(intel->constant_cliprect) { - assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]); - OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]); - } + assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]); ADVANCE_BATCH(); } @@ -441,7 +427,7 @@ i915_emit_state(struct intel_context *intel) if (dirty & I915_UPLOAD_TEX(i)) nr++; - BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS); + BEGIN_BATCH(2 + nr * 3); OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); for (i = 0; i < I915_TEX_UNITS; i++) @@ -452,10 +438,6 @@ i915_emit_state(struct intel_context *intel) I915_GEM_DOMAIN_SAMPLER, 0, state->tex_offset[i]); } - else if (state == &i915->meta) { - assert(i == 0); - OUT_BATCH(0); - } else { OUT_BATCH(state->tex_offset[i]); } @@ -465,7 +447,7 @@ i915_emit_state(struct intel_context *intel) } ADVANCE_BATCH(); - BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS); + BEGIN_BATCH(2 + nr * 3); OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr)); OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); for (i = 0; i < I915_TEX_UNITS; i++) @@ -509,10 +491,6 @@ i915_destroy_context(struct intel_context *intel) intel_region_release(&i915->state.draw_region); intel_region_release(&i915->state.depth_region); - intel_region_release(&i915->meta.draw_region); - intel_region_release(&i915->meta.depth_region); - intel_region_release(&i915->initial.draw_region); - 
intel_region_release(&i915->initial.depth_region); for (i = 0; i < I915_TEX_UNITS; i++) { if (i915->state.tex_buffer[i] != NULL) { @@ -542,29 +520,23 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, } } -/** - * Set the drawing regions for the color and depth/stencil buffers. - * This involves setting the pitch, cpp and buffer ID/location. - * Also set pixel format for color and Z rendering - * Used for setting both regular and meta state. - */ -void -i915_state_draw_region(struct intel_context *intel, - struct i915_hw_state *state, - struct intel_region *color_region, - struct intel_region *depth_region) +static void +i915_set_draw_region(struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_regions) { struct i915_context *i915 = i915_context(&intel->ctx); GLcontext *ctx = &intel->ctx; struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; + struct i915_hw_state *state = &i915->state; + uint32_t draw_x, draw_y; - ASSERT(state == &i915->state || state == &i915->meta); - - if (state->draw_region != color_region) { + if (state->draw_region != color_regions[0]) { intel_region_release(&state->draw_region); - intel_region_reference(&state->draw_region, color_region); + intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { intel_region_release(&state->depth_region); @@ -575,7 +547,7 @@ i915_state_draw_region(struct intel_context *intel, * Set stride/cpp values */ i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0], - color_region, BUF_3D_ID_COLOR_BACK); + color_regions[0], BUF_3D_ID_COLOR_BACK); i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0], depth_region, BUF_3D_ID_DEPTH); @@ -611,7 +583,7 @@ i915_state_draw_region(struct intel_context *intel, * the value of this bit, the pipeline needs to be MI_FLUSHed. 
And it * can only be set when a depth buffer is already defined. */ - if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z && + if (intel->is_945 && intel->use_early_z && depth_region->tiling != I915_TILING_NONE) value |= CLASSIC_EARLY_DEPTH; @@ -623,36 +595,43 @@ i915_state_draw_region(struct intel_context *intel, } state->Buffer[I915_DESTREG_DV1] = value; - if (intel->constant_cliprect) { - state->Buffer[I915_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO; - state->Buffer[I915_DESTREG_DRAWRECT1] = 0; - state->Buffer[I915_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */ - state->Buffer[I915_DESTREG_DRAWRECT3] = - (ctx->DrawBuffer->Width & 0xffff) | - (ctx->DrawBuffer->Height << 16); - state->Buffer[I915_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */ - state->Buffer[I915_DESTREG_DRAWRECT5] = 0; + /* We set up the drawing rectangle to be offset into the color + * region's location in the miptree. If it doesn't match with + * depth's offsets, we can't render to it. + * + * (Well, not actually true -- the hw grew a bit to let depth's + * offset get forced to 0,0. We may want to use that if people are + * hitting that case. 
Also, some configurations may be supportable + * by tweaking the start offset of the buffers around, which we + * can't do in general due to tiling) + */ + FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET, + (depth_region && color_regions[0]) && + (depth_region->draw_x != color_regions[0]->draw_x || + depth_region->draw_y != color_regions[0]->draw_y)); + + if (color_regions[0]) { + draw_x = color_regions[0]->draw_x; + draw_y = color_regions[0]->draw_y; + } else if (depth_region) { + draw_x = depth_region->draw_x; + draw_y = depth_region->draw_y; } else { - state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP; - state->Buffer[I915_DESTREG_DRAWRECT1] = MI_NOOP; - state->Buffer[I915_DESTREG_DRAWRECT2] = MI_NOOP; - state->Buffer[I915_DESTREG_DRAWRECT3] = MI_NOOP; - state->Buffer[I915_DESTREG_DRAWRECT4] = MI_NOOP; - state->Buffer[I915_DESTREG_DRAWRECT5] = MI_NOOP; + draw_x = 0; + draw_y = 0; } - I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); -} - + /* When changing drawing rectangle offset, an MI_FLUSH is first required. */ + state->Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH; + state->Buffer[I915_DESTREG_DRAWRECT1] = _3DSTATE_DRAWRECT_INFO; + state->Buffer[I915_DESTREG_DRAWRECT2] = 0; + state->Buffer[I915_DESTREG_DRAWRECT3] = (draw_y << 16) | draw_x; + state->Buffer[I915_DESTREG_DRAWRECT4] = + ((ctx->DrawBuffer->Width + draw_x) & 0xffff) | + ((ctx->DrawBuffer->Height + draw_y) << 16); + state->Buffer[I915_DESTREG_DRAWRECT5] = (draw_y << 16) | draw_x; -static void -i915_set_draw_region(struct intel_context *intel, - struct intel_region *color_regions[], - struct intel_region *depth_region, - GLuint num_regions) -{ - struct i915_context *i915 = i915_context(&intel->ctx); - i915_state_draw_region(intel, &i915->state, color_regions[0], depth_region); + I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); } @@ -667,17 +646,13 @@ i915_new_batch(struct intel_context *intel) * difficulties associated with them (physical address requirements). 
*/ i915->state.emitted = 0; - - /* Check that we didn't just wrap our batchbuffer at a bad time. */ - assert(!intel->no_batch_wrap); } static void i915_assert_not_dirty( struct intel_context *intel ) { struct i915_context *i915 = i915_context(&intel->ctx); - struct i915_hw_state *state = i915->current; - GLuint dirty = get_dirty(state); + GLuint dirty = get_dirty(&i915->state); assert(!dirty); } diff --git a/i915/intel_render.c b/i915/intel_render.c index 410052b..ec20939 100644 --- a/i915/intel_render.c +++ b/i915/intel_render.c @@ -117,7 +117,7 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim) intel_set_prim(intel, hw_prim[prim]); } -static inline GLuint intel_get_vb_max(struct intel_context *intel) +static INLINE GLuint intel_get_vb_max(struct intel_context *intel) { GLuint ret; @@ -129,7 +129,7 @@ static inline GLuint intel_get_vb_max(struct intel_context *intel) return ret; } -static inline GLuint intel_get_current_max(struct intel_context *intel) +static INLINE GLuint intel_get_current_max(struct intel_context *intel) { if (intel->intelScreen->no_vbo) diff --git a/i915/intel_tris.c b/i915/intel_tris.c index bc527aa..fb191fe 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -52,8 +52,6 @@ #include "intel_buffers.h" #include "intel_reg.h" #include "intel_span.h" -#include "intel_tex.h" -#include "intel_chipset.h" #include "i830_context.h" #include "i830_reg.h" @@ -68,7 +66,7 @@ intel_flush_inline_primitive(struct intel_context *intel) assert(intel->prim.primitive != ~0); -/* _mesa_printf("/\n"); */ +/* printf("/\n"); */ if (used < 8) goto do_discard; @@ -89,20 +87,18 @@ intel_flush_inline_primitive(struct intel_context *intel) static void intel_start_inline(struct intel_context *intel, uint32_t prim) { - uint32_t batch_flags = LOOP_CLIPRECTS; BATCH_LOCALS; intel->vtbl.emit_state(intel); intel->no_batch_wrap = GL_TRUE; - /*_mesa_printf("%s *", __progname);*/ + /*printf("%s *", __progname);*/ /* Emit a slot which will be filled with the 
inline primitive * command later. */ - BEGIN_BATCH(2, batch_flags); - OUT_BATCH(0); + BEGIN_BATCH(1); assert((intel->batch->dirty_state & (1<<1)) == 0); @@ -114,7 +110,7 @@ static void intel_start_inline(struct intel_context *intel, uint32_t prim) ADVANCE_BATCH(); intel->no_batch_wrap = GL_FALSE; -/* _mesa_printf(">"); */ +/* printf(">"); */ } static void intel_wrap_inline(struct intel_context *intel) @@ -136,7 +132,7 @@ static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords) if (intel_batchbuffer_space(intel->batch) < sz) intel_wrap_inline(intel); -/* _mesa_printf("."); */ +/* printf("."); */ intel->vtbl.assert_not_dirty(intel); @@ -221,7 +217,7 @@ void intel_flush_prim(struct intel_context *intel) intel->prim.count = 0; offset = intel->prim.start_offset; intel->prim.start_offset = intel->prim.current_offset; - if (!IS_9XX(intel->intelScreen->deviceID)) + if (intel->gen < 3) intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); intel->prim.flush = NULL; @@ -251,8 +247,8 @@ void intel_flush_prim(struct intel_context *intel) intel->vertex_size * 4); #endif - if (IS_9XX(intel->intelScreen->deviceID)) { - BEGIN_BATCH(5, LOOP_CLIPRECTS); + if (intel->gen >= 3) { + BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(1) | 1); assert((offset & !S0_VB_OFFSET_MASK) == 0); @@ -270,7 +266,7 @@ void intel_flush_prim(struct intel_context *intel) } else { struct i830_context *i830 = i830_context(&intel->ctx); - BEGIN_BATCH(5, LOOP_CLIPRECTS); + BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(2) | 1); /* S0 */ @@ -607,7 +603,6 @@ static struct #define DO_POINTS 1 #define DO_FULL_QUAD 1 -#define HAVE_RGBA 1 #define HAVE_SPEC 1 #define HAVE_BACK_COLORS 0 #define HAVE_HW_FLATSHADE 1 @@ -1181,6 +1176,8 @@ static char *fallbackStrings[] = { [17] = "Logic op", [18] = "Smooth polygon", [19] = "Smooth point", + [20] = "point sprite coord origin", + [21] = "depth/color drawing offset", 
}; @@ -1250,81 +1247,6 @@ union fi GLint i; }; - -/**********************************************************************/ -/* Used only with the metaops callbacks. */ -/**********************************************************************/ -static void -intel_meta_draw_poly(struct intel_context *intel, - GLuint n, - GLfloat xy[][2], - GLfloat z, GLuint color, GLfloat tex[][2]) -{ - union fi *vb; - GLint i; - unsigned int saved_vertex_size = intel->vertex_size; - - LOCK_HARDWARE(intel); - - intel->vertex_size = 6; - - /* All 3d primitives should be emitted with LOOP_CLIPRECTS, - * otherwise the drawing origin (DR4) might not be set correctly. - */ - intel_set_prim(intel, PRIM3D_TRIFAN); - vb = (union fi *) intel_get_prim_space(intel, n); - - for (i = 0; i < n; i++) { - vb[0].f = xy[i][0]; - vb[1].f = xy[i][1]; - vb[2].f = z; - vb[3].i = color; - vb[4].f = tex[i][0]; - vb[5].f = tex[i][1]; - vb += 6; - } - - INTEL_FIREVERTICES(intel); - - intel->vertex_size = saved_vertex_size; - - UNLOCK_HARDWARE(intel); -} - -static void -intel_meta_draw_quad(struct intel_context *intel, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLfloat z, - GLuint color, - GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1) -{ - GLfloat xy[4][2]; - GLfloat tex[4][2]; - - xy[0][0] = x0; - xy[0][1] = y0; - xy[1][0] = x1; - xy[1][1] = y0; - xy[2][0] = x1; - xy[2][1] = y1; - xy[3][0] = x0; - xy[3][1] = y1; - - tex[0][0] = s0; - tex[0][1] = t0; - tex[1][0] = s1; - tex[1][1] = t0; - tex[2][0] = s1; - tex[2][1] = t1; - tex[3][0] = s0; - tex[3][1] = t1; - - intel_meta_draw_poly(intel, 4, xy, z, color, tex); -} - - - /**********************************************************************/ /* Initialization. 
*/ /**********************************************************************/ @@ -1333,7 +1255,6 @@ intel_meta_draw_quad(struct intel_context *intel, void intelInitTriFuncs(GLcontext * ctx) { - struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); static int firsttime = 1; @@ -1350,6 +1271,4 @@ intelInitTriFuncs(GLcontext * ctx) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - - intel->vtbl.meta_draw_quad = intel_meta_draw_quad; } diff --git a/i965/Makefile.am b/i965/Makefile.am index 8a61dab..d9c82d5 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -27,7 +27,6 @@ i965_dri_la_SOURCES = \ ../shared/intel_pixel_draw.c \ ../shared/intel_pixel_read.c \ ../shared/intel_state.c \ - ../shared/intel_swapbuffers.c \ ../shared/intel_syncobj.c \ ../shared/intel_tex.c \ ../shared/intel_tex_copy.c \ @@ -88,4 +87,15 @@ i965_dri_la_SOURCES = \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c + brw_wm_surface_state.c \ + gen6_cc.c \ + gen6_clip_state.c \ + gen6_depthstencil.c \ + gen6_gs_state.c \ + gen6_sampler_state.c \ + gen6_scissor_state.c \ + gen6_sf_state.c \ + gen6_urb.c \ + gen6_viewport_state.c \ + gen6_vs_state.c \ + gen6_wm_state.c diff --git a/i965/brw_cc.c b/i965/brw_cc.c index bac1c3a..fa2d394 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -34,9 +34,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" -#include "intel_fbo.h" #include "main/macros.h" -#include "main/enums.h" static void prepare_cc_vp( struct brw_context *brw ) { @@ -295,8 +293,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + &cc, sizeof(cc)); /* Emit CC viewport relocation */ dri_bo_emit_reloc(bo, diff --git a/i965/brw_clip.c b/i965/brw_clip.c index 
dbd10a5..d3275c7 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -50,6 +50,7 @@ static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_clip_compile c; const GLuint *program; GLuint program_size; @@ -65,14 +66,13 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -85,7 +85,7 @@ static void compile_clip_prog( struct brw_context *brw, c.nr_attrs = brw_count_bits(c.key.attrs); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ @@ -130,20 +130,22 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache( &brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->clip.prog_data ); + brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->clip.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
*/ static void upload_clip_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); @@ -160,7 +162,7 @@ static void upload_clip_prog(struct brw_context *brw) /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; diff --git a/i965/brw_clip.h b/i965/brw_clip.h index 1c68255..d71bac7 100644 --- a/i965/brw_clip.h +++ b/i965/brw_clip.h @@ -118,7 +118,6 @@ struct brw_clip_compile { GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; - GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c index fa9648f..ceb62a3 100644 --- a/i965/brw_clip_line.c +++ b/i965/brw_clip_line.c @@ -39,13 +39,13 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -85,7 +85,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } - if (c->need_ff_sync) { + if (intel->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -126,6 +126,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -152,7 +153,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if 
(BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -189,7 +190,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -214,7 +215,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -229,7 +230,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_ENDIF(p, is_neg2); } } diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c index 8458f61..7f47634 100644 --- a/i965/brw_clip_point.c +++ b/i965/brw_clip_point.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c index 234b374..424c9a1 100644 --- a/i965/brw_clip_state.c +++ b/i965/brw_clip_state.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -74,6 +73,7 @@ static dri_bo * clip_unit_create_from_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_clip_unit_state clip; dri_bo *bo; @@ -105,7 +105,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Although up to 16 
concurrent Clip threads are allowed on IGDNG, * only 2 threads can output VUEs at a time. */ - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) clip.thread4.max_threads = 16 - 1; else clip.thread4.max_threads = 2 - 1; @@ -130,7 +130,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_G4X(brw)) + if (intel->is_g4x) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; @@ -142,8 +142,7 @@ clip_unit_create_from_key(struct brw_context *brw, bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, key, sizeof(*key), &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + &clip, sizeof(clip)); /* Emit clip program relocation */ assert(brw->clip.prog_bo); diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c index cf79224..815211a 100644 --- a/i965/brw_clip_tri.c +++ b/i965/brw_clip_tri.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" static void release_tmps( struct brw_clip_compile *c ) @@ -51,6 +50,7 @@ static void release_tmps( struct brw_clip_compile *c ) void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -78,7 +78,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; - if (BRW_IS_IGDNG(c->func.brw)) + if (intel->is_ironlake) delta = c->nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -119,7 +119,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } - if (c->need_ff_sync) { + if (intel->needs_ff_sync) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -571,6 +571,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) { struct brw_instruction *neg_rhw; 
struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); @@ -578,7 +579,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c index ad1bfa4..f36d22f 100644 --- a/i965/brw_clip_unfilled.c +++ b/i965/brw_clip_unfilled.c @@ -39,7 +39,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c index 5a73abd..14bc889 100644 --- a/i965/brw_clip_util.c +++ b/i965/brw_clip_util.c @@ -40,7 +40,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_clip.h" @@ -135,6 +134,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = get_tmp(c); GLuint i; @@ -142,7 +142,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, */ /* * After CLIP stage, only first 256 bits of the VUE are read - * back on IGDNG, so needn't change it + * back on Ironlake, so needn't change it */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); @@ -151,7 +151,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) delta = i * 16 + 32 * 3; if (delta == c->offset[VERT_RESULT_EDGE]) { @@ -185,7 +185,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if 
(intel->is_ironlake) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -359,7 +359,9 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct intel_context *intel = &c->func.brw->intel; + + if (intel->needs_ff_sync) { struct brw_compile *p = &c->func; struct brw_instruction *need_ff_sync; @@ -388,7 +390,9 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - if (c->need_ff_sync) { + struct intel_context *intel = &c->func.brw->intel; + + if (intel->needs_ff_sync) { struct brw_compile *p = &c->func; brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); diff --git a/i965/brw_context.c b/i965/brw_context.c index aaa2d80..a512896 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -33,7 +33,6 @@ #include "main/imports.h" #include "main/api_noop.h" #include "main/macros.h" -/* #include "main/vtxfmt.h" */ #include "main/simple_list.h" #include "shader/shader_api.h" @@ -41,16 +40,9 @@ #include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" -#include "brw_vs.h" -#include "intel_tex.h" -#include "intel_blit.h" -#include "intel_batchbuffer.h" -#include "intel_pixel.h" #include "intel_span.h" #include "tnl/t_pipeline.h" -#include "utils.h" - /*************************************** * Mesa's Driver Functions @@ -77,7 +69,7 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) } GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, + __DRIcontext *driContextPriv, void *sharedContextPrivate) { struct dd_function_table functions; @@ -86,7 +78,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); + printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } @@ -95,7 +87,7 @@ GLboolean 
brwCreateContext( const __GLcontextModes *mesaVis, if (!intelInitContext( intel, mesaVis, driContextPriv, sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); + printf("%s: failed to init intel context\n", __FUNCTION__); FREE(brw); return GL_FALSE; } @@ -111,6 +103,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ @@ -155,6 +150,38 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); + if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) { + brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; + brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; + brw->has_surface_tile_offset = GL_TRUE; + brw->has_compr4 = GL_TRUE; + brw->has_aa_line_parameters = GL_TRUE; + } else { + brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; + brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; + } + + /* WM maximum threads is number of EUs times number of threads per EU. 
*/ + if (intel->is_ironlake) { + brw->urb.size = 1024; + brw->vs_max_threads = 72; + brw->wm_max_threads = 12 * 6; + } else if (intel->is_g4x) { + brw->urb.size = 384; + brw->vs_max_threads = 32; + brw->wm_max_threads = 10 * 5; + } else if (intel->gen < 6) { + brw->urb.size = 256; + brw->vs_max_threads = 16; + brw->wm_max_threads = 8 * 4; + brw->has_negative_rhw_bug = GL_TRUE; + } + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) { + brw->vs_max_threads = 1; + brw->wm_max_threads = 1; + } + brw_init_state( brw ); brw->state.dirty.mesa = ~0; diff --git a/i965/brw_context.h b/i965/brw_context.h index fded47a..d6fc37e 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -131,7 +131,6 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -172,8 +171,8 @@ struct brw_fragment_program { GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - dri_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -283,6 +282,9 @@ struct brw_vs_ouput_sizes { enum brw_cache_id { + BRW_BLEND_STATE, + BRW_DEPTH_STENCIL_STATE, + BRW_COLOR_CALC_STATE, BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, @@ -291,7 +293,7 @@ enum brw_cache_id { BRW_WM_UNIT, BRW_SF_PROG, BRW_SF_VP, - BRW_SF_UNIT, + BRW_SF_UNIT, /* scissor state on gen6 */ BRW_VS_UNIT, BRW_VS_PROG, BRW_GS_UNIT, @@ -332,7 +334,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; /* Record of the last BOs chosen for each cache_id. Used to set @@ -356,6 +357,9 @@ struct brw_tracked_state { /* Flags for brw->state.cache. 
*/ +#define CACHE_NEW_BLEND_STATE (1<curbe.total_size = reg; if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", - brw->curbe.wm_start, - brw->curbe.wm_size, - brw->curbe.clip_start, - brw->curbe.clip_size, - brw->curbe.vs_start, - brw->curbe.vs_size ); + printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } @@ -198,7 +198,7 @@ static void prepare_constant_buffer(struct brw_context *brw) return; } - buf = (GLfloat *) _mesa_calloc(bufsz); + buf = (GLfloat *) calloc(1, bufsz); /* fragment shader constants */ if (brw->curbe.wm_size) { @@ -256,25 +256,36 @@ static void prepare_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - /* XXX just use a memcpy here */ - for (i = 0; i < nr; i++) { - const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; - buf[offset + i * 4 + 0] = value[0]; - buf[offset + i * 4 + 1] = value[1]; - buf[offset + i * 4 + 2] = value[2]; - buf[offset + i * 4 + 3] = value[3]; + if (vp->use_const_buffer) { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. 
+ */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + } + } else { + for (i = 0; i < nr; i++) { + memcpy(buf + offset + i * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, - buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, - bufsz, brw->curbe.last_bufsz, - brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.curbe_bo != NULL && @@ -282,20 +293,20 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + free(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; if (brw->curbe.curbe_bo != NULL && - (brw->curbe.need_new_bo || - brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } @@ -307,6 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset 
= 0; + drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; @@ -315,7 +327,9 @@ static void prepare_constant_buffer(struct brw_context *brw) /* Copy data to the buffer: */ - dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); + memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset, + buf, + bufsz); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); @@ -340,7 +354,7 @@ static void emit_constant_buffer(struct brw_context *brw) struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - BEGIN_BATCH(2, IGNORE_CLIPRECTS); + BEGIN_BATCH(2); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); OUT_BATCH(0); diff --git a/i965/brw_defines.h b/i965/brw_defines.h index c19510b..bb1b5f5 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -530,6 +530,7 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -727,7 +728,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -778,17 +780,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define 
BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -805,8 +823,219 @@ # define BRW_VE1_DST_OFFSET_SHIFT 0 #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_SIZE_SHIFT 8 +# define GEN6_URB_GS_ENTRIES_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define 
GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define 
GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 +/* DW16: Point sprite texture 
coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define 
GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -818,6 +1047,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define 
CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 @@ -832,12 +1080,4 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) -#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ - (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ - #endif diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c index 9fef230..a8f6b99 100644 --- a/i965/brw_disasm.c +++ b/i965/brw_disasm.c @@ -239,7 +239,7 @@ char *imm_encoding[8] = { [2] = "UW", [3] = "W", [5] = "VF", - [5] = "V", + [6] = "V", [7] = "F" }; @@ -365,6 +365,7 @@ static int format (FILE *f, char *format, ...) va_start (args, format); vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); string (f, buf); return 0; } diff --git a/i965/brw_draw.c b/i965/brw_draw.c index 8bcb608..e348d46 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -39,10 +39,8 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" -#include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH @@ -84,7 +82,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) GLcontext *ctx = &brw->intel.ctx; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); + printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); /* Slight optimization to avoid the GS program when not needed: */ @@ -127,7 +125,7 @@ static void brw_emit_prim(struct brw_context *brw, struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s %d %d\n", 
_mesa_lookup_enum_by_nr(prim->mode), + printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); prim_packet.header.opcode = CMD_3D_PRIM; @@ -145,7 +143,7 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ - brw->no_batch_wrap = GL_TRUE; + intel->no_batch_wrap = GL_TRUE; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -157,13 +155,13 @@ static void brw_emit_prim(struct brw_context *brw, } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet), LOOP_CLIPRECTS); + sizeof(prim_packet)); } if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); } - brw->no_batch_wrap = GL_FALSE; + intel->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, @@ -339,12 +337,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * so can't access it earlier. */ - LOCK_HARDWARE(intel); - - if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { - UNLOCK_HARDWARE(intel); - return GL_TRUE; - } + intel_prepare_render(intel); for (i = 0; i < nr_prims; i++) { uint32_t hw_prim; @@ -356,8 +349,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). 
*/ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, - LOOP_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); hw_prim = brw_set_prim(brw, prim[i].mode); @@ -404,7 +396,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (intel->always_flush_batch) intel_batchbuffer_flush(intel->batch); out: - UNLOCK_HARDWARE(intel); brw_state_cache_check_size(brw); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index ee684f6..71a4357 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -29,19 +29,15 @@ #include "main/glheader.h" #include "main/bufferobj.h" #include "main/context.h" -#include "main/state.h" -/* #include "main/api_validate.h" */ #include "main/enums.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "intel_tex.h" static GLuint double_types[5] = { 0, @@ -59,6 +55,14 @@ static GLuint float_types[5] = { BRW_SURFACEFORMAT_R32G32B32A32_FLOAT }; +static GLuint half_float_types[5] = { + 0, + BRW_SURFACEFORMAT_R16_FLOAT, + BRW_SURFACEFORMAT_R16G16_FLOAT, + 0, /* can't seem to render this one */ + BRW_SURFACEFORMAT_R16G16B16A16_FLOAT +}; + static GLuint uint_types_norm[5] = { 0, BRW_SURFACEFORMAT_R32_UNORM, @@ -165,13 +169,14 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", + printf("type %s size %d normalized %d\n", _mesa_lookup_enum_by_nr(type), size, normalized); if (normalized) { switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; + case GL_HALF_FLOAT: return half_float_types[size]; case GL_INT: return int_types_norm[size]; case GL_SHORT: return short_types_norm[size]; case GL_BYTE: return byte_types_norm[size]; @@ -194,6 +199,7 @@ static GLuint 
get_surface_type( GLenum type, GLuint size, switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; + case GL_HALF_FLOAT: return half_float_types[size]; case GL_INT: return int_types_scale[size]; case GL_SHORT: return short_types_scale[size]; case GL_BYTE: return byte_types_scale[size]; @@ -211,6 +217,7 @@ static GLuint get_size( GLenum type ) switch (type) { case GL_DOUBLE: return sizeof(GLdouble); case GL_FLOAT: return sizeof(GLfloat); + case GL_HALF_FLOAT: return sizeof(GLhalfARB); case GL_INT: return sizeof(GLint); case GL_SHORT: return sizeof(GLshort); case GL_BYTE: return sizeof(GLbyte); @@ -243,14 +250,6 @@ static void wrap_buffers( struct brw_context *brw, dri_bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. - */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, @@ -277,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { - struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -290,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char 
*)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); } else { char *dest; const unsigned char *src = element->glarray->Ptr; int i; - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - dri_bo_subdata(element->bo, - element->offset, - size, - data); + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; } + + drm_intel_gem_bo_unmap_gtt(element->bo); } } @@ -356,7 +328,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* First build an array of pointers to ve's in vb.inputs_read */ if (0) - _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; @@ -502,12 +474,19 @@ static void brw_emit_vertices(struct brw_context *brw) * a VE loads from them. 
*/ if (brw->vb.nr_enabled == 0) { - BEGIN_BATCH(3, IGNORE_CLIPRECTS); + BEGIN_BATCH(3); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -522,20 +501,28 @@ static void brw_emit_vertices(struct brw_context *brw) * are interleaved or from the same VBO. TBD if this makes a * performance difference. 
*/ - BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | ((1 + brw->vb.nr_enabled * 4) - 2)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t dw0; + + if (intel->gen >= 6) { + dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (i << GEN6_VB0_INDEX_SHIFT); + } else { + dw0 = BRW_VB0_ACCESS_VERTEXDATA | + (i << BRW_VB0_INDEX_SHIFT); + } - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | + OUT_BATCH(dw0 | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake || intel->gen >= 6) { OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->bo->size - 1); @@ -545,7 +532,7 @@ static void brw_emit_vertices(struct brw_context *brw) } ADVANCE_BATCH(); - BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; @@ -566,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake || intel->gen >= 6) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -625,13 +619,9 @@ static void brw_prepare_indices(struct brw_context 
*brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; @@ -712,7 +702,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH( ib.header.dword ); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, diff --git a/i965/brw_eu.c b/i965/brw_eu.c index 1df5613..4e7c122 100644 --- a/i965/brw_eu.c +++ b/i965/brw_eu.c @@ -237,7 +237,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; - _mesa_free(call); + free(call); } c->first_call = NULL; } @@ -247,7 +247,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_label *label, *next; for (label = c->first_label; label; label = next) { next = label->next; - _mesa_free(label); + free(label); } c->first_label = NULL; } diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c index 29f3f6d..99453af 100644 --- a/i965/brw_eu_debug.c +++ b/i965/brw_eu_debug.c @@ -54,9 +54,9 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", - hwreg.abs ? "abs/" : "", - hwreg.negate ? "-" : ""); + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? 
"-" : ""); if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.nr % 2 == 0 && @@ -66,7 +66,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { /* vector register */ - _mesa_printf("vec%d", hwreg.nr); + printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && @@ -74,13 +74,13 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { /* "scalar" register */ - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else if (hwreg.file == BRW_IMMEDIATE_VALUE) { - _mesa_printf("imm %f", hwreg.dw1.f); + printf("imm %f", hwreg.dw1.f); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 7ceabba..f69d529 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn, static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg ) { - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -199,7 +197,7 @@ void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; @@ -252,9 +250,10 @@ static void brw_set_math_message( struct brw_context *brw, GLboolean saturate, GLuint dataType ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.math_igdng.function = function; 
insn->bits3.math_igdng.int_type = integer_type; insn->bits3.math_igdng.precision = low_precision; @@ -319,9 +318,10 @@ static void brw_set_urb_message( struct brw_context *brw, GLuint offset, GLuint swizzle_control ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake || intel->gen >= 6) { insn->bits3.urb_igdng.opcode = 0; /* ? */ insn->bits3.urb_igdng.offset = offset; insn->bits3.urb_igdng.swizzle_control = swizzle_control; @@ -332,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw, insn->bits3.urb_igdng.response_length = response_length; insn->bits3.urb_igdng.msg_length = msg_length; insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + /* For SNB, the SFID bits moved to the condmod bits, and + * EOT stayed in bits3 above. Does the EOT bit setting + * below on Ironlake even do anything? + */ + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } } else { insn->bits3.urb.opcode = 0; /* ? 
*/ insn->bits3.urb.offset = offset; @@ -358,9 +366,10 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint response_length, GLuint end_of_thread ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; insn->bits3.dp_write_igdng.msg_control = msg_control; insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; @@ -395,9 +404,10 @@ static void brw_set_dp_read_message( struct brw_context *brw, GLuint response_length, GLuint end_of_thread ) { + struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; insn->bits3.dp_read_igdng.msg_control = msg_control; insn->bits3.dp_read_igdng.msg_type = msg_type; @@ -433,10 +443,11 @@ static void brw_set_sampler_message(struct brw_context *brw, GLuint header_present, GLuint simd_mode) { + struct intel_context *intel = &brw->intel; assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { insn->bits3.sampler_igdng.binding_table_index = binding_table_index; insn->bits3.sampler_igdng.sampler = sampler; insn->bits3.sampler_igdng.msg_type = msg_type; @@ -447,7 +458,7 @@ static void brw_set_sampler_message(struct brw_context *brw, insn->bits3.sampler_igdng.end_of_thread = eot; insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; insn->bits2.send_igdng.end_of_thread = eot; - } else if (BRW_IS_G4X(brw)) { + } else if (intel->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -648,10 +659,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction 
*if_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) { @@ -690,9 +702,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { + struct intel_context *intel = &p->brw->intel; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) { @@ -803,10 +816,11 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint br = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) br = 2; if (p->single_program_flow) @@ -846,14 +860,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } @@ -908,26 +923,40 @@ void brw_math( struct brw_compile *p, GLuint data_type, GLuint precision ) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + struct intel_context *intel = &p->brw->intel; - /* Example code doesn't set predicate_control for send - * instructions. 
- */ - insn->header.predicate_control = 0; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen >= 6) { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, - insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); + } } /** @@ -1263,7 +1292,7 @@ void brw_SAMPLE(struct brw_compile *p, GLboolean need_stall = 0; if (writemask == 0) { - /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1295,7 +1324,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; - /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1368,7 +1397,18 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = 
&p->brw->intel; + struct brw_instruction *insn; + + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < BRW_MAX_MRF); @@ -1376,7 +1416,8 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 562a178..ba401c2 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -36,18 +36,13 @@ #include "swrast/swrast.h" #include "tnl/tnl.h" #include "brw_context.h" -#include "brw_fallback.h" -#include "intel_chipset.h" #include "intel_fbo.h" #include "intel_regions.h" -#include "glapi/glapi.h" - #define FILE_DEBUG_FLAG DEBUG_FALLBACKS static GLboolean do_check_fallback(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -86,8 +81,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) } /* _NEW_BUFFERS */ - if (IS_965(intel->intelScreen->deviceID) && - !IS_G4X(intel->intelScreen->deviceID)) { + if (!brw->has_surface_tile_offset) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); diff --git a/i965/brw_gs.c b/i965/brw_gs.c index 610b6c3..7261b31 100644 --- a/i965/brw_gs.c +++ b/i965/brw_gs.c @@ -47,6 +47,7 @@ static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; GLuint program_size; @@ -54,13 +55,12 @@ static void compile_gs_prog( 
struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ @@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->gs.prog_data ); + brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->gs.prog_data); } static const GLenum gs_prim[GL_POLYGON+1] = { diff --git a/i965/brw_gs.h b/i965/brw_gs.h index 010c1c2..813b8d4 100644 --- a/i965/brw_gs.h +++ b/i965/brw_gs.h @@ -63,7 +63,6 @@ struct brw_gs_compile { GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; - GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c index 0fc5b02..dd7b057 100644 --- a/i965/brw_gs_emit.c +++ b/i965/brw_gs_emit.c @@ -40,7 +40,6 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" -#include "brw_util.h" #include "brw_gs.h" static void brw_gs_alloc_regs( struct brw_gs_compile *c, @@ -122,12 +121,14 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 4); /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); if (key->pv_first) { brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); @@ -145,9 +146,11 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 4); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); if (key->pv_first) { brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); @@ -165,9 +168,11 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) void brw_gs_tris( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 3); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); @@ -176,9 +181,11 @@ void brw_gs_tris( struct brw_gs_compile *c ) void brw_gs_lines( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 2); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); @@ -186,9 +193,11 @@ void brw_gs_lines( struct brw_gs_compile *c ) void brw_gs_points( struct brw_gs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; + brw_gs_alloc_regs(c, 1); - if (c->need_ff_sync) + if (intel->needs_ff_sync) brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); } diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c 
index ed9d2ff..d8ad5ce 100644 --- a/i965/brw_gs_state.c +++ b/i965/brw_gs_state.c @@ -34,7 +34,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_gs_unit_key { unsigned int total_grf; @@ -72,6 +71,7 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) static dri_bo * gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_gs_unit_state gs; dri_bo *bo; @@ -98,7 +98,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) else gs.thread4.max_threads = 0; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) gs.thread4.rendering_enable = 1; if (INTEL_DEBUG & DEBUG_STATS) @@ -107,8 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + &gs, sizeof(gs)); if (key->prog_active) { /* Emit GS program relocation */ diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index 4b0d598..d030ed4 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -78,10 +78,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; - if (!intel->constant_cliprect) - return; - - BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | @@ -116,7 +113,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - BEGIN_BATCH(6, IGNORE_CLIPRECTS); + BEGIN_BATCH(6); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != NULL) OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ @@ -139,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = { 
.emit = upload_binding_table_pointers, }; +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. + */ +static void upload_gen6_binding_table_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen6_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_gen6_binding_table_pointers, +}; /** * Upload pointers to the per-stage state. @@ -150,7 +182,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - BEGIN_BATCH(7, IGNORE_CLIPRECTS); + BEGIN_BATCH(7); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); if (brw->gs.prog_active) @@ -212,10 +244,17 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
6 : 5; + unsigned int len; + + if (intel->gen >= 6) + len = 7; + else if (intel->is_g4x || intel->is_ironlake) + len = 6; + else + len = 5; if (region == NULL) { - BEGIN_BATCH(len, IGNORE_CLIPRECTS); + BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); @@ -223,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + ADVANCE_BATCH(); } else { unsigned int format; @@ -246,8 +288,10 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); + if (IS_GEN6(intel->intelScreen->deviceID)) + assert(region->tiling != I915_TILING_NONE); - BEGIN_BATCH(len, IGNORE_CLIPRECTS); + BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | @@ -262,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + /* Initialize it for safety. */ + if (intel->gen >= 6) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); ADVANCE_BATCH(); } } @@ -330,7 +385,7 @@ const struct brw_tracked_state brw_polygon_stipple = { static void upload_polygon_stipple_offset(struct brw_context *brw) { - __DRIdrawablePrivate *dPriv = brw->intel.driDrawable; + GLcontext *ctx = &brw->intel.ctx; struct brw_polygon_stipple_offset bpso; memset(&bpso, 0, sizeof(bpso)); @@ -346,8 +401,8 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * worry about. 
*/ if (brw->intel.ctx.DrawBuffer->Name == 0) { - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31; } else { bpso.bits0.y_offset = 0; @@ -374,8 +429,8 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - - if (BRW_IS_965(brw)) + + if (!brw->has_aa_line_parameters) return; /* use legacy aa line coverage computation */ @@ -438,18 +493,20 @@ const struct brw_tracked_state brw_line_stipple = { static void upload_invarient_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT(brw); + ps.header.opcode = brw->CMD_PIPELINE_SELECT; ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } - { + if (intel->gen < 6) { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); @@ -462,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &gdo); } + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (intel->gen >= 6) { + int i; + + BEGIN_BATCH(3); + OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); /* positions for 4/8-sample */ + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + + for (i = 0; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); + } + } /* 0x61020000 State Instruction Pointer */ { @@ -480,7 +563,7 @@ static void upload_invarient_state( struct brw_context *brw ) struct 
brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - vfs.opcode = CMD_VF_STATISTICS(brw); + vfs.opcode = brw->CMD_VF_STATISTICS; if (INTEL_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; @@ -512,8 +595,21 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - if (BRW_IS_IGDNG(brw)) { - BEGIN_BATCH(8, IGNORE_CLIPRECTS); + if (intel->gen >= 6) { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Dynamic state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Dynamic state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else if (intel->is_ironlake) { + BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ @@ -524,7 +620,7 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Instruction access upper bound */ ADVANCE_BATCH(); } else { - BEGIN_BATCH(6, IGNORE_CLIPRECTS); + BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ OUT_BATCH(1); /* Surface state base address */ diff --git a/i965/brw_program.c b/i965/brw_program.c index bac6918..c78f7b3 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -37,7 +37,6 @@ #include "tnl/tnl.h" #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, @@ -112,9 +111,10 @@ static GLboolean brwIsProgramNative( GLcontext *ctx, return GL_TRUE; } -static void brwProgramStringNotify( GLcontext 
*ctx, - GLenum target, - struct gl_program *prog ) + +static GLboolean brwProgramStringNotify( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); @@ -151,6 +151,9 @@ static void brwProgramStringNotify( GLcontext *ctx, */ _tnl_program_string(ctx, target, prog); } + + /* XXX check if program is legal, within limits */ + return GL_TRUE; } void brwInitFragProgFuncs( struct dd_function_table *functions ) diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c index a195bc3..6cce7e5 100644 --- a/i965/brw_queryobj.c +++ b/i965/brw_queryobj.c @@ -73,7 +73,7 @@ brw_new_query_object(GLcontext *ctx, GLuint id) { struct brw_query_object *query; - query = _mesa_calloc(sizeof(struct brw_query_object)); + query = calloc(1, sizeof(struct brw_query_object)); query->Base.Id = id; query->Base.Result = 0; @@ -89,7 +89,7 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q) struct brw_query_object *query = (struct brw_query_object *)q; dri_bo_unreference(query->bo); - _mesa_free(query); + free(query); } static void @@ -188,7 +188,7 @@ brw_emit_query_begin(struct brw_context *brw) if (brw->query.active || is_empty_list(&brw->query.active_head)) return; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); @@ -227,7 +227,7 @@ brw_emit_query_end(struct brw_context *brw) if (!brw->query.active) return; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); diff --git a/i965/brw_sf.c b/i965/brw_sf.c index 968890f..8e6839b 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - 
&brw->sf.prog_data ); + brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->sf.prog_data); } /* Calculate interpolants for triangle and line rasterization. diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index 3eae41e..bb08055 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -149,6 +149,7 @@ static void copy_colors( struct brw_sf_compile *c, static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; @@ -161,7 +162,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; brw_push_insn_state(p); @@ -187,6 +188,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; @@ -199,7 +201,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (BRW_IS_IGDNG(p->brw)) + if (intel->is_ironlake) jmpi = 2; brw_push_insn_state(p); diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index bb69435..847c886 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -35,7 +35,6 @@ #include "brw_state.h" #include "brw_defines.h" #include "main/macros.h" -#include "intel_fbo.h" static void upload_sf_vp(struct brw_context *brw) { @@ -70,9 +69,9 @@ static void upload_sf_vp(struct brw_context *brw) * for DrawBuffer->_[XY]{min,max} */ - /* The scissor only needs to handle the intersection of 
drawable and - * scissor rect. Clipping to the boundaries of static shared buffers - * for front/back/depth is covered by looping over cliprects in brw_draw.c. + /* The scissor only needs to handle the intersection of drawable + * and scissor rect, since there are no longer cliprects for shared + * buffers with DRI2. * * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. @@ -165,6 +164,7 @@ static dri_bo * sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, dri_bo **reloc_bufs) { + struct intel_context *intel = &brw->intel; struct brw_sf_unit_state sf; dri_bo *bo; int chipset_max_threads; @@ -177,7 +177,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread3.dispatch_grf_start_reg = 3; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) sf.thread3.urb_entry_read_offset = 3; else sf.thread3.urb_entry_read_offset = 1; @@ -187,10 +187,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or - * 48(IGDNG) threads + /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or + * 48 (Ironlake) threads. */ - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) chipset_max_threads = 48; else chipset_max_threads = 24; @@ -308,8 +308,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + &sf, sizeof(sf)); /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. 
diff --git a/i965/brw_state.h b/i965/brw_state.h index b129b1f..f790cfa 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -35,7 +35,7 @@ #include "brw_context.h" -static inline void +static INLINE void brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) { assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); @@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; const struct brw_tracked_state brw_index_buffer; +const struct brw_tracked_state gen6_binding_table_pointers; +const struct brw_tracked_state gen6_blend_state; +const struct brw_tracked_state gen6_cc_state_pointers; +const struct brw_tracked_state gen6_cc_vp; +const struct brw_tracked_state gen6_clip_state; +const struct brw_tracked_state gen6_clip_vp; +const struct brw_tracked_state gen6_color_calc_state; +const struct brw_tracked_state gen6_depth_stencil_state; +const struct brw_tracked_state gen6_gs_state; +const struct brw_tracked_state gen6_sampler_state; +const struct brw_tracked_state gen6_scissor_state; +const struct brw_tracked_state gen6_sf_state; +const struct brw_tracked_state gen6_sf_vp; +const struct brw_tracked_state gen6_urb; +const struct brw_tracked_state gen6_viewport_state; +const struct brw_tracked_state gen6_vs_state; +const struct brw_tracked_state gen6_wm_state; /** * Use same key for WM and VS surfaces. 
@@ -124,16 +141,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); +drm_intel_bo *brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz); + +drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + void *aux_return); dri_bo *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -151,7 +178,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index 7821898..3901941 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + intel_batchbuffer_data(brw->intel.batch, data, sz); return GL_TRUE; } @@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, 
if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) return GL_FALSE; if (item->sz != sz) { - _mesa_free(item->header); - item->header = _mesa_malloc(sz); + free(item->header); + item->header = malloc(sz); item->sz = sz; } goto emit; @@ -68,14 +68,14 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, assert(!item); item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = _mesa_malloc(sz); + item->header = malloc(sz); item->sz = sz; item->next = brw->cached_batch_items; brw->cached_batch_items = item; emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + intel_batchbuffer_data(brw->intel.batch, data, sz); return GL_TRUE; } diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c index e4c9ba7..c08cb45 100644 --- a/i965/brw_state_cache.c +++ b/i965/brw_state_cache.c @@ -59,37 +59,27 @@ #include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" - -/* XXX: Fixme - have to include these to get the sizes of the prog_key - * structs: - */ #include "brw_wm.h" -#include "brw_vs.h" -#include "brw_clip.h" -#include "brw_sf.h" -#include "brw_gs.h" static GLuint -hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +hash_key(struct brw_cache_item *item) { - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; - assert(key_size % 4 == 0); + assert(item->key_size % 4 == 0); /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) { + for (i = 0; i < item->key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); - for (i = 0; i < key_size/4; i++) { + ikey = (GLuint *)item->reloc_bufs; + for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash 
>> 27); } @@ -114,11 +104,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) +{ + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0) && + a->nr_reloc_bufs == b->nr_reloc_bufs && + (memcmp(a->reloc_bufs, b->reloc_bufs, + a->nr_reloc_bufs * sizeof(dri_bo *)) == 0); +} static struct brw_cache_item * -search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) { struct brw_cache_item *c; @@ -133,13 +134,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, #endif for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->cache_id == cache_id && - c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + if (brw_cache_item_equals(lookup, c)) return c; } @@ -155,7 +150,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) calloc(1, size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -182,10 +177,18 @@ brw_search_cache(struct brw_cache *cache, void *aux_return) { struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + struct brw_cache_item lookup; + GLuint hash; - item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + 
lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item == NULL) return NULL; @@ -200,48 +203,52 @@ brw_search_cache(struct brw_cache *cache, } -dri_bo * -brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) +drm_intel_bo * +brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + void *aux_return) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint hash; GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); - GLuint aux_size = cache->aux_size[cache_id]; void *tmp; dri_bo *bo; int i; + item->cache_id = cache_id; + item->key = key; + item->key_size = key_size; + item->reloc_bufs = reloc_bufs; + item->nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(item); + item->hash = hash; + /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, cache->name[cache_id], data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); + tmp = malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size, aux, aux_size); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) dri_bo_reference(reloc_bufs[i]); } - item->cache_id = cache_id; item->key = tmp; - item->hash = hash; - item->key_size = key_size; item->reloc_bufs = tmp + 
key_size + aux_size; - item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; dri_bo_reference(bo); @@ -255,12 +262,11 @@ brw_upload_cache( struct brw_cache *cache, cache->n_items++; if (aux_return) { - assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", + printf("upload %s: %d bytes to cache id %d\n", cache->name[cache_id], data_size, cache_id); @@ -272,6 +278,23 @@ brw_upload_cache( struct brw_cache *cache, return bo; } +drm_intel_bo * +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size) +{ + return brw_upload_cache_with_auxdata(cache, cache_id, + key, key_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, 0, + NULL); +} /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. 
@@ -292,11 +315,18 @@ brw_cache_data(struct brw_cache *cache, GLuint nr_reloc_bufs) { dri_bo *bo; - struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - - item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); + struct brw_cache_item *item, lookup; + GLuint hash; + + lookup.cache_id = cache_id; + lookup.key = data; + lookup.key_size = data_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item) { update_cache_last(cache, cache_id, item->bo); dri_bo_reference(item->bo); @@ -306,8 +336,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + data, data_size); return bo; } @@ -321,11 +350,9 @@ enum pool_type { static void brw_init_cache_id(struct brw_cache *cache, const char *name, - enum brw_cache_id id, - GLuint aux_size) + enum brw_cache_id id) { cache->name[id] = strdup(name); - cache->aux_size[id] = aux_size; } @@ -339,82 +366,31 @@ brw_init_non_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "CC_VP", - BRW_CC_VP, - 0); - - brw_init_cache_id(cache, - "CC_UNIT", - BRW_CC_UNIT, - 0); - - brw_init_cache_id(cache, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_data)); - - brw_init_cache_id(cache, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - 0); - - brw_init_cache_id(cache, - "SAMPLER", - BRW_SAMPLER, - 0); - - brw_init_cache_id(cache, - "WM_UNIT", - BRW_WM_UNIT, - 0); - - brw_init_cache_id(cache, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_data)); - - brw_init_cache_id(cache, - "SF_VP", - BRW_SF_VP, - 0); - - brw_init_cache_id(cache, - "SF_UNIT", - 
BRW_SF_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_UNIT", - BRW_VS_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_data)); - - brw_init_cache_id(cache, - "CLIP_UNIT", - BRW_CLIP_UNIT, - 0); - - brw_init_cache_id(cache, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_data)); - - brw_init_cache_id(cache, - "GS_UNIT", - BRW_GS_UNIT, - 0); - - brw_init_cache_id(cache, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_data)); + calloc(1, cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR); + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); + + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); + + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); + + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); + + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); + + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); + + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); + + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); + brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); } @@ -428,17 +404,10 @@ brw_init_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - 0); + calloc(1, cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0); + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND); } @@ -457,7 +426,7 @@ 
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -476,7 +445,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; } @@ -497,7 +466,7 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { @@ -525,7 +494,7 @@ void brw_state_cache_check_size(struct brw_context *brw) { if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -544,7 +513,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c index e94fa7d..020ac52 100644 --- a/i965/brw_state_dump.c +++ b/i965/brw_state_dump.c @@ -28,7 +28,6 @@ #include "main/mtypes.h" #include "brw_context.h" -#include "brw_state.h" #include "brw_defines.h" /** diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c index af8dfb4..9e54f29 100644 --- a/i965/brw_state_upload.c +++ b/i965/brw_state_upload.c @@ -35,6 +35,7 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "intel_chipset.h" /* This is used to initialize brw->state.atoms[]. 
We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -42,7 +43,7 @@ * current fragment and vertex programs, and so cannot be a static * value. */ -const struct brw_tracked_state *atoms[] = +static const struct brw_tracked_state *gen4_atoms[] = { &brw_check_fallback, @@ -101,6 +102,63 @@ const struct brw_tracked_state *atoms[] = &brw_constant_buffer }; +const struct brw_tracked_state *gen6_atoms[] = +{ + &brw_check_fallback, + + &brw_wm_input_sizes, + &brw_vs_prog, + &brw_gs_prog, + &brw_wm_prog, + + &gen6_clip_vp, + &gen6_sf_vp, + &gen6_cc_vp, + + /* Command packets: */ + &brw_invarient_state, + + &gen6_viewport_state, /* must do after *_vp stages */ + + &gen6_urb, + &gen6_blend_state, /* must do before cc unit */ + &gen6_color_calc_state, /* must do before cc unit */ + &gen6_depth_stencil_state, /* must do before cc unit */ + &gen6_cc_state_pointers, + + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ + + &brw_wm_samplers, + &gen6_sampler_state, + + &gen6_vs_state, + &gen6_gs_state, + &gen6_clip_state, + &gen6_sf_state, + &gen6_wm_state, + + &gen6_scissor_state, + + &brw_state_base_address, + + &gen6_binding_table_pointers, + + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_polygon_stipple_offset, + + &brw_line_stipple, + &brw_aa_line_parameters, + + &brw_drawing_rect, + + &brw_indices, + &brw_index_buffer, + &brw_vertices, +}; void brw_init_state( struct brw_context *brw ) { @@ -208,7 +266,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), @@ -218,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = { }; static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_BLEND_STATE), 
DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), @@ -277,6 +335,8 @@ void brw_validate_state( struct brw_context *brw ) struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; + const struct brw_tracked_state **atoms; + int num_atoms; brw_clear_validated_bos(brw); @@ -285,6 +345,14 @@ void brw_validate_state( struct brw_context *brw ) brw_add_validated_bo(brw, intel->batch->buf); + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } + if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; @@ -312,7 +380,7 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) @@ -344,9 +412,20 @@ void brw_validate_state( struct brw_context *brw ) void brw_upload_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; int i; static int dirty_count = 0; + const struct brw_tracked_state **atoms; + int num_atoms; + + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } brw_clear_validated_bos(brw); @@ -356,10 +435,10 @@ void brw_upload_state(struct brw_context *brw) * state atoms are ordered correctly in the list. 
*/ struct brw_state_flags examined, prev; - _mesa_memset(&examined, 0, sizeof(examined)); + memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -388,7 +467,7 @@ void brw_upload_state(struct brw_context *brw) } } else { - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) diff --git a/i965/brw_structs.h b/i965/brw_structs.h index 66d4127..3c2adfc 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -658,7 +658,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + 
GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:25; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -752,8 +850,6 @@ struct brw_cc_unit_state } cc7; }; - - struct brw_sf_unit_state { struct thread0 thread0; @@ -813,6 +909,11 @@ struct brw_sf_unit_state }; +struct gen6_scissor_state +{ + GLuint ymin, xmin; + GLuint ymax, xmax; +}; struct brw_gs_unit_state { @@ -1043,6 +1144,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... 
*/ struct brw_surface_state diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index e59e52e..09edfd8 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -36,7 +36,6 @@ #include "intel_tex_layout.h" #include "intel_context.h" #include "main/macros.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -49,7 +48,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(intel->intelScreen->deviceID)) { + if (intel->is_ironlake) { GLuint align_h = 2, align_w = 4; GLuint level; GLuint x = 0; diff --git a/i965/brw_urb.c b/i965/brw_urb.c index 8c6f435..4f6b900 100644 --- a/i965/brw_urb.c +++ b/i965/brw_urb.c @@ -105,7 +105,8 @@ static GLboolean check_urb_layout( struct brw_context *brw ) brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; - return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw); + return brw->urb.cs_start + brw->urb.nr_cs_entries * + brw->urb.csize <= brw->urb.size; } /* Most minimal update, forces re-emit of URB fence packet after GS @@ -113,6 +114,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) */ static void recalculate_urb_fence( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; GLuint sfsize = brw->sf.prog_data->urb_entry_size; @@ -146,7 +148,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.constrained = 0; - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { brw->urb.nr_vs_entries = 128; brw->urb.nr_sf_entries = 48; if (check_urb_layout(brw)) { @@ -156,7 +158,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; } - } 
else if (BRW_IS_G4X(brw)) { + } else if (intel->is_g4x) { brw->urb.nr_vs_entries = 64; if (check_urb_layout(brw)) { goto done; @@ -184,23 +186,23 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - _mesa_printf("couldn't calculate URB layout!\n"); + printf("couldn't calculate URB layout!\n"); exit(1); } if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - _mesa_printf("URB CONSTRAINED\n"); + printf("URB CONSTRAINED\n"); } done: if (INTEL_DEBUG & DEBUG_URB) - _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, brw->urb.sf_start, brw->urb.cs_start, - URB_SIZES(brw)); + brw->urb.size); brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } @@ -244,7 +246,7 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits0.gs_fence = brw->urb.clip_start; uf.bits0.clp_fence = brw->urb.sf_start; uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = URB_SIZES(brw); + uf.bits1.cs_fence = brw->urb.size; BRW_BATCH_STRUCT(brw, &uf); } diff --git a/i965/brw_vs.c b/i965/brw_vs.c index fd055e2..44b085e 100644 --- a/i965/brw_vs.c +++ b/i965/brw_vs.c @@ -35,6 +35,7 @@ #include "brw_util.h" #include "brw_state.h" #include "shader/prog_print.h" +#include "shader/prog_parameter.h" @@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; + int aux_size; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + /* We upload from &c.prog_data including the constant_map assuming + * they're packed together. 
It would be nice to have a + * compile-time assert macro here. + */ + assert(c.constant_map == (int8_t *)&c.prog_data + + sizeof(c.prog_data)); + assert(ctx->Const.VertexProgram.MaxNativeParameters == + ARRAY_SIZE(c.constant_map)); + + aux_size = sizeof(c.prog_data); + if (c.vp->use_const_buffer) + aux_size += c.vp->program.Base.Parameters->NumParameters; + dri_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->vs.prog_data ); + brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + aux_size, + &brw->vs.prog_data); } @@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + + sizeof(*brw->vs.prog_data)); } diff --git a/i965/brw_vs.h b/i965/brw_vs.h index 4a59136..95e0501 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -51,6 +51,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + int8_t constant_map[1024]; struct brw_vertex_program *vp; @@ -81,6 +82,8 @@ struct brw_vs_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLboolean needs_stack; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index 27aac8b..a7c4b58 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -67,6 +67,7 @@ static void release_tmps( struct brw_vs_compile *c ) */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct intel_context *intel = &c->func.brw->intel; GLuint i, reg = 0, mrf; int attributes_in_vue; @@ -103,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ if (c->vp->use_const_buffer) { - /* get 
constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + int constant = 0; + + /* We've got more constants than we can load with the push + * mechanism. This is often correlated with reladdr loads where + * we should probably be using a pull mechanism anyway to avoid + * excessive reading. However, the pull mechanism is slow in + * general. So, we try to allocate as many non-reladdr-loaded + * constants through the push buffer as we can before giving up. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || + inst->SrcReg[arg].RelAddr) + continue; + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } + } + } + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, + (i%2) * 4), + 0, 4, 1); + } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + /* XXX 0 causes a bug elsewhere... 
*/ + c->prog_data.nr_params = MAX2(constant * 4, 4); } else { /* use a section of the GRF for constants */ @@ -141,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (BRW_IS_IGDNG(c->func.brw)) - mrf = 8; + if (intel->gen >= 6) + mrf = 6; + else if (intel->is_ironlake) + mrf = 8; else - mrf = 4; + mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { @@ -213,8 +254,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if (c->needs_stack) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -238,17 +281,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + if (intel->gen >= 6) + c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8; + else if (intel->is_ironlake) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); - _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -438,9 +483,11 @@ static void emit_math1( struct brw_vs_compile *c, * whether that turns out to be a simulator bug or not: */ struct brw_compile *p = 
&c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -469,9 +516,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -761,15 +810,14 @@ get_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; - struct brw_reg const_reg; - struct brw_reg const2_reg; - const GLboolean relAddr = src->RelAddr; + struct brw_reg const_reg = c->current_const[argIndex].reg; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || relAddr) { + if (c->current_const[argIndex].index != src->Index) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; #if 0 @@ -778,48 +826,74 @@ get_constant(struct brw_vs_compile *c, #endif /* need to fetch the constant now */ brw_dp_READ_4_vs(p, - c->current_const[argIndex].reg,/* writeback dest */ + const_reg, /* writeback dest */ 0, /* oword */ - relAddr, /* relative indexing? */ + 0, /* relative indexing? 
*/ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - - if (relAddr) { - /* second read */ - const2_reg = get_tmp(c); - - /* use upper half of address reg for second read */ - addrReg = stride(addrReg, 0, 4, 0); - addrReg.subnr = 16; - - brw_dp_READ_4_vs(p, - const2_reg, /* writeback dest */ - 1, /* oword */ - relAddr, /* relative indexing? */ - addrReg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER - ); - } } - const_reg = c->current_const[argIndex].reg; + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; - if (relAddr) { - /* merge the two Owords into the constant register */ - /* const_reg[7..4] = const2_reg[7..4] */ - brw_MOV(p, - suboffset(stride(const_reg, 0, 4, 1), 4), - suboffset(stride(const2_reg, 0, 4, 1), 4)); - release_tmp(c, const2_reg); - } - else { - /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; - } + return const_reg; +} + +static struct brw_reg +get_reladdr_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg = c->current_const[argIndex].reg; + struct brw_reg const2_reg; + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + assert(argIndex < 3); + + /* Can't reuse a reladdr constant load. */ + c->current_const[argIndex].index = -1; + + #if 0 + printf(" fetch const[a0.x+%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + + /* fetch the first vec4 */ + brw_dp_READ_4_vs(p, + const_reg, /* writeback dest */ + 0, /* oword */ + 1, /* relative indexing? 
*/ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + /* second vec4 */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + 1, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); return const_reg; } @@ -927,7 +1001,13 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { - return get_constant(c, inst, argIndex); + if (!relAddr && c->constant_map[index] != -1) { + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else if (relAddr) + return get_reladdr_constant(c, inst, argIndex); + else + return get_constant(c, inst, argIndex); } else if (relAddr) { return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); @@ -1113,11 +1193,13 @@ static void emit_swz( struct brw_vs_compile *c, static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; - GLuint len_vertext_header = 2; + GLuint len_vertex_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1125,18 +1207,20 @@ static void emit_vertex_write( struct brw_vs_compile *c) get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); } - /* Build ndc coords */ - ndc = get_tmp(c); - /* ndc 
= 1.0 / pos.w */ - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (intel->gen < 6) { + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || BRW_IS_965(p->brw)) + c->key.nr_userclip || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1167,7 +1251,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (BRW_IS_965(p->brw)) { + if (brw->has_negative_rhw_bug) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1193,21 +1277,41 @@ static void emit_vertex_write( struct brw_vs_compile *c) * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - - if (BRW_IS_IGDNG(p->brw)) { - /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ - brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ - /* m4, m5 contain the distances from vertex to the user clip planeXXX. - * Seems it is useless for us. - * m6 is used for aligning, so that the remainder of vertex element is - * reg-aligned. - */ - brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ - len_vertext_header = 6; + + if (intel->gen >= 6) { + /* There are 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m1) of the header is indices, point width, clip flags. 
+ * dword 4-7 (m2) is the 4D space position + * dword 8-15 (m3,m4) of the vertex header is the user clip distance. + * m5 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), pos); + brw_MOV(p, offset(m0, 5), pos); + len_vertex_header = 4; + } else if (intel->is_ironlake) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + brw_MOV(p, offset(m0, 7), pos); + len_vertex_header = 6; } else { - brw_MOV(p, offset(m0, 3), pos); - len_vertext_header = 2; + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. 
+ */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + len_vertex_header = 2; } eot = (c->first_overflow_output == 0); @@ -1218,7 +1322,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1359,29 +1463,32 @@ void brw_vs_emit(struct brw_vs_compile *c ) #define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 }; const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-mesa:\n"); + printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); - _mesa_printf("\n"); + printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { GLuint i; struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + + /* Message registers can't be read, so copy the output into GRF + * register if they are used in source registers + */ for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; GLuint index = src->Index; @@ -1389,12 +1496,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) 
c->output_regs[index].used_in_src = GL_TRUE; } + + switch (inst->Opcode) { + case OPCODE_CAL: + case OPCODE_RET: + c->needs_stack = GL_TRUE; + break; + default: + break; + } } /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + if (c->needs_stack) + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { @@ -1592,7 +1710,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) loop_depth--; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) br = 2; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); @@ -1708,9 +1826,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (INTEL_DEBUG & DEBUG_VS) { int i; - _mesa_printf("vs-native:\n"); + printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index 7285466..fd9f2fe 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -82,9 +82,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) static dri_bo * vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { + struct intel_context *intel = &brw->intel; struct brw_vs_unit_state vs; dri_bo *bo; - int chipset_max_threads; memset(&vs, 0, sizeof(vs)); @@ -98,7 +98,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.thread1.single_program_flow = 0; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ else vs.thread1.binding_table_entry_count = key->nr_surfaces; @@ -109,7 +109,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) { + if (intel->is_ironlake) { switch (key->nr_urb_entries) { case 8: case 12: @@ -135,7 +135,7 @@ 
vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) case 32: break; case 64: - assert(BRW_IS_G4X(brw)); + assert(intel->is_g4x); break; default: assert(0); @@ -145,17 +145,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_IGDNG(brw)) - chipset_max_threads = 72; - else if (BRW_IS_G4X(brw)) - chipset_max_threads = 32; - else - chipset_max_threads = 16; vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, - 1, chipset_max_threads) - 1; - - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; + 1, brw->vs_max_threads) - 1; /* No samplers for ARB_vp programs: */ @@ -173,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + &vs, sizeof(vs)); /* Emit VS program relocation */ dri_bo_emit_reloc(bo, diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c index 3bc9840..4007b5a 100644 --- a/i965/brw_vs_surface_state.c +++ b/i965/brw_vs_surface_state.c @@ -35,7 +35,6 @@ #include "brw_context.h" #include "brw_state.h" -#include "brw_defines.h" /* Creates a new VS constant buffer reflecting the current VS program's * constants, if needed by the VS program. 
@@ -68,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + drm_intel_gem_bo_map_gtt(const_buffer); for (i = 0; i < params->NumParameters; i++) { memcpy(const_buffer->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - intel_bo_unmap_gtt_preferred(intel, const_buffer); + drm_intel_gem_bo_unmap_gtt(const_buffer); return const_buffer; } @@ -105,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == 0) { + if (vp->const_buffer == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; @@ -133,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 
1 : 0, + &key.bo, 1, NULL); if (brw->vs.surf_bo[surf] == NULL) { brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -156,7 +155,7 @@ brw_vs_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_VS_MAX_SURF]; int i; for (i = 0; i < BRW_VS_MAX_SURF; i++) @@ -168,8 +167,7 @@ brw_vs_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_VS_MAX_SURF; i++) { @@ -182,8 +180,6 @@ brw_vs_get_binding_table(struct brw_context *brw) I915_GEM_DOMAIN_INSTRUCTION, 0); } } - - free(data); } return bind_bo; diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index 34aaea3..96a44bf 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -44,7 +44,6 @@ #include "brw_state.h" #include "brw_draw.h" #include "brw_state.h" -#include "brw_fallback.h" #include "brw_vs.h" #include "brw_wm.h" @@ -68,11 +67,11 @@ static void brw_destroy_context( struct intel_context *intel ) brw_draw_destroy( brw ); brw_clear_validated_bos(brw); if (brw->wm.compile_data) { - _mesa_free(brw->wm.compile_data->instruction); - _mesa_free(brw->wm.compile_data->vreg); - _mesa_free(brw->wm.compile_data->refs); - _mesa_free(brw->wm.compile_data->prog_instructions); - _mesa_free(brw->wm.compile_data); + free(brw->wm.compile_data->instruction); + free(brw->wm.compile_data->vreg); + free(brw->wm.compile_data->refs); + free(brw->wm.compile_data->prog_instructions); + free(brw->wm.compile_data); } for (i = 0; i < brw->state.nr_color_regions; i++) @@ -103,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); + dri_bo_release(&brw->cc.blend_state_bo); + 
dri_bo_release(&brw->cc.depth_stencil_state_bo); + dri_bo_release(&brw->cc.color_calc_state_bo); } @@ -140,6 +142,12 @@ static void brw_finish_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); brw_emit_query_end(brw); + + if (brw->curbe.curbe_bo) { + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); + drm_intel_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + } } @@ -150,11 +158,6 @@ static void brw_new_batch( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - /* Check that we didn't just wrap our batchbuffer at a bad time. */ - assert(!brw->no_batch_wrap); - - brw->curbe.need_new_bo = GL_TRUE; - /* Mark all context state as needing to be re-emitted. * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. @@ -175,12 +178,6 @@ static void brw_new_batch( struct intel_context *intel ) } } - -static void brw_note_fence( struct intel_context *intel, GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -196,7 +193,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.update_texture_state = 0; brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/i965/brw_wm.c b/i965/brw_wm.c index 6895f64..991e1b9 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -30,7 +30,6 @@ */ #include "brw_context.h" -#include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" @@ -152,11 +151,11 @@ static void do_wm_prog( struct brw_context *brw, */ return; } - c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); - c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + c->instruction = 
calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->prog_instructions)); - c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); - c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); + c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs)); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -199,12 +198,13 @@ static void do_wm_prog( struct brw_context *brw, program = brw_get_program(&c->func, &program_size); dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); + brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + sizeof(c->prog_data), + &brw->wm.prog_data); } @@ -336,11 +336,7 @@ static void brw_wm_populate_key( struct brw_context *brw, * drawable height in order to invert the Y axis. 
*/ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; - } + key->drawable_height = ctx->DrawBuffer->Height; } key->nr_color_regions = brw->state.nr_color_regions; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index 9dcb6e1..88d84ee 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -76,10 +76,9 @@ struct brw_wm_prog_key { GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; - GLuint program_string_id:32; - GLushort origin_x, origin_y; GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint program_string_id:32; }; diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c index 2208210..a78cc8b 100644 --- a/i965/brw_wm_debug.c +++ b/i965/brw_wm_debug.c @@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c, if (c->state >= PASS2_DONE) brw_print_reg(value->hw_reg); else if( value == &c->undef_value ) - _mesa_printf("undef"); + printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - _mesa_printf("r%d", value - c->vreg); + printf("r%d", value - c->vreg); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - _mesa_printf("c%d", value - c->creg); + printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - _mesa_printf("i%d", value - c->payload.input_interp); + printf("i%d", value - c->payload.input_interp); else if (value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - _mesa_printf("d%d", value - c->payload.depth); + printf("d%d", value - c->payload.depth); else - _mesa_printf("?"); + printf("?"); } void brw_wm_print_ref( struct brw_wm_compile *c, @@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_reg hw_reg = ref->hw_reg; if (ref->unspill_reg) - _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + 
printf("UNSPILL(%x)/", ref->value->spill_slot); if (c->state >= PASS2_DONE) brw_print_reg(ref->hw_reg); else { - _mesa_printf("%s", hw_reg.negate ? "-" : ""); - _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + printf("%s", hw_reg.negate ? "-" : ""); + printf("%s", hw_reg.abs ? "abs/" : ""); brw_wm_print_value(c, ref->value); if ((hw_reg.nr&1) || hw_reg.subnr) { - _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); } } } @@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c, GLuint i, arg; GLuint nr_args = brw_wm_nr_args(inst->opcode); - _mesa_printf("["); + printf("["); for (i = 0; i < 4; i++) { if (inst->dst[i]) { brw_wm_print_value(c, inst->dst[i]); if (inst->dst[i]->spill_slot) - _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + printf("/SPILL(%x)",inst->dst[i]->spill_slot); } else - _mesa_printf("#"); + printf("#"); if (i < 3) - _mesa_printf(","); + printf(","); } - _mesa_printf("]"); + printf("]"); if (inst->writemask != WRITEMASK_XYZW) - _mesa_printf(".%s%s%s%s", + printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? "y" : "", GET_BIT(inst->writemask, 2) ? 
"z" : "", @@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c, switch (inst->opcode) { case WM_PIXELXY: - _mesa_printf(" = PIXELXY"); + printf(" = PIXELXY"); break; case WM_DELTAXY: - _mesa_printf(" = DELTAXY"); + printf(" = DELTAXY"); break; case WM_PIXELW: - _mesa_printf(" = PIXELW"); + printf(" = PIXELW"); break; case WM_WPOSXY: - _mesa_printf(" = WPOSXY"); + printf(" = WPOSXY"); break; case WM_PINTERP: - _mesa_printf(" = PINTERP"); + printf(" = PINTERP"); break; case WM_LINTERP: - _mesa_printf(" = LINTERP"); + printf(" = LINTERP"); break; case WM_CINTERP: - _mesa_printf(" = CINTERP"); + printf(" = CINTERP"); break; case WM_FB_WRITE: - _mesa_printf(" = FB_WRITE"); + printf(" = FB_WRITE"); break; case WM_FRONTFACING: - _mesa_printf(" = FRONTFACING"); + printf(" = FRONTFACING"); break; default: - _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + printf(" = %s", _mesa_opcode_string(inst->opcode)); break; } if (inst->saturate) - _mesa_printf("_SAT"); + printf("_SAT"); for (arg = 0; arg < nr_args; arg++) { - _mesa_printf(" ["); + printf(" ["); for (i = 0; i < 4; i++) { if (inst->src[arg][i]) { brw_wm_print_ref(c, inst->src[arg][i]); } else - _mesa_printf("%%"); + printf("%%"); if (i < 3) - _mesa_printf(","); + printf(","); else - _mesa_printf("]"); + printf("]"); } } - _mesa_printf("\n"); + printf("\n"); } void brw_wm_print_program( struct brw_wm_compile *c, @@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c, { GLuint insn; - _mesa_printf("%s:\n", stage); + printf("%s:\n", stage); for (insn = 0; insn < c->nr_insns; insn++) brw_wm_print_insn(c, &c->instruction[insn]); - _mesa_printf("\n"); + printf("\n"); } diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index 5390fd2..9315bca 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -138,19 +138,43 @@ void emit_wpos_xy(struct brw_wm_compile *c, * X and Y channels. 
*/ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + if (c->fp->program.PixelCenterInteger) { + /* X' = X */ + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); + } else { + /* X' = X + 0.5 */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + if (c->fp->program.OriginUpperLeft) { + if (c->fp->program.PixelCenterInteger) { + /* Y' = Y */ + brw_MOV(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W)); + } else { + /* Y' = Y + 0.5 */ + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_W), + brw_imm_f(0.5)); + } + } else { + float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; + + /* Y' = (height - 1) - Y + center */ + brw_ADD(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_f(c->key.drawable_height - 1 + center_offset)); + } } } @@ -692,7 +716,7 @@ void emit_xpd(struct brw_compile *p, { GLuint i; - assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + assert((mask & WRITEMASK_W) != WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<func; + struct intel_context *intel = &p->brw->intel; struct brw_reg dst_retyped; GLuint cur_mrf = 2, response_length; GLuint i, nr_texcoords; @@ -873,7 +898,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ - if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + if (!intel->is_ironlake && c->dispatch_width == 8) nr_texcoords = 3; /* For shadow comparisons, we have to supply u,v,r. */ @@ -891,7 +916,7 @@ void emit_tex(struct brw_wm_compile *c, /* Fill in the shadow comparison reference value. 
*/ if (shadow) { - if (BRW_IS_IGDNG(p->brw)) { + if (intel->is_ironlake) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); cur_mrf += mrf_per_channel; @@ -904,7 +929,7 @@ void emit_tex(struct brw_wm_compile *c, cur_mrf += mrf_per_channel; } - if (BRW_IS_IGDNG(p->brw)) { + if (intel->is_ironlake) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; else @@ -944,6 +969,7 @@ void emit_txb(struct brw_wm_compile *c, GLuint sampler) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; GLuint msgLength; GLuint msg_type; GLuint mrf_per_channel; @@ -955,8 +981,8 @@ void emit_txb(struct brw_wm_compile *c, * undefined, and trust the execution mask to keep the undefined pixels * from mattering. */ - if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { - if (BRW_IS_IGDNG(p->brw)) + if (c->dispatch_width == 16 || !intel->is_ironlake) { + if (intel->is_ironlake) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; @@ -1084,7 +1110,7 @@ static void emit_kil_nv( struct brw_wm_compile *c ) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, r0uw, c->emit_mask_reg, r0uw); brw_pop_insn_state(p); } @@ -1174,7 +1200,7 @@ void emit_fb_write(struct brw_wm_compile *c, brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { - if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) { + if (c->dispatch_width == 16 && brw->has_compr4) { /* By setting the high bit of the MRF register number, we indicate * that we want COMPR4 mode - instead of doing the usual destination * + 1 for the second half we get destination + 4. 
@@ -1596,10 +1622,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->opcode) : + "unknown"); } for (i = 0; i < 4; i++) @@ -1612,9 +1638,9 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (INTEL_DEBUG & DEBUG_WM) { int i; - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index 7d03179..d73c391 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; - reg.pad = 0; return reg; } @@ -160,7 +159,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); + printf("%s: out of temporaries\n", __FILE__); exit(1); } @@ -1035,7 +1034,7 @@ static void print_insns( const struct prog_instruction *insn, { GLuint i; for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); + printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) _mesa_print_instruction(insn); else if (insn->Opcode < MAX_WM_OPCODE) { @@ -1046,7 +1045,7 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("965 Opcode %d\n", insn->Opcode); + printf("965 Opcode %d\n", insn->Opcode); } } @@ -1061,9 +1060,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) GLuint insn; if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pre-fp:\n"); + printf("pre-fp:\n"); _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); + printf("\n"); } c->pixel_xy = src_undef(); 
@@ -1169,9 +1168,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); + printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c index e8c2cb6..562608e 100644 --- a/i965/brw_wm_glsl.c +++ b/i965/brw_wm_glsl.c @@ -743,7 +743,7 @@ static void emit_kil(struct brw_wm_compile *c) struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, depth, c->emit_mask_reg, depth); brw_pop_insn_state(p); } @@ -1826,6 +1826,7 @@ get_argument_regs(struct brw_wm_compile *c, static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { + struct intel_context *intel = &brw->intel; #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; @@ -1848,7 +1849,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) c->cur_inst = i; #if 0 - _mesa_printf("Inst %d: ", i); + printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif @@ -1876,10 +1877,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, dst, dst_flags); @@ -2043,6 +2040,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: + assert(if_depth > 0); if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: @@ -2096,9 +2094,10 @@ static void 
brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_instruction *inst0, *inst1; GLuint br = 1; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) br = 2; - + + assert(loop_depth > 0); loop_depth--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BGNLOOP */ @@ -2116,7 +2115,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + printf("unsupported IR in fragment shader %d\n", inst->Opcode); } @@ -2128,10 +2127,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) post_wm_emit(c); if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } @@ -2142,7 +2141,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("brw_wm_glsl_emit:\n"); + printf("brw_wm_glsl_emit:\n"); } /* initial instruction translation/simplification */ diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index ff4c082..60bd92e 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -105,7 +105,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { - _mesa_printf("%s: out of params\n", __FUNCTION__); + printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -154,7 +154,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, return c->constref[i].ref; } else { - _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + printf("%s: out of constrefs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } diff --git a/i965/brw_wm_sampler_state.c 
b/i965/brw_wm_sampler_state.c index aa2e519..d7650af 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -89,7 +89,6 @@ struct wm_sampler_key { float max_aniso; GLenum minfilter, magfilter; GLenum comparemode, comparefunc; - dri_bo *sdc_bo; /** If target is cubemap, take context setting. */ @@ -105,7 +104,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, dri_bo *sdc_bo, struct brw_sampler_state *sampler) { - _mesa_memset(sampler, 0, sizeof(*sampler)); + memset(sampler, 0, sizeof(*sampler)); switch (key->minfilter) { case GL_NEAREST: @@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; int unit; - memset(key, 0, sizeof(*key)); + key->sampler_count = 0; for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (ctx->Texture.Unit[unit]._ReallyEnabled) { @@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + memset(entry, 0, sizeof(*entry)); + entry->tex_target = texObj->Target; entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) @@ -262,10 +263,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw, dri_bo_unreference(brw->wm.sdc_bo[unit]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { float bordercolor[4] = { - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0] + texObj->BorderColor.f[0], + texObj->BorderColor.f[0], + texObj->BorderColor.f[0], + texObj->BorderColor.f[0] }; /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. 
Spam R into all the @@ -274,7 +275,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); } else { brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->BorderColor); + texObj->BorderColor.f); } key->sampler_count = unit + 1; } @@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; - int i; + int i, sampler_key_size; brw_wm_sampler_populate_key(brw, &key); @@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw ) if (brw->wm.sampler_count == 0) return; + /* Only include the populated portion of the key in the search. */ + sampler_key_size = offsetof(struct wm_sampler_key, + sampler[key.sampler_count]); brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, NULL); @@ -324,10 +328,9 @@ static void upload_wm_samplers( struct brw_context *brw ) } brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler), - NULL, NULL); + &sampler, sizeof(sampler)); /* Emit SDC relocations */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index f89ed9b..a7f80db 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -49,8 +49,6 @@ struct brw_wm_unit_key { unsigned int curbe_offset; unsigned int urb_size; - unsigned int max_threads; - unsigned int nr_surfaces, sampler_count; GLboolean uses_depth, computes_depth, uses_kill, is_glsl; GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; @@ -67,18 +65,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) memset(key, 0, sizeof(*key)); - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - key->max_threads = 1; - else { - /* WM maximum threads is number of EUs times number of threads 
per EU. */ - if (BRW_IS_IGDNG(brw)) - key->max_threads = 12 * 6; - else if (BRW_IS_G4X(brw)) - key->max_threads = 10 * 5; - else - key->max_threads = 8 * 4; - } - /* CACHE_NEW_WM_PROG */ key->total_grf = brw->wm.prog_data->total_grf; key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -140,6 +126,7 @@ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) { + struct intel_context *intel = &brw->intel; struct brw_wm_unit_state wm; dri_bo *bo; @@ -150,7 +137,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ else wm.thread1.binding_table_entry_count = key->nr_surfaces; @@ -170,7 +157,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) + if (intel->is_ironlake) wm.wm4.sampler_count = 0; /* hardware requirement */ else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; @@ -191,7 +178,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, else wm.wm5.enable_16_pix = 1; - wm.wm5.max_threads = key->max_threads - 1; + wm.wm5.max_threads = brw->wm_max_threads - 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; @@ -223,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, key, sizeof(*key), reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + &wm, sizeof(wm)); /* Emit WM program relocation */ dri_bo_emit_reloc(bo, @@ -268,7 +254,7 @@ static void upload_wm_unit( struct 
brw_context *brw ) */ assert(key.total_scratch <= 12 * 1024); if (key.total_scratch) { - GLuint total = key.total_scratch * key.max_threads; + GLuint total = key.total_scratch * brw->wm_max_threads; if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { dri_bo_unreference(brw->wm.scratch_bo); diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index 8335e5a..ce0bf0b 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - if (key->bo) { - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); - } - else { - switch (key->depth) { - case 32: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - case 24: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - break; - case 16: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - } - } + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = key->width - 1; @@ -255,18 +236,14 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 
1 : 0, - &surf, sizeof(surf), - NULL, NULL); - - if (key->bo) { - /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + &key->bo, 1, + &surf, sizeof(surf)); + + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); + return bo; } @@ -282,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) memset(&key, 0, sizeof(key)); - if (intelObj->imageOverride) { - key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; - key.depth = intelObj->depthOverride; - key.bo = NULL; - key.offset = intelObj->textureOffset; - } else { - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - } + key.format = firstImage->TexFormat; + key.internal_format = firstImage->InternalFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; key.target = tObj->Target; key.depthmode = tObj->DepthMode; @@ -309,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 
1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); @@ -337,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; assert(key->bo); - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ @@ -350,21 +317,16 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); - - if (key->bo) { - /* Emit relocation to surface contents. Section 5.1.1 of the gen4 - * bspec ("Data Cache") says that the data cache does not exist as - * a separate cache and is just the sampler cache. - */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + &key->bo, 1, + &surf, sizeof(surf)); + + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); return bo; } @@ -422,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (fp->const_buffer == 0) { + if (fp->const_buffer == NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; return; @@ -450,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 
1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -511,7 +473,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, unsigned int unit) { - GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; dri_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb ? irb->region : NULL; @@ -522,7 +485,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; - uint32_t draw_offset; + uint32_t draw_x; + uint32_t draw_y; } key; memset(&key, 0, sizeof(key)); @@ -564,7 +528,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } key.pitch = region->pitch; key.cpp = region->cpp; - key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ + key.draw_x = region->draw_x; + key.draw_y = region->draw_y; } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -572,20 +537,24 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.width = 1; key.height = 1; key.cpp = 4; - key.draw_offset = 0; + key.draw_x = 0; + key.draw_y = 0; } - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask, - sizeof(key.color_mask)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; + if (intel->gen < 6) { + /* _NEW_COLOR */ + memcpy(key.color_mask, ctx->Color.ColorMask[unit], + sizeof(key.color_mask)); - key.color_blend = (!ctx->Color._LogicOpEnabled && - ctx->Color.BlendEnabled); + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. 
+ */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + + key.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + } dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, @@ -602,25 +571,32 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = key.draw_offset; + surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; } else { - uint32_t tile_offset = key.draw_offset % 4096; - - surf.ss1.base_addr = key.draw_offset - tile_offset; - - assert(BRW_IS_G4X(brw) || tile_offset == 0); - if (BRW_IS_G4X(brw)) { - if (key.tiling == I915_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 512 / 2; - } else { - surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 128 / 2; - } + uint32_t tile_base, tile_x, tile_y; + uint32_t pitch = key.pitch * key.cpp; + + if (key.tiling == I915_TILING_X) { + tile_x = key.draw_x % (512 / key.cpp); + tile_y = key.draw_y % 8; + tile_base = ((key.draw_y / 8) * (8 * pitch)); + tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; + } else { + /* Y */ + tile_x = key.draw_x % (128 / key.cpp); + tile_y = key.draw_y % 32; + tile_base = ((key.draw_y / 32) * (32 * pitch)); + tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; } + assert(intel->is_g4x || (tile_x == 0 && tile_y == 0)); + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surf.ss1.base_addr = tile_base; + surf.ss5.x_offset = tile_x / 4; + surf.ss5.y_offset = tile_y / 2; } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -630,20 +606,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw, brw_set_surface_tiling(&surf, key.tiling); surf.ss3.pitch = (key.pitch * key.cpp) - 1; - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + } /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), &region_bo, 1, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (region_bo != NULL) { /* We might sample from it, and we might render to it, so flag * them both.
We might be able to figure out from other state @@ -690,8 +667,7 @@ brw_wm_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/i965/gen6_cc.c b/i965/gen6_cc.c new file mode 100644 index 0000000..f7acad6 --- /dev/null +++ b/i965/gen6_cc.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +struct gen6_blend_state_key { + GLboolean color_blend, alpha_enabled; + GLboolean dither; + + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; +}; + +static void +blend_state_populate_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_COLOR */ + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; + else + key->logic_op = GL_COPY; + + /* _NEW_COLOR */ + key->color_blend = ctx->Color.BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; + } + + /* _NEW_COLOR */ + key->alpha_enabled = ctx->Color.AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = ctx->Color.AlphaFunc; + } + + /* _NEW_COLOR */ + key->dither = ctx->Color.DitherFlag; +} + +/** + * Creates the state cache entry for the given CC unit key. 
+ */ +static drm_intel_bo * +blend_state_create_from_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + struct gen6_blend_state blend; + drm_intel_bo *bo; + + memset(&blend, 0, sizeof(blend)); + + if (key->logic_op != GL_COPY) { + blend.blend1.logic_op_enable = 1; + blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend.blend0.blend_enable = 1; + blend.blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->alpha_enabled) { + blend.blend1.alpha_test_enable = 1; + blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + + } + + if (key->dither) { + blend.blend1.dither_enable = 1; + blend.blend1.y_dither_offset = 0; + blend.blend1.x_dither_offset = 0; + } + + bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, + key, sizeof(*key), + NULL, 0, + &blend, sizeof(blend)); + + return bo; +} + +static void +prepare_blend_state(struct brw_context *brw) +{ + struct gen6_blend_state_key key; + + blend_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.blend_state_bo); + brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE, + &key, 
sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.blend_state_bo == NULL) + brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_blend_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_blend_state, +}; + +struct gen6_color_calc_state_key { + GLubyte blend_constant_color[4]; + GLclampf alpha_ref; + GLubyte stencil_ref[2]; +}; + +static void +color_calc_state_populate_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + if (ctx->Stencil._TestTwoSide) + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + } + + /* _NEW_COLOR */ + if (ctx->Color.AlphaEnabled) + key->alpha_ref = ctx->Color.AlphaRef; + + key->blend_constant_color[0] = ctx->Color.BlendColor[0]; + key->blend_constant_color[1] = ctx->Color.BlendColor[1]; + key->blend_constant_color[2] = ctx->Color.BlendColor[2]; + key->blend_constant_color[3] = ctx->Color.BlendColor[3]; +} + +/** + * Creates the state cache entry for the given CC state key. 
+ */ +static drm_intel_bo * +color_calc_state_create_from_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + struct gen6_color_calc_state cc; + drm_intel_bo *bo; + + memset(&cc, 0, sizeof(cc)); + + cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref); + + cc.cc0.stencil_ref = key->stencil_ref[0]; + cc.cc0.bf_stencil_ref = key->stencil_ref[1]; + + cc.constant_r = key->blend_constant_color[0]; + cc.constant_g = key->blend_constant_color[1]; + cc.constant_b = key->blend_constant_color[2]; + cc.constant_a = key->blend_constant_color[3]; + + bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE, + key, sizeof(*key), + NULL, 0, + &cc, sizeof(cc)); + + return bo; +} + +static void +prepare_color_calc_state(struct brw_context *brw) +{ + struct gen6_color_calc_state_key key; + + color_calc_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = color_calc_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_color_calc_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_color_calc_state, +}; + +static void upload_cc_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_cc_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->cc.state_bo); + brw_add_validated_bo(brw, 
brw->cc.blend_state_bo); + brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); +} + +const struct brw_tracked_state gen6_cc_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_BLEND_STATE | + CACHE_NEW_COLOR_CALC_STATE | + CACHE_NEW_DEPTH_STENCIL_STATE) + }, + .prepare = prepare_cc_state_pointers, + .emit = upload_cc_state_pointers, +}; diff --git a/i965/gen6_clip_state.c b/i965/gen6_clip_state.c new file mode 100644 index 0000000..06f8145 --- /dev/null +++ b/i965/gen6_clip_state.c @@ -0,0 +1,75 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_clip_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + uint32_t depth_clamp = 0; + uint32_t provoking; + + if (!ctx->Transform.DepthClamp) + depth_clamp = GEN6_CLIP_Z_TEST; + + if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { + provoking = 0; + } else { + provoking = + (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); + } + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); + OUT_BATCH(GEN6_CLIP_ENABLE | + GEN6_CLIP_API_OGL | + GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */ + GEN6_CLIP_XY_TEST | + depth_clamp | + provoking); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_clip_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_clip_state, +}; diff --git a/i965/gen6_depthstencil.c b/i965/gen6_depthstencil.c new file mode 100644 index 0000000..4924f0f --- /dev/null +++ b/i965/gen6_depthstencil.c @@ -0,0 +1,165 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or 
substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" + +struct brw_depth_stencil_state_key { + GLenum depth_func; + GLboolean depth_test, depth_write; + GLboolean stencil, stencil_two_side; + GLenum stencil_func[2], stencil_fail_op[2]; /* [0] = front face, [1] = back face */ + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_write_mask[2], stencil_test_mask[2]; +}; + +static void +depth_stencil_state_populate_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; + + memset(key, 0, sizeof(*key)); /* fields skipped below stay zero; the whole struct is handed to the cache as the key */ + + /* _NEW_STENCIL */ + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_write_mask[1] =
ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; + } + + key->depth_test = ctx->Depth.Test; /* _NEW_DEPTH */ + if (key->depth_test) { + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; + } +} + +/** + * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key. + * + * The key is translated into a gen6_depth_stencil_state, uploaded through + * brw_upload_cache() and the resulting buffer object is returned. + */ +static dri_bo * +depth_stencil_state_create_from_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + struct gen6_depth_stencil_state ds; + dri_bo *bo; + + memset(&ds, 0, sizeof(ds)); + + /* _NEW_STENCIL */ + if (key->stencil) { + ds.ds0.stencil_enable = 1; + ds.ds0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + ds.ds0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + ds.ds0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + ds.ds0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + ds.ds1.stencil_write_mask = key->stencil_write_mask[0]; + ds.ds1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + ds.ds0.bf_stencil_enable = 1; + ds.ds0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + ds.ds0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1]; + ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + * stencil writes are enabled when either face has a non-zero write mask. + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + ds.ds0.stencil_write_enable = 1; + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + ds.ds2.depth_test_enable = 1; + ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func); +
ds.ds2.depth_write_enable = key->depth_write; + } + + bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE, + key, sizeof(*key), + NULL, 0, + &ds, sizeof(ds)); + + return bo; +} + +static void +prepare_depth_stencil_state(struct brw_context *brw) +{ + struct brw_depth_stencil_state_key key; + + depth_stencil_state_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.depth_stencil_state_bo); /* drop the previous BO before looking the new key up */ + brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache, + BRW_DEPTH_STENCIL_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.depth_stencil_state_bo == NULL) /* nothing found in the cache: build and upload a new entry */ + brw->cc.depth_stencil_state_bo = + depth_stencil_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_depth_stencil_state = { + .dirty = { + .mesa = _NEW_DEPTH | _NEW_STENCIL, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_depth_stencil_state, +}; diff --git a/i965/gen6_gs_state.c b/i965/gen6_gs_state.c new file mode 100644 index 0000000..161e7b8 --- /dev/null +++ b/i965/gen6_gs_state.c @@ -0,0 +1,91 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_gs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + /* Disable all the constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (brw->gs.prog_bo) { /* a GS program is bound: point the packet at it and set GEN6_GS_ENABLE */ + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(GEN6_GS_ENABLE); + ADVANCE_BATCH(); + } else { /* no GS program: same packet layout with a zero kernel pointer and a zero URB read length */ + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); /* GEN6_GS_ENABLE left clear */ + ADVANCE_BATCH(); + } +} + +const struct brw_tracked_state
gen6_gs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_GS_PROG /* upload_gs_state() reads brw->gs.prog_bo and brw->gs.prog_data */ + }, + .emit = upload_gs_state, +}; diff --git a/i965/gen6_sampler_state.c b/i965/gen6_sampler_state.c new file mode 100644 index 0000000..ab8e751 --- /dev/null +++ b/i965/gen6_sampler_state.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_sampler_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + if (brw->wm.sampler_bo) /* PS: only the WM sampler BO is ever supplied here */ + OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + else + OUT_BATCH(0); /* PS: no sampler BO, emit a zero pointer */ + + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void +prepare_sampler_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.sampler_bo); /* the same BO the emit function relocates; NOTE(review): may be NULL here - confirm brw_add_validated_bo() accepts that */ +} + +const struct brw_tracked_state gen6_sampler_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SAMPLER + }, + .prepare = prepare_sampler_state_pointers, + .emit = upload_sampler_state_pointers, +}; diff --git a/i965/gen6_scissor_state.c b/i965/gen6_scissor_state.c new file mode 100644 index 0000000..2e21e5f --- /dev/null +++ b/i965/gen6_scissor_state.c @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +prepare_scissor_state(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + struct gen6_scissor_state scissor; + + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive.
+ * That is why 1 is subtracted from every max value below. + */ + if (render_to_fbo) { + /* texmemory: Y=0=bottom */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->_Ymin; + scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + } + else { + /* memory: Y=0=top */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; /* flip Y against the drawable height */ + scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + } + + drm_intel_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, + &scissor, sizeof(scissor), + NULL, 0); /* the scissor rect is kept in sf.state_bo under the BRW_SF_UNIT cache id */ +} + +const struct brw_tracked_state gen6_scissor_state = { + .dirty = { + .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_scissor_state, +}; + +static void upload_scissor_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* the BO filled in by prepare_scissor_state() */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_scissor_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +const struct brw_tracked_state gen6_scissor_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SF_UNIT + }, + .prepare = prepare_scissor_state_pointers, + .emit = upload_scissor_state_pointers, +}; diff --git a/i965/gen6_sf_state.c b/i965/gen6_sf_state.c new file mode 100644 index 0000000..8d96b44 --- /dev/null +++ b/i965/gen6_sf_state.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the
rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "main/macros.h" +#include "intel_batchbuffer.h" + +/** + * Returns the source attribute index for VS output \c attr: the number of + * lower-numbered bits that are set in vs.prog_data->outputs_written. + */ +static uint32_t +get_attr_override(struct brw_context *brw, int attr) +{ + int attr_index = 0, i; + + /* Find the source index (0 = first attribute after the 4D position) + * for this output attribute. attr is currently a VERT_RESULT_* but should + * be FRAG_ATTRIB_*.
+ */ + for (i = 0; i < attr; i++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) + attr_index++; + } + + return attr_index; +} + +/** + * Emits the 20-dword CMD_3D_SF_STATE packet: winding, scissor enable, + * culling, line and point setup, provoking vertex, polygon offset and the + * per-attribute overrides. + */ +static void +upload_sf_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + /* CACHE_NEW_VS_PROG */ + uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* This should probably be FS inputs read */ + uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written); + uint32_t dw1, dw2, dw3, dw4; + int i; + /* _NEW_BUFFERS */ + GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + int attr = 0; + + dw1 = + num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | + (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | + GEN6_SF_STATISTICS_ENABLE; + dw3 = 0; + dw4 = 0; + + /* _NEW_POLYGON */ + if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + dw2 |= GEN6_SF_WINDING_CCW; + + /* _NEW_SCISSOR */ + if (ctx->Scissor.Enabled) + dw3 |= GEN6_SF_SCISSOR_ENABLE; + + /* _NEW_POLYGON */ + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + dw3 |= GEN6_SF_CULL_FRONT; /* was CULL_BOTH, which also discarded back faces */ + break; + case GL_BACK: + dw3 |= GEN6_SF_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + dw3 |= GEN6_SF_CULL_BOTH; + break; + default: + assert(0); + break; + } + } else { + dw3 |= GEN6_SF_CULL_NONE; + } + + /* _NEW_LINE */ + dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << + GEN6_SF_LINE_WIDTH_SHIFT; + if (ctx->Line.SmoothFlag) { + dw3 |= GEN6_SF_LINE_AA_ENABLE; + dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; + dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; + } + + /* _NEW_POINT: take the width from this packet only when no per-vertex + * point size is produced (attenuation or a program-written point size). + */ + if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated)) + dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; + + dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) << + GEN6_SF_POINT_WIDTH_SHIFT; + if (render_to_fbo) + dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + + /* _NEW_LIGHT */ + if
(ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { + dw4 |= + (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | + (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_SF_LINE_PROVOKE_SHIFT); + } else { + dw4 |= + (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); + } + + BEGIN_BATCH(20); + OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(dw2); + OUT_BATCH(dw3); + OUT_BATCH(dw4); + OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ + OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ + OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ + for (i = 0; i < 8; i++) { + uint32_t attr_overrides = 0; + + /* These should be generating FS inputs read instead of VS + * outputs written. Each dword packs two overrides: the next + * written output in the low half, the one after it shifted by 16. + */ + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr); + attr++; + break; + } + } + + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr) << 16; + attr++; + break; + } + } + OUT_BATCH(attr_overrides); + } + OUT_BATCH(0); /* point sprite texcoord bitmask */ + OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_sf_state = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_POLYGON | + _NEW_LINE | + _NEW_POINT | + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_sf_state, +}; diff --git a/i965/gen6_urb.c b/i965/gen6_urb.c new file mode 100644 index 0000000..5445e40 --- /dev/null +++ b/i965/gen6_urb.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software
without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * Chooses the URB partitioning: 24 VS entries, 4 GS entries when a GS + * program is bound (0 otherwise), and the VS entry size from the compiled + * vertex program. + */ +static void +prepare_urb( struct brw_context *brw ) +{ + brw->urb.nr_vs_entries = 24; + if (brw->gs.prog_bo) + brw->urb.nr_gs_entries = 4; + else + brw->urb.nr_gs_entries = 0; + /* CACHE_NEW_VS_PROG: upload_urb() encodes (vs_size - 1), so the size + * must be at least 1 and must not be capped at 1. + */ + brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + + /* Check that the number of URB rows (8 floats each) allocated is less + * than the URB space.
+ */ + assert((brw->urb.nr_vs_entries + + brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024); +} + +/** + * Emits the 3-dword CMD_URB packet between two MI flushes, using the + * values chosen by prepare_urb(). + */ +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 4 == 0); + assert(brw->urb.nr_gs_entries % 4 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(3); + OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | + ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; diff --git a/i965/gen6_viewport_state.c b/i965/gen6_viewport_state.c new file mode 100644 index 0000000..0c2aa42 --- /dev/null +++ b/i965/gen6_viewport_state.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +/* The clip VP defines the guardband region where expensive clipping is skipped + * and fragments are allowed to be generated and clipped out cheaply by the SF. + * + * By setting it to NDC bounds of [-1,1], we don't do GB clipping. It's + * supposed to cause seams to become visible in apps due to shared edges taking + * different clip/no clip paths depending on whether the rest of the prim ends + * up in the guardband or not. 
+ */ +static void +prepare_clip_vp(struct brw_context *brw) +{ + struct brw_clipper_viewport vp; + + vp.xmin = -1.0; + vp.xmax = 1.0; + vp.ymin = -1.0; + vp.ymax = 1.0; + + drm_intel_bo_unreference(brw->clip.vp_bo); + brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, + &vp, sizeof(vp), + NULL, 0); +} + +const struct brw_tracked_state gen6_clip_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */ + .brw = 0, + .cache = 0, + }, + .prepare = prepare_clip_vp, +}; + +static void +prepare_sf_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + struct brw_sf_viewport sfv; + GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; + + memset(&sfv, 0, sizeof(sfv)); + + /* _NEW_BUFFERS */ + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } + + /* _NEW_VIEWPORT */ + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + + drm_intel_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, + &sfv, sizeof(sfv), + NULL, 0); +} + +const struct brw_tracked_state gen6_sf_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_BUFFERS, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_sf_vp, +}; + +static void +prepare_cc_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_cc_viewport ccv; + + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } + + 
drm_intel_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); +} + +const struct brw_tracked_state gen6_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_cc_vp, +}; + +static void prepare_viewport_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +static void upload_viewport_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + GEN6_CC_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CLIP_VIEWPORT_MODIFY); + OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_viewport_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_CLIP_VP | + CACHE_NEW_SF_VP | + CACHE_NEW_CC_VP) + }, + .prepare = prepare_viewport_state_pointers, + .emit = upload_viewport_state_pointers, +}; diff --git a/i965/gen6_vs_state.c b/i965/gen6_vs_state.c new file mode 100644 index 0000000..fe597df --- /dev/null +++ b/i965/gen6_vs_state.c @@ -0,0 +1,119 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + unsigned int nr_params = vp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + + if (vp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR.
+ */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", + nr_params * 4 * sizeof(float), + 4096); /* NOTE(review): the allocation and the GTT map below are not checked for failure */ + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); /* each parameter is copied as four floats */ + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); /* NOTE(review): presumably the buffer length in pairs of vec4 parameters, minus one - confirm */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); /* drop the local reference once the relocation has been emitted */ + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); + OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | + (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | + GEN6_VS_STATISTICS_ENABLE); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_vs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_vs_state, +}; diff --git a/i965/gen6_wm_state.c b/i965/gen6_wm_state.c new file mode 100644 index 0000000..1eb17ca --- /dev/null +++ b/i965/gen6_wm_state.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_wm_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + unsigned int nr_params = fp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + uint32_t dw2, dw4, dw5, dw6; + + if (fp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. 
*/ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo", + nr_params * 4 * sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + fp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + dw2 = dw4 = dw5 = dw6 = 0; + dw4 |= GEN6_WM_STATISTICS_ENABLE; + dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; + dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + + /* BRW_NEW_NR_WM_SURFACES */ + dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; + + /* CACHE_NEW_SAMPLER */ + dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; + dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + + dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; + dw5 |= GEN6_WM_DISPATCH_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->isGLSL) + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; + else + dw5 |= GEN6_WM_16_DISPATCH_ENABLE; + + /* _NEW_LINE */ + if (ctx->Line.StippleFlag) + dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; + + /* _NEW_POLYGONSTIPPLE */ + if (ctx->Polygon.StippleFlag) + dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->program.Base.InputsRead & (1 <<
FRAG_ATTRIB_WPOS)) + dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W; + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + dw5 |= GEN6_WM_COMPUTED_DEPTH; + + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + dw5 |= GEN6_WM_KILL_ENABLE; + + /* This should probably be FS inputs read */ + dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) << + GEN6_WM_NUM_SF_OUTPUTS_SHIFT; + + BEGIN_BATCH(9); + OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2)); + OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(dw2); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH(dw4); + OUT_BATCH(dw5); + OUT_BATCH(dw6); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(0); /* kernel 2 pointer */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_wm_state = { + .dirty = { + .mesa = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_PROGRAM_CONSTANTS, /* upload_wm_state re-uploads the push constants, so constant changes must re-emit */ + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_NR_WM_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_BATCH), + .cache = CACHE_NEW_SAMPLER + }, + .emit = upload_wm_state, +}; diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c index ca6e2fa..9768b0d 100644 --- a/shared/intel_batchbuffer.c +++ b/shared/intel_batchbuffer.c @@ -32,44 +32,6 @@ #include "intel_bufmgr.h" #include "intel_buffers.h" -/* Relocations in kernel space: - * - pass dma buffer seperately - * - memory manager knows how to patch - * - pass list of dependent buffers - * - pass relocation list - * - * Either: - * - get back an offset for buffer to fire - * - memory manager knows how to fire buffer - * - * Really want the buffer to be AGP and pinned. - * - */ - -/* Cliprect fence: The highest fence protecting a dma buffer - * containing explicit cliprect information. Like the old drawable - * lock but irq-driven. X server must wait for this fence to expire - * before changing cliprects [and then doing sw rendering?].
For - * other dma buffers, the scheduler will grab current cliprect info - * and mix into buffer. X server must hold the lock while changing - * cliprects??? Make per-drawable. Need cliprects in shared memory - * -- beats storing them with every cmd buffer in the queue. - * - * ==> X server must wait for this fence to expire before touching the - * framebuffer with new cliprects. - * - * ==> Cliprect-dependent buffers associated with a - * cliprect-timestamp. All of the buffers associated with a timestamp - * must go to hardware before any buffer with a newer timestamp. - * - * ==> Dma should be queued per-drawable for correct X/GL - * synchronization. Or can fences be used for this? - * - * Applies to: Blit operations, metaops, X server operations -- X - * server automatically waits on its own dma to complete before - * modifying cliprects ??? - */ - void intel_batchbuffer_reset(struct intel_batchbuffer *batch) { @@ -80,7 +42,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } - if (!batch->buffer && intel->ttm == GL_TRUE) + if (!batch->buffer) batch->buffer = malloc (intel->maxBatchSize); batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", @@ -94,7 +56,6 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; - batch->cliprect_mode = IGNORE_CLIPRECTS; } struct intel_batchbuffer * @@ -129,13 +90,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) /* TODO: Push this whole function into bufmgr. 
*/ static void -do_flush_locked(struct intel_batchbuffer *batch, - GLuint used, GLboolean allow_unlock) +do_flush_locked(struct intel_batchbuffer *batch, GLuint used) { struct intel_context *intel = batch->intel; int ret = 0; - unsigned int num_cliprects = 0; - struct drm_clip_rect *cliprects = NULL; int x_off = 0, y_off = 0; if (batch->buffer) @@ -146,31 +104,8 @@ do_flush_locked(struct intel_batchbuffer *batch, batch->map = NULL; batch->ptr = NULL; - - if (batch->cliprect_mode == LOOP_CLIPRECTS) { - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); - } - /* Dispatch the batchbuffer, if it has some effect (nonzero cliprects). - * Can't short-circuit like this once we have hardware contexts, but we - * should always be in DRI2 mode by then anyway. - */ - if ((batch->cliprect_mode != LOOP_CLIPRECTS || - num_cliprects != 0) && !intel->no_hw) { - dri_bo_exec(batch->buf, used, cliprects, num_cliprects, - (x_off & 0xffff) | (y_off << 16)); - } - - if (batch->cliprect_mode == LOOP_CLIPRECTS && num_cliprects == 0) { - if (allow_unlock) { - /* If we are not doing any actual user-visible rendering, - * do a sched_yield to keep the app from pegging the cpu while - * achieving nothing. 
- */ - UNLOCK_HARDWARE(intel); - sched_yield(); - LOCK_HARDWARE(intel); - } - } + if (!intel->no_hw) + dri_bo_exec(batch->buf, used, NULL, 0, (x_off & 0xffff) | (y_off << 16)); if (INTEL_DEBUG & DEBUG_BATCH) { dri_bo_map(batch->buf, GL_FALSE); @@ -183,7 +118,6 @@ do_flush_locked(struct intel_batchbuffer *batch, } if (ret != 0) { - UNLOCK_HARDWARE(intel); exit(1); } intel->vtbl.new_batch(intel); @@ -196,15 +130,14 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; - if (intel->first_post_swapbuffers_batch == NULL) { + if (!intel->using_dri2_swapbuffers && + intel->first_post_swapbuffers_batch == NULL) { intel->first_post_swapbuffers_batch = intel->batch->buf; drm_intel_bo_reference(intel->first_post_swapbuffers_batch); } - if (used == 0) { - batch->cliprect_mode = IGNORE_CLIPRECTS; + if (used == 0) return; - } if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, @@ -212,7 +145,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, batch->reserved_space = 0; /* Emit a flush if the bufmgr doesn't do it for us. */ - if (intel->always_flush_cache || !intel->ttm) { + if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(batch); used = batch->ptr - batch->map; } @@ -226,9 +159,10 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, } /* Mark the end of the buffer. 
*/ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; batch->ptr += 4; used = batch->ptr - batch->map; + assert (used <= batch->buf->size); /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a @@ -244,14 +178,15 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (intel->vtbl.finish_batch) intel->vtbl.finish_batch(intel); + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!intel->no_batch_wrap); + batch->reserved_space = BATCH_RESERVED; /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ - LOCK_HARDWARE(intel); - do_flush_locked(batch, used, GL_FALSE); - UNLOCK_HARDWARE(intel); + do_flush_locked(batch, used); if (INTEL_DEBUG & DEBUG_SYNC) { fprintf(stderr, "waiting for idle\n"); @@ -275,9 +210,11 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, { int ret; + assert(delta < buffer->size); + if (batch->ptr - batch->map > batch->buf->size) - _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + printf ("bad relocation ptr %p map %p offset %d size %lu\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); ret = dri_bo_emit_reloc(batch->buf, read_domains, write_domain, delta, batch->ptr - batch->map, buffer); @@ -291,13 +228,39 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, return GL_TRUE; } +GLboolean +intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, + drm_intel_bo *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + assert(delta < buffer->size); + + if (batch->ptr - batch->map > batch->buf->size) + printf ("bad relocation ptr %p map %p offset %d size %lu\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + ret = 
drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map, + buffer, delta, + read_domains, write_domain); + + /* + * Using the old buffer offset, write in what the right data would + * be, in case the buffer doesn't move and we can short-circuit the + * relocation processing in the kernel + */ + intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes, - enum cliprect_mode cliprect_mode) + const void *data, GLuint bytes) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes, cliprect_mode); + intel_batchbuffer_require_space(batch, bytes); __memcpy(batch->ptr, data, bytes); batch->ptr += bytes; } @@ -314,7 +277,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) struct intel_context *intel = batch->intel; if (intel->gen >= 4) { - BEGIN_BATCH(4, IGNORE_CLIPRECTS); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | @@ -324,7 +287,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); } else { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); + BEGIN_BATCH(1); OUT_BATCH(MI_FLUSH); ADVANCE_BATCH(); } diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h index d4a9445..e5ad261 100644 --- a/shared/intel_batchbuffer.h +++ b/shared/intel_batchbuffer.h @@ -10,35 +10,6 @@ #define BATCH_SZ 16384 #define BATCH_RESERVED 16 -enum cliprect_mode { - /** - * Batchbuffer contents may be looped over per cliprect, but do not - * require it. - */ - IGNORE_CLIPRECTS, - /** - * Batchbuffer contents require looping over per cliprect at batch submit - * time. - * - * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single - * constant cliprect, as in DRI2 or FBO rendering. - */ - LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that should not be executed multiple - * times. 
- */ - NO_LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that already handles cliprects, such - * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. - * - * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch - * outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when - * there's a constant cliprect, as in DRI2 or FBO rendering. - */ - REFERENCES_CLIPRECTS -}; struct intel_batchbuffer { @@ -51,15 +22,15 @@ struct intel_batchbuffer GLubyte *map; GLubyte *ptr; - enum cliprect_mode cliprect_mode; - GLuint size; +#ifdef DEBUG /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ struct { GLuint total; GLubyte *start_ptr; } emit; +#endif GLuint dirty_state; GLuint reserved_space; @@ -85,8 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch); * intel_buffer_dword() calls. */ void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes, - enum cliprect_mode cliprect_mode); + const void *data, GLuint bytes); void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLuint bytes); @@ -96,8 +66,24 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, uint32_t read_domains, uint32_t write_domain, uint32_t offset); +GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); +static INLINE uint32_t float_as_int(float f) +{ + union { + float f; + uint32_t d; + } fi; + + fi.f = f; + return fi.d; +} + /* Inline functions - might actually be better off with these * non-inlined. 
Certainly better off switching all command packets to * be passed as structs rather than dwords, but that's a little bit of @@ -113,66 +99,73 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch) static INLINE void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) { - assert(batch->map); +#ifdef DEBUG assert(intel_batchbuffer_space(batch) >= 4); +#endif *(GLuint *) (batch->ptr) = dword; batch->ptr += 4; } +static INLINE void +intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) +{ + intel_batchbuffer_emit_dword(batch, float_as_int(f)); +} + static INLINE void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - enum cliprect_mode cliprect_mode) + GLuint sz) { +#ifdef DEBUG assert(sz < batch->size - 8); +#endif if (intel_batchbuffer_space(batch) < sz) intel_batchbuffer_flush(batch); +} + +static INLINE void +intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) +{ + intel_batchbuffer_require_space(batch, n * 4); +#ifdef DEBUG + assert(batch->map); + assert(batch->emit.start_ptr == NULL); + batch->emit.total = n * 4; + batch->emit.start_ptr = batch->ptr; +#endif +} - if ((cliprect_mode == LOOP_CLIPRECTS || - cliprect_mode == REFERENCES_CLIPRECTS) && - batch->intel->constant_cliprect) - cliprect_mode = NO_LOOP_CLIPRECTS; - - if (cliprect_mode != IGNORE_CLIPRECTS) { - if (batch->cliprect_mode == IGNORE_CLIPRECTS) { - batch->cliprect_mode = cliprect_mode; - } else { - if (batch->cliprect_mode != cliprect_mode) { - intel_batchbuffer_flush(batch); - batch->cliprect_mode = cliprect_mode; - } - } +static INLINE void +intel_batchbuffer_advance(struct intel_batchbuffer *batch) +{ +#ifdef DEBUG + unsigned int _n = batch->ptr - batch->emit.start_ptr; + assert(batch->emit.start_ptr != NULL); + if (_n != batch->emit.total) { + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", + _n, batch->emit.total); + abort(); } + batch->emit.start_ptr = NULL; +#endif } /* Here are the crusty old 
macros, to be removed: */ #define BATCH_LOCALS -#define BEGIN_BATCH(n, cliprect_mode) do { \ - intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ - assert(intel->batch->emit.start_ptr == NULL); \ - intel->batch->emit.total = (n) * 4; \ - intel->batch->emit.start_ptr = intel->batch->ptr; \ -} while (0) - +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) - +#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ intel_batchbuffer_emit_reloc(intel->batch, buf, \ read_domains, write_domain, delta); \ } while (0) +#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ + intel_batchbuffer_emit_reloc_fenced(intel->batch, buf, \ + read_domains, write_domain, delta); \ +} while (0) -#define ADVANCE_BATCH() do { \ - unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ - assert(intel->batch->emit.start_ptr != NULL); \ - if (_n != intel->batch->emit.total) { \ - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ - _n, intel->batch->emit.total); \ - abort(); \ - } \ - intel->batch->emit.start_ptr = NULL; \ -} while(0) +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch); #endif diff --git a/shared/intel_blit.c b/shared/intel_blit.c index 9f638b0..f2769aa 100644 --- a/shared/intel_blit.c +++ b/shared/intel_blit.c @@ -38,141 +38,9 @@ #include "intel_reg.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_BLIT -/** - * Copy the back color buffer to the front color buffer. - * Used for SwapBuffers(). 
- */ -void -intelCopyBuffer(const __DRIdrawablePrivate * dPriv, - const drm_clip_rect_t * rect) -{ - - struct intel_context *intel; - const intelScreenPrivate *intelScreen; - - DBG("%s\n", __FUNCTION__); - - assert(dPriv); - - intel = intelScreenContext(dPriv->driScreenPriv->private); - if (!intel) - return; - - intelScreen = intel->intelScreen; - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. - */ - LOCK_HARDWARE(intel); - - if (dPriv && dPriv->numClipRects) { - struct intel_framebuffer *intel_fb = dPriv->driverPrivate; - struct intel_region *src, *dst; - int nbox = dPriv->numClipRects; - drm_clip_rect_t *pbox = dPriv->pClipRects; - int cpp; - int src_pitch, dst_pitch; - unsigned short src_x, src_y; - int BR13, CMD; - int i; - dri_bo *aper_array[3]; - - src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); - dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); - - src_pitch = src->pitch * src->cpp; - dst_pitch = dst->pitch * dst->cpp; - - cpp = src->cpp; - - ASSERT(intel_fb); - ASSERT(intel_fb->Base.Name == 0); /* Not a user-created FBO */ - ASSERT(src); - ASSERT(dst); - ASSERT(src->cpp == dst->cpp); - - if (cpp == 2) { - BR13 = (0xCC << 16) | BR13_565; - CMD = XY_SRC_COPY_BLT_CMD; - } - else { - BR13 = (0xCC << 16) | BR13_8888; - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - } - - assert(src->tiling != I915_TILING_Y); - assert(dst->tiling != I915_TILING_Y); -#ifndef I915 - if (src->tiling != I915_TILING_NONE) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - if (dst->tiling != I915_TILING_NONE) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } -#endif - /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, - REFERENCES_CLIPRECTS); - again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst->buffer; - aper_array[2] = src->buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - 
intel_batchbuffer_flush(intel->batch); - goto again; - } - - for (i = 0; i < nbox; i++, pbox++) { - drm_clip_rect_t box = *pbox; - - if (rect) { - if (!intel_intersect_cliprects(&box, &box, rect)) - continue; - } - - if (box.x1 >= box.x2 || - box.y1 >= box.y2) - continue; - - assert(box.x1 < box.x2); - assert(box.y1 < box.y2); - src_x = box.x1 - dPriv->x + dPriv->backX; - src_y = box.y1 - dPriv->y + dPriv->backY; - - BEGIN_BATCH(8, REFERENCES_CLIPRECTS); - OUT_BATCH(CMD); - OUT_BATCH(BR13 | dst_pitch); - OUT_BATCH((box.y1 << 16) | box.x1); - OUT_BATCH((box.y2 << 16) | box.x2); - - OUT_RELOC(dst->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - OUT_BATCH((src_y << 16) | src_x); - OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, - I915_GEM_DOMAIN_RENDER, 0, - 0); - ADVANCE_BATCH(); - } - - /* Flush the rendering and the batch so that the results all land on the - * screen in a timely fashion. - */ - intel_batchbuffer_emit_mi_flush(intel->batch); - intel_batchbuffer_flush(intel->batch); - } - - UNLOCK_HARDWARE(intel); -} - static GLuint translate_raster_op(GLenum logicop) { switch(logicop) { @@ -221,6 +89,10 @@ intelEmitCopyBlit(struct intel_context *intel, dri_bo *aper_array[3]; BATCH_LOCALS; + /* Blits are in a different ringbuffer so we don't use them. 
*/ + if (intel->gen >= 6) + return GL_FALSE; + if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -234,7 +106,7 @@ intelEmitCopyBlit(struct intel_context *intel, return GL_FALSE; } - /* do space/cliprects check before going any further */ + /* do space check before going any further */ do { aper_array[0] = intel->batch->buf; aper_array[1] = dst_buffer; @@ -247,27 +119,26 @@ intelEmitCopyBlit(struct intel_context *intel, break; } while (pass < 2); + intel_prepare_render(intel); + if (pass >= 2) { - LOCK_HARDWARE(intel); - dri_bo_map(dst_buffer, GL_TRUE); - dri_bo_map(src_buffer, GL_FALSE); - _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset, - cpp, - dst_pitch, - dst_x, dst_y, - w, h, - (GLubyte *)src_buffer->virtual + src_offset, - src_pitch, - src_x, src_y); - - dri_bo_unmap(src_buffer); - dri_bo_unmap(dst_buffer); - UNLOCK_HARDWARE(intel); - - return GL_TRUE; + drm_intel_gem_bo_map_gtt(dst_buffer); + drm_intel_gem_bo_map_gtt(src_buffer); + _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset, + cpp, + dst_pitch, + dst_x, dst_y, + w, h, + (GLubyte *)src_buffer->virtual + src_offset, + src_pitch, + src_x, src_y); + drm_intel_gem_bo_unmap_gtt(src_buffer); + drm_intel_gem_bo_unmap_gtt(dst_buffer); + + return GL_TRUE; } - intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, 8 * 4); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -312,19 +183,19 @@ intelEmitCopyBlit(struct intel_context *intel, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - BEGIN_BATCH(8, NO_LOOP_CLIPRECTS); + BEGIN_BATCH(8); OUT_BATCH(CMD); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, 
I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH((uint16_t)src_pitch); - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + OUT_RELOC_FENCED(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, + src_offset); ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -346,12 +217,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) struct intel_context *intel = intel_context(ctx); struct gl_framebuffer *fb = ctx->DrawBuffer; GLuint clear_depth; - GLbitfield skipBuffers = 0; - unsigned int num_cliprects; - struct drm_clip_rect *cliprects; - int x_off, y_off; + GLboolean all; + GLint cx, cy, cw, ch; BATCH_LOCALS; + /* Blits are in a different ringbuffer so we don't use them. */ + assert(intel->gen < 6); + /* * Compute values for clearing the buffers. */ @@ -363,187 +235,147 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; } - /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in - * the loop below. 
- */ - if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) { - skipBuffers = BUFFER_BIT_STENCIL; - } - - LOCK_HARDWARE(intel); - - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); - if (num_cliprects) { - GLint cx, cy, cw, ch; - drm_clip_rect_t clear; - int i; - - /* Get clear bounds after locking */ - cx = fb->_Xmin; + cx = fb->_Xmin; + if (fb->Name == 0) + cy = ctx->DrawBuffer->Height - fb->_Ymax; + else cy = fb->_Ymin; - cw = fb->_Xmax - cx; - ch = fb->_Ymax - cy; + cw = fb->_Xmax - fb->_Xmin; + ch = fb->_Ymax - fb->_Ymin; - if (fb->Name == 0) { - /* clearing a window */ + if (cw == 0 || ch == 0) + return; - /* flip top to bottom */ - clear.x1 = cx + x_off; - clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch; - clear.x2 = clear.x1 + cw; - clear.y2 = clear.y1 + ch; - } - else { - /* clearing FBO */ - assert(num_cliprects == 1); - assert(cliprects == &intel->fboRect); - clear.x1 = cx; - clear.y1 = cy; - clear.x2 = clear.x1 + cw; - clear.y2 = clear.y1 + ch; - /* no change to mask */ + GLuint buf; + all = (cw == fb->Width && ch == fb->Height); + + intel_prepare_render(intel); + + /* Loop over all renderbuffers */ + for (buf = 0; buf < BUFFER_COUNT && mask; buf++) { + const GLbitfield bufBit = 1 << buf; + struct intel_renderbuffer *irb; + drm_intel_bo *write_buffer; + int x1, y1, x2, y2; + uint32_t clear_val; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + + if (!(mask & bufBit)) + continue; + + /* OK, clear this renderbuffer */ + irb = intel_get_renderbuffer(fb, buf); + write_buffer = intel_region_buffer(intel, irb->region, + all ? 
INTEL_WRITE_FULL : + INTEL_WRITE_PART); + x1 = cx + irb->region->draw_x; + y1 = cy + irb->region->draw_y; + x2 = cx + cw + irb->region->draw_x; + y2 = cy + ch + irb->region->draw_y; + + pitch = irb->region->pitch; + cpp = irb->region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + irb->region->buffer, (pitch * cpp), + x1, y1, x2 - x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + + /* Setup the blit command */ + if (cpp == 4) { + BR13 |= BR13_8888; + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { + if (mask & BUFFER_BIT_DEPTH) + CMD |= XY_BLT_WRITE_RGB; + if (mask & BUFFER_BIT_STENCIL) + CMD |= XY_BLT_WRITE_ALPHA; + } else { + /* clearing RGBA */ + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + } else { + ASSERT(cpp == 2); + BR13 |= BR13_565; } - for (i = 0; i < num_cliprects; i++) { - const drm_clip_rect_t *box = &cliprects[i]; - drm_clip_rect_t b; - GLuint buf; - GLuint clearMask = mask; /* use copy, since we modify it below */ - GLboolean all = (cw == fb->Width && ch == fb->Height); - - if (!all) { - intel_intersect_cliprects(&b, &clear, box); - } - else { - b = *box; - } - - if (b.x1 >= b.x2 || b.y1 >= b.y2) - continue; - - if (0) - _mesa_printf("clear %d,%d..%d,%d, mask %x\n", - b.x1, b.y1, b.x2, b.y2, mask); - - /* Loop over all renderbuffers */ - for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) { - const GLbitfield bufBit = 1 << buf; - if ((clearMask & bufBit) && !(bufBit & skipBuffers)) { - /* OK, clear this renderbuffer */ - struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, buf); - dri_bo *write_buffer = - intel_region_buffer(intel, irb->region, - all ? 
INTEL_WRITE_FULL : - INTEL_WRITE_PART); - int x1 = b.x1 + irb->region->draw_x; - int y1 = b.y1 + irb->region->draw_y; - int x2 = b.x2 + irb->region->draw_x; - int y2 = b.y2 + irb->region->draw_y; - - GLuint clearVal; - GLint pitch, cpp; - GLuint BR13, CMD; - - pitch = irb->region->pitch; - cpp = irb->region->cpp; - - DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __FUNCTION__, - irb->region->buffer, (pitch * cpp), - x1, y1, x2 - x1, y2 - y1); - - BR13 = 0xf0 << 16; - CMD = XY_COLOR_BLT_CMD; - - /* Setup the blit command */ - if (cpp == 4) { - BR13 |= BR13_8888; - if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { - if (clearMask & BUFFER_BIT_DEPTH) - CMD |= XY_BLT_WRITE_RGB; - if (clearMask & BUFFER_BIT_STENCIL) - CMD |= XY_BLT_WRITE_ALPHA; - } - else { - /* clearing RGBA */ - CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - } - } - else { - ASSERT(cpp == 2); - BR13 |= BR13_565; - } - - assert(irb->region->tiling != I915_TILING_Y); + assert(irb->region->tiling != I915_TILING_Y); #ifndef I915 - if (irb->region->tiling != I915_TILING_NONE) { - CMD |= XY_DST_TILED; - pitch /= 4; - } + if (irb->region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } #endif - BR13 |= (pitch * cpp); - - if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { - clearVal = clear_depth; - } - else { - uint8_t clear[4]; - GLclampf *color = ctx->Color.ClearColor; - - CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - clearVal = intel->ClearColor8888; - break; - case MESA_FORMAT_RGB565: - clearVal = intel->ClearColor565; - break; - case MESA_FORMAT_ARGB4444: - clearVal = PACK_COLOR_4444(clear[3], clear[0], - clear[1], clear[2]); - break; - case MESA_FORMAT_ARGB1555: - clearVal = PACK_COLOR_1555(clear[3], clear[0], - clear[1], clear[2]); - break; - default: - 
_mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->Base.Format); - clearVal = 0; - } - } - - /* - _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n", - buf, irb->Base.Name); - */ - - assert(x1 < x2); - assert(y1 < y2); - - BEGIN_BATCH(6, REFERENCES_CLIPRECTS); - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((y1 << 16) | x1); - OUT_BATCH((y2 << 16) | x2); - OUT_RELOC(write_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - OUT_BATCH(clearVal); - ADVANCE_BATCH(); - clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ - } - } + BR13 |= (pitch * cpp); + + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { + clear_val = clear_depth; + } else { + uint8_t clear[4]; + GLclampf *color = ctx->Color.ClearColor; + + CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); + + switch (irb->Base.Format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + clear_val = PACK_COLOR_8888(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_RGB565: + clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB4444: + clear_val = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clear_val = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + default: + _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", + irb->Base.Format); + clear_val = 0; + } } - } - UNLOCK_HARDWARE(intel); + assert(x1 < x2); + assert(y1 < y2); + + /* do space check before going any further */ + aper_array[0] = intel->batch->buf; + aper_array[1] = write_buffer; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel->batch); + } + + BEGIN_BATCH(6); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + 
OUT_RELOC_FENCED(write_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(clear_val); + ADVANCE_BATCH(); + + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) + mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + else + mask &= ~bufBit; /* turn off bit, for faster loop exit */ + } } GLboolean @@ -562,6 +394,10 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; + /* Blits are in a different ringbuffer so we don't use them. */ + if (intel->gen >= 6) + return GL_FALSE; + if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -585,8 +421,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_require_space( intel->batch, (8 * 4) + (3 * 4) + - dwords * 4, - REFERENCES_CLIPRECTS ); + dwords * 4 ); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -608,14 +443,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); + BEGIN_BATCH(8 + 3); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ @@ -627,8 +462,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_data( intel->batch, src_bits, - dwords * 4, - REFERENCES_CLIPRECTS ); + dwords * 4 ); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -649,6 +483,9 @@ intel_emit_linear_blit(struct intel_context *intel, { GLuint pitch, height; + /* Blits are in a different ringbuffer so we don't use them. */ + assert(intel->gen < 6); + /* The pitch is a signed value. 
*/ pitch = MIN2(size, (1 << 15) - 1); height = size / pitch; diff --git a/shared/intel_blit.h b/shared/intel_blit.h index 240cb7c..eb66fe0 100644 --- a/shared/intel_blit.h +++ b/shared/intel_blit.h @@ -30,7 +30,7 @@ #include "intel_context.h" -extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv, +extern void intelCopyBuffer(const __DRIdrawable * dpriv, const drm_clip_rect_t * rect); extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask); diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c index 3b7015b..103aaf2 100644 --- a/shared/intel_buffer_objects.c +++ b/shared/intel_buffer_objects.c @@ -31,10 +31,12 @@ #include "main/macros.h" #include "main/bufferobj.h" -#include "intel_context.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" #include "intel_regions.h" static GLboolean @@ -113,7 +115,7 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) if (obj->Pointer) intel_bufferobj_unmap(ctx, 0, obj); - _mesa_free(intel_obj->sys_buffer); + free(intel_obj->sys_buffer); if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); } @@ -121,7 +123,7 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) dri_bo_unreference(intel_obj->buffer); } - _mesa_free(intel_obj); + free(intel_obj); } @@ -155,7 +157,7 @@ intel_bufferobj_data(GLcontext * ctx, dri_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; } - _mesa_free(intel_obj->sys_buffer); + free(intel_obj->sys_buffer); intel_obj->sys_buffer = NULL; if (size != 0) { @@ -164,7 +166,7 @@ intel_bufferobj_data(GLcontext * ctx, * with their contents anyway. 
*/ if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { - intel_obj->sys_buffer = _mesa_malloc(size); + intel_obj->sys_buffer = malloc(size); if (intel_obj->sys_buffer != NULL) { if (data != NULL) memcpy(intel_obj->sys_buffer, data, size); @@ -285,7 +287,7 @@ intel_bufferobj_map(GLcontext * ctx, return NULL; } - if (write_only && intel->intelScreen->kernel_exec_fencing) { + if (write_only) { drm_intel_gem_bo_map_gtt(intel_obj->buffer); intel_obj->mapped_gtt = GL_TRUE; } else { @@ -373,14 +375,13 @@ intel_bufferobj_map_range(GLcontext * ctx, if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && drm_intel_bo_busy(intel_obj->buffer)) { if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { - intel_obj->range_map_buffer = _mesa_malloc(length); + intel_obj->range_map_buffer = malloc(length); obj->Pointer = intel_obj->range_map_buffer; } else { intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, "range map", length, 64); - if (!(access & GL_MAP_READ_BIT) && - intel->intelScreen->kernel_exec_fencing) { + if (!(access & GL_MAP_READ_BIT)) { drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); intel_obj->mapped_gtt = GL_TRUE; } else { @@ -393,8 +394,7 @@ intel_bufferobj_map_range(GLcontext * ctx, return obj->Pointer; } - if (!(access & GL_MAP_READ_BIT) && - intel->intelScreen->kernel_exec_fencing) { + if (!(access & GL_MAP_READ_BIT)) { drm_intel_gem_bo_map_gtt(intel_obj->buffer); intel_obj->mapped_gtt = GL_TRUE; } else { @@ -523,7 +523,7 @@ intel_bufferobj_buffer(struct intel_context *intel, intel_obj->Base.Size, sys_buffer, &intel_obj->Base); - _mesa_free(sys_buffer); + free(sys_buffer); intel_obj->sys_buffer = NULL; } @@ -588,6 +588,126 @@ intel_bufferobj_copy_subdata(GLcontext *ctx, intel_batchbuffer_emit_mi_flush(intel->batch); } +#if FEATURE_APPLE_object_purgeable +static GLenum +intel_buffer_purgeable(GLcontext * ctx, + drm_intel_bo *buffer, + GLenum option) +{ + int retained = 0; + + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED); 
+ + return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; +} + +static GLenum +intel_buffer_object_purgeable(GLcontext * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + struct intel_buffer_object *intel; + + intel = intel_buffer_object (obj); + if (intel->buffer != NULL) + return intel_buffer_purgeable (ctx, intel->buffer, option); + + if (option == GL_RELEASED_APPLE) { + if (intel->sys_buffer != NULL) { + free(intel->sys_buffer); + intel->sys_buffer = NULL; + } + + return GL_RELEASED_APPLE; + } else { + /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ + return intel_buffer_purgeable (ctx, + intel_bufferobj_buffer(intel_context(ctx), + intel, INTEL_READ), + option); + } +} + +static GLenum +intel_texture_object_purgeable(GLcontext * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable (ctx, intel->mt->region->buffer, option); +} + +static GLenum +intel_render_object_purgeable(GLcontext * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + intel = intel_renderbuffer(obj); + if (intel->region == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable (ctx, intel->region->buffer, option); +} + +static GLenum +intel_buffer_unpurgeable(GLcontext * ctx, + drm_intel_bo *buffer, + GLenum option) +{ + int retained; + + retained = 0; + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED); + + return retained ? 
GL_RETAINED_APPLE : GL_UNDEFINED_APPLE; +} + +static GLenum +intel_buffer_object_unpurgeable(GLcontext * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + return intel_buffer_unpurgeable (ctx, intel_buffer_object (obj)->buffer, option); +} + +static GLenum +intel_texture_object_unpurgeable(GLcontext * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable (ctx, intel->mt->region->buffer, option); +} + +static GLenum +intel_render_object_unpurgeable(GLcontext * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + intel = intel_renderbuffer(obj); + if (intel->region == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable (ctx, intel->region->buffer, option); +} +#endif + void intelInitBufferObjectFuncs(struct dd_function_table *functions) { @@ -601,4 +721,14 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions) functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; functions->UnmapBuffer = intel_bufferobj_unmap; functions->CopyBufferSubData = intel_bufferobj_copy_subdata; + +#if FEATURE_APPLE_object_purgeable + functions->BufferObjectPurgeable = intel_buffer_object_purgeable; + functions->TextureObjectPurgeable = intel_texture_object_purgeable; + functions->RenderObjectPurgeable = intel_render_object_purgeable; + + functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable; + functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable; + functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable; +#endif } diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c index 0564318..b106930 100644 --- a/shared/intel_buffers.c +++ b/shared/intel_buffers.c @@ -28,45 +28,7 @@ #include "intel_context.h" #include "intel_buffers.h" #include 
"intel_fbo.h" -#include "intel_regions.h" -#include "intel_batchbuffer.h" #include "main/framebuffer.h" -#include "drirenderbuffer.h" - - -/** - * XXX move this into a new dri/common/cliprects.c file. - */ -GLboolean -intel_intersect_cliprects(drm_clip_rect_t * dst, - const drm_clip_rect_t * a, - const drm_clip_rect_t * b) -{ - GLint bx = b->x1; - GLint by = b->y1; - GLint bw = b->x2 - bx; - GLint bh = b->y2 - by; - - if (bx < a->x1) - bw -= a->x1 - bx, bx = a->x1; - if (by < a->y1) - bh -= a->y1 - by, by = a->y1; - if (bx + bw > a->x2) - bw = a->x2 - bx; - if (by + bh > a->y2) - bh = a->y2 - by; - if (bw <= 0) - return GL_FALSE; - if (bh <= 0) - return GL_FALSE; - - dst->x1 = bx; - dst->y1 = by; - dst->x2 = bx + bw; - dst->y2 = by + bh; - - return GL_TRUE; -} /** * Return pointer to current color drawing region, or NULL. @@ -96,42 +58,6 @@ intel_readbuf_region(struct intel_context *intel) return NULL; } -void -intel_get_cliprects(struct intel_context *intel, - struct drm_clip_rect **cliprects, - unsigned int *num_cliprects, - int *x_off, int *y_off) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (intel->constant_cliprect) { - /* FBO or DRI2 rendering, which can just use the fb's size. */ - intel->fboRect.x1 = 0; - intel->fboRect.y1 = 0; - intel->fboRect.x2 = intel->ctx.DrawBuffer->Width; - intel->fboRect.y2 = intel->ctx.DrawBuffer->Height; - - *cliprects = &intel->fboRect; - *num_cliprects = 1; - *x_off = 0; - *y_off = 0; - } else if (intel->front_cliprects || dPriv->numBackClipRects == 0) { - /* use the front clip rects */ - *cliprects = dPriv->pClipRects; - *num_cliprects = dPriv->numClipRects; - *x_off = dPriv->x; - *y_off = dPriv->y; - } - else { - /* use the back clip rects */ - *num_cliprects = dPriv->numBackClipRects; - *cliprects = dPriv->pBackClipRects; - *x_off = dPriv->backX; - *y_off = dPriv->backY; - } -} - - /** * Check if we're about to draw into the front color buffer. * If so, set the intel->front_buffer_dirty field to true. 
@@ -202,7 +128,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) || (fb->_NumColorDrawBuffers == 0)) { /* writing to 0 */ colorRegions[0] = NULL; - intel->constant_cliprect = GL_TRUE; } else if (fb->_NumColorDrawBuffers > 1) { int i; @@ -212,34 +137,23 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); colorRegions[i] = irb ? irb->region : NULL; } - intel->constant_cliprect = GL_TRUE; } else { /* Get the intel_renderbuffer for the single colorbuffer we're drawing - * into, and set up cliprects if it's a DRI1 window front buffer. + * into. */ if (fb->Name == 0) { - intel->constant_cliprect = intel->driScreen->dri2.enabled; /* drawing to window system buffer */ - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - if (!intel->constant_cliprect && !intel->front_cliprects) - intel_batchbuffer_flush(intel->batch); - intel->front_cliprects = GL_TRUE; + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); - } - else { - if (!intel->constant_cliprect && intel->front_cliprects) - intel_batchbuffer_flush(intel->batch); - intel->front_cliprects = GL_FALSE; + else colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT); - } } else { /* drawing to user-created FBO */ struct intel_renderbuffer *irb; irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); colorRegions[0] = (irb && irb->region) ? irb->region : NULL; - intel->constant_cliprect = GL_TRUE; } } @@ -291,6 +205,12 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); } + /* If we have a (packed) stencil buffer attached but no depth buffer, + * we still need to set up the shared depth/stencil state so we can use it. 
+ */ + if (depthRegion == NULL && irbStencil && irbStencil->region) + depthRegion = irbStencil->region; + /* * Update depth and stencil test state */ @@ -351,13 +271,12 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode) intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) || (mode == GL_FRONT); - /* If we weren't front-buffer rendering before but we are now, make sure - * that the front-buffer has actually been allocated. + /* If we weren't front-buffer rendering before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start rendering again. */ - if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) { - intel_update_renderbuffers(intel->driContext, - intel->driContext->driDrawablePriv); - } + if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) + dri2InvalidateDrawable(intel->driContext->driDrawablePriv); } intel_draw_buffer(ctx, ctx->DrawBuffer); @@ -375,13 +294,12 @@ intelReadBuffer(GLcontext * ctx, GLenum mode) intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) || (mode == GL_FRONT); - /* If we weren't front-buffer reading before but we are now, make sure - * that the front-buffer has actually been allocated. + /* If we weren't front-buffer reading before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start reading again. 
*/ - if (!was_front_buffer_reading && intel->is_front_buffer_reading) { - intel_update_renderbuffers(intel->driContext, - intel->driContext->driDrawablePriv); - } + if (!was_front_buffer_reading && intel->is_front_buffer_reading) + dri2InvalidateDrawable(intel->driContext->driReadablePriv); } if (ctx->ReadBuffer == ctx->DrawBuffer) { diff --git a/shared/intel_buffers.h b/shared/intel_buffers.h index d7800f2..abb86aa 100644 --- a/shared/intel_buffers.h +++ b/shared/intel_buffers.h @@ -35,12 +35,6 @@ struct intel_context; struct intel_framebuffer; - -extern GLboolean -intel_intersect_cliprects(drm_clip_rect_t * dest, - const drm_clip_rect_t * a, - const drm_clip_rect_t * b); - extern struct intel_region *intel_readbuf_region(struct intel_context *intel); extern struct intel_region *intel_drawbuf_region(struct intel_context *intel); diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h index 3dc8653..a0b2266 100644 --- a/shared/intel_chipset.h +++ b/shared/intel_chipset.h @@ -1,4 +1,4 @@ -/* + /* * Copyright © 2007 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a @@ -71,6 +71,8 @@ #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 +#define PCI_CHIP_SANDYBRIDGE 0x0102 + #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ devid == PCI_CHIP_I915_GM || \ devid == PCI_CHIP_I945_GM || \ @@ -104,14 +106,20 @@ devid == PCI_CHIP_Q33_G || \ devid == PCI_CHIP_Q35_G || IS_IGD(devid)) -#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ +#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \ devid == PCI_CHIP_I965_Q || \ devid == PCI_CHIP_I965_G_1 || \ devid == PCI_CHIP_I965_GM || \ devid == PCI_CHIP_I965_GME || \ devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE) + +#define IS_965(devid) (IS_GEN4(devid) || \ IS_G4X(devid) || \ - IS_IGDNG(devid)) + IS_IGDNG(devid) || \ + IS_GEN6(devid)) #define IS_9XX(devid) (IS_915(devid) || \ IS_945(devid) || \ diff --git 
a/shared/intel_clear.c b/shared/intel_clear.c index f682ee3..03b24e2 100644 --- a/shared/intel_clear.c +++ b/shared/intel_clear.c @@ -33,12 +33,9 @@ #include "intel_context.h" #include "intel_blit.h" -#include "intel_chipset.h" #include "intel_clear.h" #include "intel_fbo.h" -#include "intel_pixel.h" #include "intel_regions.h" -#include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -68,7 +65,7 @@ static void intelClear(GLcontext *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); - const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); + const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield tri_mask = 0; GLbitfield blit_mask = 0; GLbitfield swrast_mask = 0; @@ -136,6 +133,12 @@ intelClear(GLcontext *ctx, GLbitfield mask) } } + if (intel->gen >= 6) { + /* Blits are in a different ringbuffer so we don't use them. */ + tri_mask |= blit_mask; + blit_mask = 0; + } + /* SW fallback clearing */ swrast_mask = mask & ~tri_mask & ~blit_mask; diff --git a/shared/intel_context.c b/shared/intel_context.c index dd24b71..d6a1ba6 100644 --- a/shared/intel_context.c +++ b/shared/intel_context.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/context.h" -/* #include "main/arrayobj.h" */ #include "main/extensions.h" #include "main/framebuffer.h" #include "main/imports.h" @@ -52,15 +51,11 @@ #include "intel_regions.h" #include "intel_buffer_objects.h" #include "intel_fbo.h" -#include "intel_decode.h" #include "intel_bufmgr.h" #include "intel_screen.h" -#include "intel_swapbuffers.h" #include "drirenderbuffer.h" -#include "vblank.h" #include "utils.h" -#include "xmlpool.h" /* for symbolic values of enum-type options */ #ifndef INTEL_DEBUG @@ -68,12 +63,10 @@ int INTEL_DEBUG = (0); #endif -#define DRIVER_DATE "20091221 2009Q4" +#define DRIVER_DATE "20091221 DEVELOPMENT" #define DRIVER_DATE_GEM "GEM " DRIVER_DATE -static void intel_flush(GLcontext *ctx, GLboolean needs_mi_flush); - static const GLubyte * 
intelGetString(GLcontext * ctx, GLenum name) { @@ -176,9 +169,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; } - (void) driGetRendererString(buffer, chipset, - (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE, - 0); + (void) driGetRendererString(buffer, chipset, DRIVER_DATE_GEM, 0); return (GLubyte *) buffer; default: @@ -195,17 +186,31 @@ intel_bits_per_pixel(const struct intel_renderbuffer *rb) void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) { - struct intel_framebuffer *intel_fb = drawable->driverPrivate; + struct gl_framebuffer *fb = drawable->driverPrivate; struct intel_renderbuffer *rb; struct intel_region *region, *depth_region; struct intel_context *intel = context->driverPrivate; + struct intel_renderbuffer *front_rb, *back_rb, *depth_rb, *stencil_rb; __DRIbuffer *buffers = NULL; __DRIscreen *screen; int i, count; unsigned int attachments[10]; - uint32_t name; const char *region_name; + /* If we're rendering to the fake front buffer, make sure all the + * pending drawing has landed on the real front buffer. Otherwise + * when we eventually get to DRI2GetBuffersWithFormat the stale + * real front buffer contents will get copied to the new fake front + * buffer. + */ + if (intel->is_front_buffer_rendering) + intel_flush(&intel->ctx, GL_FALSE); + + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. 
*/ + drawable->lastStamp = drawable->dri2.stamp; + if (INTEL_DEBUG & DEBUG_DRI) fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); @@ -214,26 +219,25 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) if (screen->dri2.loader && (screen->dri2.loader->base.version > 2) && (screen->dri2.loader->getBuffersWithFormat != NULL)) { - struct intel_renderbuffer *depth_rb; - struct intel_renderbuffer *stencil_rb; + + front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + depth_rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + stencil_rb = intel_get_renderbuffer(fb, BUFFER_STENCIL); i = 0; if ((intel->is_front_buffer_rendering || intel->is_front_buffer_reading || - !intel_fb->color_rb[1]) - && intel_fb->color_rb[0]) { + !back_rb) && front_rb) { attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); + attachments[i++] = intel_bits_per_pixel(front_rb); } - if (intel_fb->color_rb[1]) { + if (back_rb) { attachments[i++] = __DRI_BUFFER_BACK_LEFT; - attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[1]); + attachments[i++] = intel_bits_per_pixel(back_rb); } - depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); - stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); - if ((depth_rb != NULL) && (stencil_rb != NULL)) { attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; attachments[i++] = intel_bits_per_pixel(depth_rb); @@ -254,13 +258,13 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) drawable->loaderPrivate); } else if (screen->dri2.loader) { i = 0; - if (intel_fb->color_rb[0]) + if (intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT)) attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (intel_fb->color_rb[1]) + if (intel_get_renderbuffer(fb, BUFFER_BACK_LEFT)) attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH)) + if 
(intel_get_renderbuffer(fb, BUFFER_DEPTH)) attachments[i++] = __DRI_BUFFER_DEPTH; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL)) + if (intel_get_renderbuffer(fb, BUFFER_STENCIL)) attachments[i++] = __DRI_BUFFER_STENCIL; buffers = (*screen->dri2.loader->getBuffers)(drawable, @@ -293,32 +297,32 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) for (i = 0; i < count; i++) { switch (buffers[i].attachment) { case __DRI_BUFFER_FRONT_LEFT: - rb = intel_fb->color_rb[0]; + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); region_name = "dri2 front buffer"; break; case __DRI_BUFFER_FAKE_FRONT_LEFT: - rb = intel_fb->color_rb[0]; + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); region_name = "dri2 fake front buffer"; break; case __DRI_BUFFER_BACK_LEFT: - rb = intel_fb->color_rb[1]; + rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); region_name = "dri2 back buffer"; break; case __DRI_BUFFER_DEPTH: - rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); region_name = "dri2 depth buffer"; break; case __DRI_BUFFER_DEPTH_STENCIL: - rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); region_name = "dri2 depth / stencil buffer"; break; case __DRI_BUFFER_STENCIL: - rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + rb = intel_get_renderbuffer(fb, BUFFER_STENCIL); region_name = "dri2 stencil buffer"; break; @@ -333,11 +337,8 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) if (rb == NULL) continue; - if (rb->region) { - dri_bo_flink(rb->region->buffer, &name); - if (name == buffers[i].name) + if (rb->region && rb->region->name == buffers[i].name) continue; - } if (INTEL_DEBUG & DEBUG_DRI) fprintf(stderr, @@ -365,15 +366,12 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) intel_region_release(®ion); if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) 
{ - rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + rb = intel_get_renderbuffer(fb, BUFFER_STENCIL); if (rb != NULL) { struct intel_region *stencil_region = NULL; - if (rb->region) { - dri_bo_flink(rb->region->buffer, &name); - if (name == buffers[i].name) + if (rb->region && rb->region->name == buffers[i].name) continue; - } intel_region_reference(&stencil_region, region); intel_renderbuffer_set_region(rb, stencil_region); @@ -385,41 +383,41 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) driUpdateFramebufferSize(&intel->ctx, drawable); } +void +intel_prepare_render(struct intel_context *intel) +{ + __DRIcontext *driContext = intel->driContext; + __DRIdrawable *drawable; + + drawable = intel->driDrawable; + if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = intel->driReadDrawable; + if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } +} + void intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) { struct intel_context *intel = intel_context(ctx); __DRIcontext *driContext = intel->driContext; - void (*old_viewport)(GLcontext *ctx, GLint x, GLint y, - GLsizei w, GLsizei h); - - if (!driContext->driScreenPriv->dri2.enabled) - return; - - if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { - /* If we're rendering to the fake front buffer, make sure all the pending - * drawing has landed on the real front buffer. Otherwise when we - * eventually get to DRI2GetBuffersWithFormat the stale real front - * buffer contents will get copied to the new fake front buffer. 
- */ - if (intel->is_front_buffer_rendering) { - intel_flush(ctx, GL_FALSE); - } - intel_update_renderbuffers(driContext, driContext->driDrawablePriv); - if (driContext->driDrawablePriv != driContext->driReadablePriv) - intel_update_renderbuffers(driContext, driContext->driReadablePriv); + if (!intel->using_dri2_swapbuffers && + !intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { + dri2InvalidateDrawable(driContext->driDrawablePriv); + dri2InvalidateDrawable(driContext->driReadablePriv); } - - old_viewport = ctx->Driver.Viewport; - ctx->Driver.Viewport = NULL; - intel->driDrawable = driContext->driDrawablePriv; - intelWindowMoved(intel); - intel_draw_buffer(ctx, intel->ctx.DrawBuffer); - ctx->Driver.Viewport = old_viewport; } - static const struct dri_debug_control debug_control[] = { { "tex", DEBUG_TEXTURE}, { "state", DEBUG_STATE}, @@ -469,7 +467,7 @@ intelInvalidateState(GLcontext * ctx, GLuint new_state) intel->vtbl.invalidate_state( intel, new_state ); } -static void +void intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) { struct intel_context *intel = intel_context(ctx); @@ -480,13 +478,6 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->gen < 4) INTEL_FIREVERTICES(intel); - /* Emit a flush so that any frontbuffer rendering that might have occurred - * lands onscreen in a timely manner, even if the X Server doesn't trigger - * a flush for us. - */ - if (!intel->driScreen->dri2.enabled && needs_mi_flush) - intel_batchbuffer_emit_mi_flush(intel->batch); - if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); @@ -538,7 +529,8 @@ intel_glFlush(GLcontext *ctx) * and getting our hands on that doesn't seem worth it, so we just us the * first batch we emitted after the last swap. 
*/ - if (intel->first_post_swapbuffers_batch != NULL) { + if (!intel->using_dri2_swapbuffers && + intel->first_post_swapbuffers_batch != NULL) { drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); intel->first_post_swapbuffers_batch = NULL; @@ -592,39 +584,55 @@ intelInitDriverFunctions(struct dd_function_table *functions) GLboolean intelInitContext(struct intel_context *intel, const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate, struct dd_function_table *functions) { GLcontext *ctx = &intel->ctx; GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; - int fthrottle_mode; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->private; + int bo_reuse_mode; + + /* we can't do anything without a connection to the device */ + if (intelScreen->bufmgr == NULL) + return GL_FALSE; if (!_mesa_initialize_context(&intel->ctx, mesaVis, shareCtx, functions, (void *) intel)) { - _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__); + printf("%s: failed to init mesa context\n", __FUNCTION__); return GL_FALSE; } driContextPriv->driverPrivate = intel; intel->intelScreen = intelScreen; intel->driScreen = sPriv; - intel->sarea = intelScreen->sarea; intel->driContext = driContextPriv; + intel->driFd = sPriv->fd; - if (IS_965(intel->intelScreen->deviceID)) + if (IS_GEN6(intel->intelScreen->deviceID)) { + intel->gen = 6; + intel->needs_ff_sync = GL_TRUE; + intel->has_luminance_srgb = GL_TRUE; + } else if (IS_965(intel->intelScreen->deviceID)) { intel->gen = 4; - else if (IS_9XX(intel->intelScreen->deviceID)) + } else if (IS_9XX(intel->intelScreen->deviceID)) { intel->gen = 3; - else + if (IS_945(intel->intelScreen->deviceID)) { + 
intel->is_945 = GL_TRUE; + } + } else { intel->gen = 2; + } - /* Dri stuff */ - intel->hHWContext = driContextPriv->hHWContext; - intel->driFd = sPriv->fd; - intel->driHwLock = sPriv->lock; + if (IS_IGDNG(intel->intelScreen->deviceID)) { + intel->is_ironlake = GL_TRUE; + intel->needs_ff_sync = GL_TRUE; + intel->has_luminance_srgb = GL_TRUE; + } else if (IS_G4X(intel->intelScreen->deviceID)) { + intel->has_luminance_srgb = GL_TRUE; + intel->is_g4x = GL_TRUE; + } driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, intel->driScreen->myNum, @@ -635,18 +643,14 @@ intelInitContext(struct intel_context *intel, intel->maxBatchSize = BATCH_SZ; intel->bufmgr = intelScreen->bufmgr; - intel->ttm = intelScreen->ttm; - if (intel->ttm) { - int bo_reuse_mode; - bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); - switch (bo_reuse_mode) { - case DRI_CONF_BO_REUSE_DISABLED: - break; - case DRI_CONF_BO_REUSE_ALL: - intel_bufmgr_gem_enable_reuse(intel->bufmgr); - break; - } + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_bufmgr_gem_enable_reuse(intel->bufmgr); + break; } /* This doesn't yet catch all non-conformant rendering, but it's a @@ -732,28 +736,12 @@ intelInitContext(struct intel_context *intel, intel->RenderIndex = ~0; - fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode"); - intel->irqsEmitted = 0; - - intel->do_irqs = (intel->intelScreen->irq_active && - fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); - - intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - - if (intel->gen >= 4 && !intel->intelScreen->irq_active) { - _mesa_printf("IRQs not active. 
Exiting\n"); - exit(1); - } - intelInitExtensions(ctx); INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - if (!sPriv->dri2.enabled) - intel_recreate_static_regions(intel); - intel->batch = intel_batchbuffer_alloc(intel); intel_fbo_init(intel); @@ -767,12 +755,6 @@ intelInitContext(struct intel_context *intel, } intel->use_texture_tiling = driQueryOptionb(&intel->optionCache, "texture_tiling"); - if (intel->use_texture_tiling && - !intel->intelScreen->kernel_exec_fencing) { - fprintf(stderr, "No kernel support for execution fencing, " - "disabling texture tiling\n"); - intel->use_texture_tiling = GL_FALSE; - } intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); intel->prim.primitive = ~0; @@ -802,7 +784,7 @@ intelInitContext(struct intel_context *intel, } void -intelDestroyContext(__DRIcontextPrivate * driContextPriv) +intelDestroyContext(__DRIcontext * driContextPriv) { struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; @@ -849,57 +831,6 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) */ } - /* XXX In intelMakeCurrent() below, the context's static regions are - * referenced inside the frame buffer; it's listed as a hack, - * with a comment of "XXX FBO temporary fix-ups!", but - * as long as it's there, we should release the regions here. - * The do/while loop around the block is used to allow the - * "continue" statements inside the block to exit the block, - * to avoid many layers of "if" constructs. - */ - do { - __DRIdrawablePrivate * driDrawPriv = intel->driDrawable; - struct intel_framebuffer *intel_fb; - struct intel_renderbuffer *irbDepth, *irbStencil; - if (!driDrawPriv) { - /* We're already detached from the drawable; exit this block. 
*/ - continue; - } - intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; - if (!intel_fb) { - /* The frame buffer is already gone; exit this block. */ - continue; - } - irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); - irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); - - /* If the regions of the frame buffer still match the regions - * of the context, release them. If they've changed somehow, - * leave them alone. - */ - if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) { - intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); - } - if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) { - intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); - } - - if (irbDepth && irbDepth->region == intel->depth_region) { - intel_renderbuffer_set_region(irbDepth, NULL); - } - /* Usually, the stencil buffer is the same as the depth buffer; - * but they're handled separately in MakeCurrent, so we'll - * handle them separately here. 
- */ - if (irbStencil && irbStencil->region == intel->depth_region) { - intel_renderbuffer_set_region(irbStencil, NULL); - } - } while (0); - - intel_region_release(&intel->front_region); - intel_region_release(&intel->back_region); - intel_region_release(&intel->depth_region); - driDestroyOptionCache(&intel->optionCache); /* free the Mesa context */ @@ -911,7 +842,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) } GLboolean -intelUnbindContext(__DRIcontextPrivate * driContextPriv) +intelUnbindContext(__DRIcontext * driContextPriv) { struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; @@ -925,11 +856,10 @@ intelUnbindContext(__DRIcontextPrivate * driContextPriv) } GLboolean -intelMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { - __DRIscreenPrivate *psp = driDrawPriv->driScreenPriv; struct intel_context *intel; GET_CURRENT_CONTEXT(curCtx); @@ -947,80 +877,15 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, } if (driContextPriv) { - struct intel_framebuffer *intel_fb = - (struct intel_framebuffer *) driDrawPriv->driverPrivate; - GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate; - - if (driContextPriv->driScreenPriv->dri2.enabled) { - intel_update_renderbuffers(driContextPriv, driDrawPriv); - if (driDrawPriv != driReadPriv) - intel_update_renderbuffers(driContextPriv, driReadPriv); - } else { - /* XXX FBO temporary fix-ups! These are released in - * intelDextroyContext(), above. Changes here should be - * reflected there. 
- */ - /* if the renderbuffers don't have regions, init them from the context */ - struct intel_renderbuffer *irbDepth - = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); - struct intel_renderbuffer *irbStencil - = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); - - if (intel_fb->color_rb[0]) { - intel_renderbuffer_set_region(intel_fb->color_rb[0], - intel->front_region); - } - if (intel_fb->color_rb[1]) { - intel_renderbuffer_set_region(intel_fb->color_rb[1], - intel->back_region); - } - - if (irbDepth) { - intel_renderbuffer_set_region(irbDepth, intel->depth_region); - } - if (irbStencil) { - intel_renderbuffer_set_region(irbStencil, intel->depth_region); - } - } - - /* set GLframebuffer size to match window, if needed */ - driUpdateFramebufferSize(&intel->ctx, driDrawPriv); - - if (driReadPriv != driDrawPriv) { - driUpdateFramebufferSize(&intel->ctx, driReadPriv); - } - - _mesa_make_current(&intel->ctx, &intel_fb->Base, readFb); + struct gl_framebuffer *fb = driDrawPriv->driverPrivate; + struct gl_framebuffer *readFb = driReadPriv->driverPrivate; + _mesa_make_current(&intel->ctx, fb, readFb); intel->driReadDrawable = driReadPriv; - - if (intel->driDrawable != driDrawPriv) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - int i; - - driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) - : VBLANK_FLAG_NO_IRQ; - - /* Prevent error printf if one crtc is disabled, this will - * be properly calculated in intelWindowMoved() next. 
- */ - driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv); - - (*psp->systemTime->getUST) (&intel_fb->swap_ust); - driDrawableInitVBlank(driDrawPriv); - intel_fb->vbl_waited = driDrawPriv->vblSeq; - - for (i = 0; i < 2; i++) { - if (intel_fb->color_rb[i]) - intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; - } - } - intel->driDrawable = driDrawPriv; - intelWindowMoved(intel); - } - - intel_draw_buffer(&intel->ctx, &intel_fb->Base); + intel->driDrawable = driDrawPriv; + driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; + driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; + intel_prepare_render(intel); } else { _mesa_make_current(NULL, NULL, NULL); @@ -1028,143 +893,3 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, return GL_TRUE; } - -static void -intelContendedLock(struct intel_context *intel, GLuint flags) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - __DRIscreenPrivate *sPriv = intel->driScreen; - volatile drm_i915_sarea_t *sarea = intel->sarea; - int me = intel->hHWContext; - - drmGetLock(intel->driFd, intel->hHWContext, flags); - - if (INTEL_DEBUG & DEBUG_LOCK) - _mesa_printf("%s - got contended lock\n", __progname); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); - - if (sarea && sarea->ctxOwner != me) { - if (INTEL_DEBUG & DEBUG_BUFMGR) { - fprintf(stderr, "Lost Context: sarea->ctxOwner %x me %x\n", - sarea->ctxOwner, me); - } - sarea->ctxOwner = me; - } - - /* If the last consumer of the texture memory wasn't us, notify the fake - * bufmgr and record the new owner. We should have the memory shared - * between contexts of a single fake bufmgr, but this will at least make - * things correct for now. 
- */ - if (!intel->ttm && sarea->texAge != intel->hHWContext) { - sarea->texAge = intel->hHWContext; - intel_bufmgr_fake_contended_lock_take(intel->bufmgr); - if (INTEL_DEBUG & DEBUG_BATCH) - intel_decode_context_reset(); - if (INTEL_DEBUG & DEBUG_BUFMGR) - fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n", - sarea->ctxOwner, intel->hHWContext); - } - - /* Drawable changed? - */ - if (dPriv && intel->lastStamp != dPriv->lastStamp) { - intelWindowMoved(intel); - intel->lastStamp = dPriv->lastStamp; - } -} - - -_glthread_DECLARE_STATIC_MUTEX(lockMutex); - -/* Lock the hardware and validate our state. - */ -void LOCK_HARDWARE( struct intel_context *intel ) -{ - __DRIdrawable *dPriv = intel->driDrawable; - __DRIscreen *sPriv = intel->driScreen; - char __ret = 0; - struct intel_framebuffer *intel_fb = NULL; - struct intel_renderbuffer *intel_rb = NULL; - - intel->locked++; - if (intel->locked >= 2) - return; - - if (!sPriv->dri2.enabled) - _glthread_LOCK_MUTEX(lockMutex); - - if (intel->driDrawable) { - intel_fb = intel->driDrawable->driverPrivate; - - if (intel_fb) - intel_rb = - intel_get_renderbuffer(&intel_fb->Base, - intel_fb->Base._ColorDrawBufferIndexes[0]); - } - - if (intel_rb && dPriv->vblFlags && - !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) && - (intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) { - drmVBlank vbl; - - vbl.request.type = DRM_VBLANK_ABSOLUTE; - - if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) { - vbl.request.type |= DRM_VBLANK_SECONDARY; - } - - vbl.request.sequence = intel_rb->vbl_pending; - drmWaitVBlank(intel->driFd, &vbl); - intel_fb->vbl_waited = vbl.reply.sequence; - } - - if (!sPriv->dri2.enabled) { - DRM_CAS(intel->driHwLock, intel->hHWContext, - (DRM_LOCK_HELD|intel->hHWContext), __ret); - - if (__ret) - intelContendedLock( intel, 0 ); - } - - - if (INTEL_DEBUG & DEBUG_LOCK) - _mesa_printf("%s - locked\n", __progname); -} - - -/* Unlock the hardware using the global current context - */ -void UNLOCK_HARDWARE( struct 
intel_context *intel ) -{ - __DRIscreen *sPriv = intel->driScreen; - - intel->locked--; - if (intel->locked > 0) - return; - - assert(intel->locked == 0); - - if (!sPriv->dri2.enabled) { - DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); - _glthread_UNLOCK_MUTEX(lockMutex); - } - - if (INTEL_DEBUG & DEBUG_LOCK) - _mesa_printf("%s - unlocked\n", __progname); - - /** - * Nothing should be left in batch outside of LOCK/UNLOCK which references - * cliprects. - */ - if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS) - intel_batchbuffer_flush(intel->batch); -} - diff --git a/shared/intel_context.h b/shared/intel_context.h index eb7be7d..22736a9 100644 --- a/shared/intel_context.h +++ b/shared/intel_context.h @@ -107,7 +107,6 @@ struct intel_context void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); - void (*note_fence) (struct intel_context *intel, GLuint fence); void (*update_texture_state) (struct intel_context * intel); void (*render_start) (struct intel_context * intel); @@ -125,48 +124,6 @@ struct intel_context void (*invalidate_state) (struct intel_context *intel, GLuint new_state); - - /* Metaops: - */ - void (*install_meta_state) (struct intel_context * intel); - void (*leave_meta_state) (struct intel_context * intel); - - void (*meta_draw_region) (struct intel_context * intel, - struct intel_region * draw_region, - struct intel_region * depth_region); - - void (*meta_draw_quad)(struct intel_context *intel, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLfloat z, - GLuint color, /* ARGB32 */ - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1); - - void (*meta_color_mask) (struct intel_context * intel, GLboolean); - - void (*meta_stencil_replace) (struct intel_context * intel, - GLuint mask, GLuint clear); - - void (*meta_depth_replace) (struct intel_context * intel); - - void (*meta_texture_blend_replace) (struct 
intel_context * intel); - - void (*meta_no_stencil_write) (struct intel_context * intel); - void (*meta_no_depth_write) (struct intel_context * intel); - void (*meta_no_texture) (struct intel_context * intel); - - void (*meta_import_pixel_state) (struct intel_context * intel); - void (*meta_frame_buffer_texture) (struct intel_context *intel, - GLint xoff, GLint yoff); - - GLboolean(*meta_tex_rect_source) (struct intel_context * intel, - dri_bo * buffer, - GLuint offset, - GLuint pitch, - GLuint height, - GLenum format, GLenum type); - void (*assert_not_dirty) (struct intel_context *intel); void (*debug_batch)(struct intel_context *intel); @@ -184,20 +141,18 @@ struct intel_context * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965. */ int gen; + GLboolean needs_ff_sync; + GLboolean is_ironlake; + GLboolean is_g4x; + GLboolean is_945; + GLboolean has_luminance_srgb; - struct intel_region *front_region; - struct intel_region *back_region; - struct intel_region *depth_region; - - /** - * This value indicates that the kernel memory manager is being used - * instead of the fake client-side memory manager. - */ - GLboolean ttm; + int urb_size; struct intel_batchbuffer *batch; drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; + GLboolean using_dri2_swapbuffers; struct { @@ -217,10 +172,6 @@ struct intel_context char *prevLockFile; int prevLockLine; - GLuint ClearColor565; - GLuint ClearColor8888; - - /* Offsets of fields within the current vertex: */ GLuint coloroffset; @@ -237,6 +188,7 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; + GLboolean no_hw; GLboolean always_flush_batch; GLboolean always_flush_cache; @@ -261,19 +213,6 @@ struct intel_context intel_line_func draw_line; intel_tri_func draw_tri; - /** - * Set to true if a single constant cliprect should be used in the - * batchbuffer. 
Otherwise, cliprects must be calculated at batchbuffer - * flush time while the lock is held. - */ - GLboolean constant_cliprect; - - /** - * In !constant_cliprect mode, set to true if the front cliprects should be - * used instead of back. - */ - GLboolean front_cliprects; - /** * Set if rendering has occured to the drawable's front buffer. * @@ -300,50 +239,21 @@ struct intel_context GLboolean use_texture_tiling; GLboolean use_early_z; - drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ - - int perf_boxes; - GLuint do_usleeps; - int do_irqs; - GLuint irqsEmitted; - - GLboolean scissor; - drm_clip_rect_t draw_rect; - drm_clip_rect_t scissor_rect; - - drm_context_t hHWContext; - drmLock *driHwLock; int driFd; - __DRIcontextPrivate *driContext; - __DRIdrawablePrivate *driDrawable; - __DRIdrawablePrivate *driReadDrawable; - __DRIscreenPrivate *driScreen; - intelScreenPrivate *intelScreen; - volatile drm_i915_sarea_t *sarea; - - GLuint lastStamp; - - GLboolean no_hw; + __DRIcontext *driContext; + __DRIdrawable *driDrawable; + __DRIdrawable *driReadDrawable; + __DRIscreen *driScreen; + struct intel_screen *intelScreen; /** * Configuration cache */ driOptionCache optionCache; - - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; }; -/* These are functions now: - */ -void LOCK_HARDWARE( struct intel_context *intel ); -void UNLOCK_HARDWARE( struct intel_context *intel ); - extern char *__progname; @@ -354,14 +264,14 @@ extern char *__progname; #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) -static inline uint32_t +static INLINE uint32_t U_FIXED(float value, uint32_t frac_bits) { value *= (1 << frac_bits); return value < 0 ? 
0 : value; } -static inline uint32_t +static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) { return value * (1 << frac_bits); @@ -373,29 +283,6 @@ do { \ (intel)->prim.flush(intel); \ } while (0) -/* ================================================================ - * Color packing: - */ - -#define INTEL_PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define INTEL_PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define INTEL_PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define INTEL_PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - -#define INTEL_PACKCOLOR(format, r, g, b, a) \ -(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) : \ - (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) : \ - (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) : \ - 0))) - /* ================================================================ * From linux kernel i386 header files, copes with odd sizes better * than COPY_DWORDS would: @@ -458,7 +345,7 @@ extern int INTEL_DEBUG; #define DBG(...) 
do { \ if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ - _mesa_printf(__VA_ARGS__); \ + printf(__VA_ARGS__); \ } while(0) #define PCI_CHIP_845_G 0x2562 @@ -481,14 +368,13 @@ extern int INTEL_DEBUG; extern GLboolean intelInitContext(struct intel_context *intel, const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate, struct dd_function_table *functions); -extern void intelGetLock(struct intel_context *intel, GLuint flags); - extern void intelFinish(GLcontext * ctx); extern void intelFlush(GLcontext * ctx); +extern void intel_flush(GLcontext * ctx, GLboolean needs_mi_flush); extern void intelInitDriverFunctions(struct dd_function_table *functions); @@ -568,6 +454,7 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y, void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +void intel_prepare_render(struct intel_context *intel); void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, uint32_t buffer_id); @@ -588,25 +475,4 @@ is_power_of_two(uint32_t value) return (value & (value - 1)) == 0; } -static inline void -intel_bo_map_gtt_preferred(struct intel_context *intel, - drm_intel_bo *bo, - GLboolean write) -{ - if (intel->intelScreen->kernel_exec_fencing) - drm_intel_gem_bo_map_gtt(bo); - else - drm_intel_bo_map(bo, write); -} - -static inline void -intel_bo_unmap_gtt_preferred(struct intel_context *intel, - drm_intel_bo *bo) -{ - if (intel->intelScreen->kernel_exec_fencing) - drm_intel_gem_bo_unmap_gtt(bo); - else - drm_intel_bo_unmap(bo); -} - #endif diff --git a/shared/intel_decode.c b/shared/intel_decode.c index a9dfe28..5293482 100644 --- a/shared/intel_decode.c +++ b/shared/intel_decode.c @@ -1437,6 +1437,12 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x780e, 4, 4, 
"3DSTATE_CC_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, + { 0x7811, 6, 6, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, }; len = (data[0] & 0x0000ffff) + 2; @@ -1592,7 +1598,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x7905: - if (len != 5 && len != 6) + if (len < 5 || len > 7) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); @@ -1611,6 +1617,8 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 4, "volume depth\n"); if (len == 6) instr_out(data, hw_offset, 5, "\n"); + if (len == 7) + instr_out(data, hw_offset, 6, "render target view extent\n"); return len; diff --git a/shared/intel_depthtmp.h b/shared/intel_depthtmp.h deleted file mode 100644 index a9c75d4..0000000 --- a/shared/intel_depthtmp.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** - * Wrapper around the depthtmp.h macrofest to generate spans code for - * all the tiling styles. - */ - -#define VALUE_TYPE INTEL_VALUE_TYPE -#define WRITE_DEPTH(_x, _y, d) \ - (*(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \ - NO_TILE(_x, _y)) = d) -#define READ_DEPTH(d, _x, _y) \ - d = *(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual + \ - NO_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel_gttmap_##x) -#include "depthtmp.h" - -#define VALUE_TYPE INTEL_VALUE_TYPE -#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel##x) -#include "depthtmp.h" - -#define VALUE_TYPE INTEL_VALUE_TYPE -#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(X_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(X_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel_XTile_##x) -#include "depthtmp.h" - -#define VALUE_TYPE INTEL_VALUE_TYPE -#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(Y_TILE(_x, _y), d) -#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(Y_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel_YTile_##x) -#include "depthtmp.h" - -#undef INTEL_VALUE_TYPE -#undef INTEL_WRITE_DEPTH -#undef INTEL_READ_DEPTH -#undef INTEL_TAG diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c index 48cdae5..a1aac69 100644 --- a/shared/intel_extensions.c +++ b/shared/intel_extensions.c @@ -48,6 +48,7 @@ #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax #define need_GL_EXT_cull_vertex +#define need_GL_EXT_draw_buffers2 #define need_GL_EXT_fog_coord #define need_GL_EXT_framebuffer_object #define 
need_GL_EXT_framebuffer_blit @@ -57,6 +58,7 @@ #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_APPLE_vertex_array_object +#define need_GL_APPLE_object_purgeable #define need_GL_ATI_separate_stencil #define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite @@ -79,6 +81,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, + { "GL_ARB_pixel_buffer_object", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, @@ -104,6 +107,8 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_packed_depth_stencil", NULL }, @@ -117,6 +122,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_texture_lod_bias", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, + { "GL_APPLE_object_purgeable", GL_APPLE_object_purgeable_functions }, { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions}, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, @@ -147,16 +153,19 @@ static const struct dri_extension i915_extensions[] = { static const struct dri_extension brw_extensions[] = { { "GL_ARB_depth_clamp", NULL }, { "GL_ARB_depth_texture", NULL }, + { "GL_ARB_fragment_coord_conventions", NULL }, { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, { "GL_ARB_fragment_shader", 
NULL }, { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, + { "GL_ARB_half_float_vertex", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, { "GL_ARB_shadow", NULL }, { "GL_MESA_texture_signed_rgba", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, + { "GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, @@ -176,13 +185,6 @@ static const struct dri_extension arb_oq_extensions[] = { }; -static const struct dri_extension ttm_extensions[] = { - { "GL_ARB_pixel_buffer_object", NULL }, - { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, - { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, - { NULL, NULL } -}; - static const struct dri_extension fragment_shader_extensions[] = { { "GL_ARB_fragment_shader", NULL }, { NULL, NULL } @@ -201,14 +203,10 @@ intelInitExtensions(GLcontext *ctx) */ driInitExtensions(ctx, card_extensions, GL_FALSE); - if (intel->ttm) - driInitExtensions(ctx, ttm_extensions, GL_FALSE); - - if (IS_965(intel->intelScreen->deviceID)) + if (intel->gen >= 4) driInitExtensions(ctx, brw_extensions, GL_FALSE); - if (IS_915(intel->intelScreen->deviceID) - || IS_945(intel->intelScreen->deviceID)) { + if (intel->gen == 3) { driInitExtensions(ctx, i915_extensions, GL_FALSE); if (driQueryOptionb(&intel->optionCache, "fragment_shader")) diff --git a/shared/intel_extensions.h b/shared/intel_extensions.h index 1d1c97a..e78e073 100644 --- a/shared/intel_extensions.h +++ b/shared/intel_extensions.h @@ -32,5 +32,8 @@ extern void intelInitExtensions(GLcontext *ctx); +extern void +intelFlushDrawable(__DRIdrawable *drawable); + #endif diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c index 608f75b..a429f8d 100644 --- a/shared/intel_fbo.c +++ b/shared/intel_fbo.c @@ -70,14 +70,11 @@ 
intel_delete_renderbuffer(struct gl_renderbuffer *rb) ASSERT(irb); - if (irb->span_cache != NULL) - _mesa_free(irb->span_cache); - if (intel && irb->region) { intel_region_release(&irb->region); } - _mesa_free(irb); + free(irb); } @@ -200,6 +197,38 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, } +#if FEATURE_OES_EGL_image +static void +intel_image_target_renderbuffer_storage(GLcontext *ctx, + struct gl_renderbuffer *rb, + void *image_handle) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *irb; + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle, + intel->driContext->loaderPrivate); + if (image == NULL) + return; + + irb = intel_renderbuffer(rb); + if (irb->region) + intel_region_release(&irb->region); + intel_region_reference(&irb->region, image->region); + + rb->InternalFormat = image->internal_format; + rb->Width = image->region->width; + rb->Height = image->region->height; + rb->Format = image->format; + rb->DataType = image->data_type; + rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx, + image->internal_format); +} +#endif + /** * Called for each hardware renderbuffer when a _window_ is resized. * Just update fields. 
@@ -222,7 +251,6 @@ static void intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, GLuint width, GLuint height) { - struct intel_framebuffer *intel_fb = (struct intel_framebuffer*)fb; int i; _mesa_resize_framebuffer(ctx, fb, width, height); @@ -233,9 +261,10 @@ intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, return; } + /* Make sure all window system renderbuffers are up to date */ - for (i = 0; i < 2; i++) { - struct gl_renderbuffer *rb = &intel_fb->color_rb[i]->Base; + for (i = BUFFER_FRONT_LEFT; i <= BUFFER_BACK_RIGHT; i++) { + struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; /* only resize if size is changing */ if (rb && (rb->Width != width || rb->Height != height)) { @@ -316,7 +345,7 @@ intel_create_renderbuffer(gl_format format) default: _mesa_problem(NULL, "Unexpected intFormat in intel_create_renderbuffer"); - _mesa_free(irb); + free(irb); return NULL; } @@ -398,8 +427,6 @@ static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - gl_format texFormat; - if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGBA8 texture OK\n"); @@ -429,14 +456,13 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, DBG("Render to DEPTH_STENCIL texture OK\n"); } else { - DBG("Render to texture BAD FORMAT %d\n", texImage->TexFormat); + DBG("Render to texture BAD FORMAT %s\n", + _mesa_get_format_name(texImage->TexFormat)); return GL_FALSE; } irb->Base.Format = texImage->TexFormat; - texFormat = texImage->TexFormat; - irb->Base.InternalFormat = texImage->InternalFormat; irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat); irb->Base.Width = texImage->Width; @@ -471,7 +497,7 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) irb->Base.ClassID = INTEL_RB_CLASS; if (!intel_update_wrapper(ctx, irb, texImage)) { - _mesa_free(irb); + free(irb); return NULL; } @@ 
-528,7 +554,7 @@ intel_render_texture(GLcontext * ctx, return; } - DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", + DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n", _glthread_GetID(), att->Texture->Name, newImage->Width, newImage->Height, irb->Base.RefCount); @@ -594,11 +620,21 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) intel_get_renderbuffer(fb, BUFFER_STENCIL); int i; - if (stencilRb && stencilRb != depthRb) { - /* we only support combined depth/stencil buffers, not separate - * stencil buffers. - */ - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + if (depthRb && stencilRb && stencilRb != depthRb) { + if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && + ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && + (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name == + ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) { + /* OK */ + } else { + /* we only support combined depth/stencil buffers, not separate + * stencil buffers. + */ + DBG("Only supports combined depth/stencil (found %s, %s)\n", + depthRb ? _mesa_get_format_name(depthRb->Base.Format): "NULL", + stencilRb ? 
_mesa_get_format_name(stencilRb->Base.Format): "NULL"); + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + } } for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { @@ -609,6 +645,7 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) continue; if (irb == NULL) { + DBG("software rendering renderbuffer\n"); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; continue; } @@ -643,4 +680,9 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; + +#if FEATURE_OES_EGL_image + intel->ctx.Driver.EGLImageTargetRenderbufferStorage = + intel_image_target_renderbuffer_storage; +#endif } diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h index fa43077..72413f7 100644 --- a/shared/intel_fbo.h +++ b/shared/intel_fbo.h @@ -33,27 +33,6 @@ struct intel_context; -/** - * Intel framebuffer, derived from gl_framebuffer. - */ -struct intel_framebuffer -{ - struct gl_framebuffer Base; - - struct intel_renderbuffer *color_rb[2]; - - /* VBI - */ - GLuint vbl_waited; - - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; -}; - - /** * Intel renderbuffer, derived from gl_renderbuffer. 
*/ @@ -61,11 +40,6 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; - - GLuint vbl_pending; /**< vblank sequence number of pending flip */ - - uint8_t *span_cache; - unsigned long span_cache_offset; }; @@ -121,7 +95,7 @@ intel_fbo_init(struct intel_context *intel); extern void -intel_flip_renderbuffers(struct intel_framebuffer *intel_fb); +intel_flip_renderbuffers(struct gl_framebuffer *fb); static INLINE struct intel_region * diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c index abb3024..4f14946 100644 --- a/shared/intel_mipmap_tree.c +++ b/shared/intel_mipmap_tree.c @@ -29,7 +29,6 @@ #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_tex_layout.h" -#include "intel_chipset.h" #ifndef I915 #include "brw_state.h" #endif @@ -87,7 +86,7 @@ intel_miptree_create_internal(struct intel_context *intel, mt->pitch = 0; #ifdef I915 - if (IS_945(intel->intelScreen->deviceID)) + if (intel->is_945) ok = i945_miptree_layout(intel, mt, tiling); else ok = i915_miptree_layout(intel, mt, tiling); @@ -120,8 +119,7 @@ intel_miptree_create(struct intel_context *intel, struct intel_mipmap_tree *mt; uint32_t tiling; - if (intel->use_texture_tiling && compress_byte == 0 && - intel->intelScreen->kernel_exec_fencing) { + if (intel->use_texture_tiling && compress_byte == 0) { if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) @@ -224,16 +222,12 @@ int intel_miptree_pitch_align (struct intel_context *intel, if (!mt->compressed) { int pitch_align; - if (intel->ttm) { - /* XXX: Align pitch to multiple of 64 bytes for now to allow - * render-to-texture to work in all cases. This should probably be - * replaced at some point by some scheme to only do this when really - * necessary. - */ - pitch_align = 64; - } else { - pitch_align = 4; - } + /* XXX: Align pitch to multiple of 64 bytes for now to allow + * render-to-texture to work in all cases. 
This should probably be + * replaced at some point by some scheme to only do this when really + * necessary. + */ + pitch_align = 64; if (tiling == I915_TILING_X) pitch_align = 512; @@ -243,11 +237,11 @@ int intel_miptree_pitch_align (struct intel_context *intel, pitch = ALIGN(pitch * mt->cpp, pitch_align); #ifdef I915 - /* XXX: At least the i915 seems very upset when the pitch is a multiple - * of 1024 and sometimes 512 bytes - performance can drop by several - * times. Go to the next multiple of the required alignment for now. + /* Do a little adjustment to linear allocations so that we avoid + * hitting the same channel of memory for 2 different pages when + * reading a 2x2 subspan or doing bilinear filtering. */ - if (!(pitch & 511) && + if (tiling == I915_TILING_NONE && !(pitch & 511) && (pitch + pitch_align) < (1 << ctx->Const.MaxTextureLevels)) pitch += pitch_align; #endif diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c index 993e427..cb088e4 100644 --- a/shared/intel_pixel.c +++ b/shared/intel_pixel.c @@ -29,14 +29,7 @@ #include "main/state.h" #include "main/bufferobj.h" #include "main/context.h" -#include "main/enable.h" -#include "main/matrix.h" -#include "main/texstate.h" -#include "main/varray.h" -#include "main/viewport.h" #include "swrast/swrast.h" -#include "shader/arbprogram.h" -#include "shader/program.h" #include "intel_context.h" #include "intel_pixel.h" @@ -88,10 +81,10 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) return GL_FALSE; } - if (!(ctx->Color.ColorMask[0] && - ctx->Color.ColorMask[1] && - ctx->Color.ColorMask[2] && - ctx->Color.ColorMask[3])) { + if (!(ctx->Color.ColorMask[0][0] && + ctx->Color.ColorMask[0][1] && + ctx->Color.ColorMask[0][2] && + ctx->Color.ColorMask[0][3])) { DBG("fallback due to color masking\n"); return GL_FALSE; } diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index 204a233..076fee8 100644 --- a/shared/intel_pixel_bitmap.c +++ 
b/shared/intel_pixel_bitmap.c @@ -32,11 +32,11 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" +#include "main/polygon.h" #include "main/pixelstore.h" #include "main/polygon.h" #include "main/state.h" #include "main/teximage.h" -#include "main/texenv.h" #include "main/texobj.h" #include "main/texstate.h" #include "main/texparam.h" @@ -45,7 +45,6 @@ #include "main/enable.h" #include "main/viewport.h" #include "shader/arbprogram.h" -#include "glapi/dispatch.h" #include "swrast/swrast.h" #include "intel_screen.h" @@ -53,7 +52,6 @@ #include "intel_batchbuffer.h" #include "intel_blit.h" #include "intel_regions.h" -#include "intel_buffer_objects.h" #include "intel_buffers.h" #include "intel_pixel.h" #include "intel_reg.h" @@ -105,7 +103,7 @@ static void set_bit( GLubyte *dest, GLuint bit ) } /* Extract a rectangle's worth of data from the bitmap. Called - * per-cliprect. + * per chunk of HW-sized bitmap. */ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, @@ -125,7 +123,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, GLuint count = 0; if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", + printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); if (invert) { @@ -165,7 +163,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, * Returns the low Y value of the vertical range given, flipped according to * whether the framebuffer is or not. 
*/ -static inline int +static INLINE int y_flip(struct gl_framebuffer *fb, int y, int height) { if (fb->Name != 0) @@ -190,11 +188,12 @@ do_blit_bitmap( GLcontext *ctx, GLfloat tmpColor[4]; GLubyte ubcolor[4]; GLuint color; - unsigned int num_cliprects; - drm_clip_rect_t *cliprects; - int x_off, y_off; GLsizei bitmap_width = width; GLsizei bitmap_height = height; + GLint px, py; + GLuint stipple[32]; + GLint orig_dstx = dstx; + GLint orig_dsty = dsty; /* Update draw buffer bounds */ _mesa_update_state(ctx); @@ -228,104 +227,72 @@ do_blit_bitmap( GLcontext *ctx, UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); if (dst->cpp == 2) - color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]); else - color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], - ubcolor[2], ubcolor[3]); + color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]); if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; - LOCK_HARDWARE(intel); - - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); - if (num_cliprects != 0) { - GLuint i; - GLint orig_dstx = dstx; - GLint orig_dsty = dsty; - - /* Clip to buffer bounds and scissor. */ - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - - dstx = x_off + dstx; - dsty = y_off + y_flip(fb, dsty, height); - - for (i = 0; i < num_cliprects; i++) { - int box_x, box_y, box_w, box_h; - GLint px, py; - GLuint stipple[32]; - - box_x = dstx; - box_y = dsty; - box_w = width; - box_h = height; - - /* Clip to drawable cliprect */ - if (!_mesa_clip_to_region(cliprects[i].x1, - cliprects[i].y1, - cliprects[i].x2, - cliprects[i].y2, - &box_x, &box_y, &box_w, &box_h)) - continue; + intel_prepare_render(intel); + + /* Clip to buffer bounds and scissor. 
*/ + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + + dsty = y_flip(fb, dsty, height); #define DY 32 #define DX 32 - /* Then, finally, chop it all into chunks that can be - * digested by hardware: + /* Chop it all into chunks that can be digested by hardware: */ + for (py = 0; py < height; py += DY) { + for (px = 0; px < width; px += DX) { + int h = MIN2(DY, height - py); + int w = MIN2(DX, width - px); + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; + GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY; + + assert(sz <= sizeof(stipple)); + memset(stipple, 0, sz); + + /* May need to adjust this when padding has been introduced in + * sz above: + * + * Have to translate destination coordinates back into source + * coordinates. */ - for (py = 0; py < box_h; py += DY) { - for (px = 0; px < box_w; px += DX) { - int h = MIN2(DY, box_h - py); - int w = MIN2(DX, box_w - px); - GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; - GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY; - - assert(sz <= sizeof(stipple)); - memset(stipple, 0, sz); - - /* May need to adjust this when padding has been introduced in - * sz above: - * - * Have to translate destination coordinates back into source - * coordinates. - */ - if (get_bitmap_rect(bitmap_width, bitmap_height, unpack, - bitmap, - -orig_dstx + (box_x + px - x_off), - -orig_dsty + y_flip(fb, - box_y + py - y_off, h), - w, h, - (GLubyte *)stipple, - 8, - fb->Name == 0 ? 
GL_TRUE : GL_FALSE) == 0) - continue; - - if (!intelEmitImmediateColorExpandBlit(intel, - dst->cpp, - (GLubyte *)stipple, - sz, - color, - dst->pitch, - dst->buffer, - 0, - dst->tiling, - box_x + px, - box_y + py, - w, h, - logic_op)) { - return GL_FALSE; - } - } - } + if (get_bitmap_rect(bitmap_width, bitmap_height, unpack, + bitmap, + -orig_dstx + (dstx + px), + -orig_dsty + y_flip(fb, dsty + py, h), + w, h, + (GLubyte *)stipple, + 8, + fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0) + continue; + + if (!intelEmitImmediateColorExpandBlit(intel, + dst->cpp, + (GLubyte *)stipple, + sz, + color, + dst->pitch, + dst->buffer, + 0, + dst->tiling, + dstx + px, + dsty + py, + w, h, + logic_op)) { + return GL_FALSE; + } } } out: - UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) intel_batchbuffer_flush(intel->batch); @@ -428,7 +395,7 @@ intel_texture_bitmap(GLcontext * ctx, } /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ - a8_bitmap = _mesa_calloc(width * height); + a8_bitmap = calloc(1, width * height); _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff); if (_mesa_is_bufferobj(unpack->BufferObj)) { @@ -463,7 +430,7 @@ intel_texture_bitmap(GLcontext * ctx, _mesa_PixelStorei(GL_UNPACK_ALIGNMENT, 1); _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0, GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap); - _mesa_free(a8_bitmap); + free(a8_bitmap); meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp); _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, @@ -504,6 +471,7 @@ intel_texture_bitmap(GLcontext * ctx, meta_restore_fragment_program(&intel->meta); meta_restore_vertex_program(&intel->meta); + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); _mesa_PopClientAttrib(); _mesa_PopAttrib(); @@ -548,7 +516,7 @@ intelBitmap(GLcontext * ctx, return; if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + printf("%s: fallback to swrast\n", __FUNCTION__); 
_swrast_Bitmap(ctx, x, y, width, height, unpack, pixels); } diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c index 622aaa2..f4f3fd6 100644 --- a/shared/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -35,28 +35,33 @@ #include "intel_buffers.h" #include "intel_regions.h" #include "intel_pixel.h" +#include "intel_fbo.h" #define FILE_DEBUG_FLAG DEBUG_PIXEL static struct intel_region * copypix_src_region(struct intel_context *intel, GLenum type) { + struct intel_renderbuffer *depth; + + depth = (struct intel_renderbuffer *) + &intel->ctx.DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + switch (type) { case GL_COLOR: return intel_readbuf_region(intel); case GL_DEPTH: - /* Don't think this is really possible execpt at 16bpp, when we have no stencil. - */ - if (intel->depth_region && intel->depth_region->cpp == 2) - return intel->depth_region; + /* Don't think this is really possible execpt at 16bpp, when we + * have no stencil. */ + if (depth && depth->region->cpp == 2) + return depth->region; case GL_STENCIL: - /* Don't think this is really possible. - */ + /* Don't think this is really possible. */ break; case GL_DEPTH_STENCIL_EXT: /* Does it matter whether it is stencil/depth or depth/stencil? 
*/ - return intel->depth_region; + return depth->region; default: break; } @@ -83,10 +88,10 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Depth.Test || ctx->Fog.Enabled || ctx->Stencil._Enabled || - !ctx->Color.ColorMask[0] || - !ctx->Color.ColorMask[1] || - !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || + !ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3] || ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled || ctx->Color.BlendEnabled); @@ -107,9 +112,10 @@ do_blit_copypixels(GLcontext * ctx, struct intel_region *src = copypix_src_region(intel, type); struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_framebuffer *read_fb = ctx->ReadBuffer; - unsigned int num_cliprects; - drm_clip_rect_t *cliprects; - int x_off, y_off; + GLint orig_dstx; + GLint orig_dsty; + GLint orig_srcx; + GLint orig_srcy; if (type == GL_DEPTH || type == GL_STENCIL) { if (INTEL_DEBUG & DEBUG_FALLBACKS) @@ -130,98 +136,58 @@ do_blit_copypixels(GLcontext * ctx, if (!src || !dst) return GL_FALSE; - - intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); - - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); - if (num_cliprects != 0) { - GLint delta_x; - GLint delta_y; - GLint orig_dstx; - GLint orig_dsty; - GLint orig_srcx; - GLint orig_srcy; - GLuint i; - - /* XXX: We fail to handle different inversion between read and draw framebuffer. */ - - /* Clip to destination buffer. */ - orig_dstx = dstx; - orig_dsty = dsty; - if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, - fb->_Xmax, fb->_Ymax, - &dstx, &dsty, &width, &height)) - goto out; - /* Adjust src coords for our post-clipped destination origin */ - srcx += dstx - orig_dstx; - srcy += dsty - orig_dsty; - - /* Clip to source buffer. 
*/ - orig_srcx = srcx; - orig_srcy = srcy; - if (!_mesa_clip_to_region(0, 0, - read_fb->Width, read_fb->Height, - &srcx, &srcy, &width, &height)) - goto out; - /* Adjust dst coords for our post-clipped source origin */ - dstx += srcx - orig_srcx; - dsty += srcy - orig_srcy; - - /* Convert from GL to hardware coordinates: - */ - if (fb->Name == 0) { - /* copypixels to a system framebuffer */ - dstx = x_off + dstx; - dsty = y_off + (fb->Height - dsty - height); - } else { - /* copypixels to a user framebuffer object */ - dstx = x_off + dstx; - dsty = y_off + dsty; - } - - /* Flip source Y if it's a system framebuffer. */ - if (read_fb->Name == 0) { - srcx = intel->driReadDrawable->x + srcx; - srcy = intel->driReadDrawable->y + (fb->Height - srcy - height); - } - - delta_x = srcx - dstx; - delta_y = srcy - dsty; - /* Could do slightly more clipping: Eg, take the intersection of - * the destination cliprects and the read drawable cliprects - * - * This code will not overwrite other windows, but will - * introduce garbage when copying from obscured window regions. - */ - for (i = 0; i < num_cliprects; i++) { - GLint clip_x = dstx; - GLint clip_y = dsty; - GLint clip_w = width; - GLint clip_h = height; - - if (!_mesa_clip_to_region(cliprects[i].x1, cliprects[i].y1, - cliprects[i].x2, cliprects[i].y2, - &clip_x, &clip_y, &clip_w, &clip_h)) - continue; - - if (!intel_region_copy(intel, - dst, 0, clip_x, clip_y, - src, 0, clip_x + delta_x, clip_y + delta_y, - clip_w, clip_h, - ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY)) { - DBG("%s: blit failure\n", __FUNCTION__); - UNLOCK_HARDWARE(intel); - return GL_FALSE; - } - } + intel_prepare_render(intel); + + /* XXX: We fail to handle different inversion between read and draw framebuffer. */ + + /* Clip to destination buffer. 
*/ + orig_dstx = dstx; + orig_dsty = dsty; + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + /* Adjust src coords for our post-clipped destination origin */ + srcx += dstx - orig_dstx; + srcy += dsty - orig_dsty; + + /* Clip to source buffer. */ + orig_srcx = srcx; + orig_srcy = srcy; + if (!_mesa_clip_to_region(0, 0, + read_fb->Width, read_fb->Height, + &srcx, &srcy, &width, &height)) + goto out; + /* Adjust dst coords for our post-clipped source origin */ + dstx += srcx - orig_srcx; + dsty += srcy - orig_srcy; + + /* Convert from GL to hardware coordinates: */ + if (fb->Name == 0) { + /* copypixels to a system framebuffer */ + dsty = fb->Height - dsty - height; + } else { + /* copypixels to a user framebuffer object */ + dsty = dsty; + } + + /* Flip source Y if it's a system framebuffer. */ + if (read_fb->Name == 0) + srcy = fb->Height - srcy - height; + + if (!intel_region_copy(intel, + dst, 0, dstx, dsty, + src, 0, srcx, srcy, + width, height, + ctx->Color.ColorLogicOpEnabled ? 
+ ctx->Color.LogicOp : GL_COPY)) { + DBG("%s: blit failure\n", __FUNCTION__); + return GL_FALSE; } -out: - UNLOCK_HARDWARE(intel); +out: intel_check_front_buffer_rendering(intel); DBG("%s: success\n", __FUNCTION__); diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c index 9b382e3..bd1dd13 100644 --- a/shared/intel_pixel_draw.c +++ b/shared/intel_pixel_draw.c @@ -46,10 +46,6 @@ #include "drivers/common/meta.h" #include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_buffers.h" -#include "intel_regions.h" #include "intel_pixel.h" #include "intel_fbo.h" @@ -69,7 +65,6 @@ intel_stencil_drawpixels(GLcontext * ctx, GLfloat vertices[4][2]; struct intel_renderbuffer *irb; struct intel_renderbuffer *depth_irb; - struct gl_renderbuffer *rb; struct gl_pixelstore_attrib old_unpack; GLstencil *stencil_pixels; int row, y1, y2; @@ -152,7 +147,7 @@ intel_stencil_drawpixels(GLcontext * ctx, /* Unpack the supplied stencil values into a ubyte buffer. 
*/ assert(sizeof(GLstencil) == sizeof(GLubyte)); - stencil_pixels = _mesa_malloc(width * height * sizeof(GLstencil)); + stencil_pixels = malloc(width * height * sizeof(GLstencil)); for (row = 0; row < height; row++) { GLvoid *source = _mesa_image_address2d(unpack, pixels, width, height, @@ -170,7 +165,6 @@ intel_stencil_drawpixels(GLcontext * ctx, */ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888); - rb = &irb->Base; irb->Base.Width = depth_irb->Base.Width; irb->Base.Height = depth_irb->Base.Height; intel_renderbuffer_set_region(irb, depth_irb->region); @@ -207,7 +201,7 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_INTENSITY, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, stencil_pixels); ctx->Unpack = old_unpack; - _mesa_free(stencil_pixels); + free(stencil_pixels); meta_set_passthrough_transform(&intel->meta); diff --git a/shared/intel_pixel_read.c b/shared/intel_pixel_read.c index 20424e2..2ac3da7 100644 --- a/shared/intel_pixel_read.c +++ b/shared/intel_pixel_read.c @@ -36,7 +36,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_batchbuffer.h" #include "intel_blit.h" #include "intel_buffers.h" #include "intel_regions.h" @@ -65,103 +64,6 @@ * any case. */ - -static GLboolean -do_texture_readpixels(GLcontext * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - struct intel_region *dest_region) -{ -#if 0 - struct intel_context *intel = intel_context(ctx); - intelScreenPrivate *screen = intel->intelScreen; - GLint pitch = pack->RowLength ? 
pack->RowLength : width; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int textureFormat; - GLenum glTextureFormat; - int destFormat, depthFormat, destPitch; - drm_clip_rect_t tmp; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - - if (ctx->_ImageTransferState || - pack->SwapBytes || pack->LsbFirst || !pack->Invert) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel)); - - if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: couldn't set dest %s/%s\n", - __FUNCTION__, - _mesa_lookup_enum_by_nr(type), - _mesa_lookup_enum_by_nr(format)); - return GL_FALSE; - } - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - intel->vtbl.install_meta_state(intel); - intel->vtbl.meta_no_depth_write(intel); - intel->vtbl.meta_no_stencil_write(intel); - - if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { - UNLOCK_HARDWARE(intel); - SET_STATE(i830, state); - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); - return GL_TRUE; - } - - y = dPriv->h - y - height; - x += dPriv->x; - y += dPriv->y; - - - /* Set the frontbuffer up as a large rectangular texture. - */ - intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat); - - - intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat); - - - /* Set the 3d engine to draw into the destination region: - */ - - intel->vtbl.meta_draw_region(intel, dest_region); - intel->vtbl.meta_draw_format(intel, destFormat, depthFormat); /* ?? 
*/ - - - /* Draw a single quad, no cliprects: - */ - intel->vtbl.meta_disable_cliprects(intel); - - intel->vtbl.draw_quad(intel, - 0, width, 0, height, - 0x00ff00ff, x, x + width, y, y + height); - - intel->vtbl.leave_meta_state(intel); - } - UNLOCK_HARDWARE(intel); - - intel_region_wait_fence(ctx, dest_region); /* required by GL */ - return GL_TRUE; -#endif - - return GL_FALSE; -} - - - - static GLboolean do_blit_readpixels(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height, @@ -173,9 +75,12 @@ do_blit_readpixels(GLcontext * ctx, struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); GLuint dst_offset; GLuint rowLength; + drm_intel_bo *dst_buffer; + GLboolean all; + GLint dst_x, dst_y; if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); if (!src) return GL_FALSE; @@ -184,7 +89,7 @@ do_blit_readpixels(GLcontext * ctx, /* PBO only for now: */ if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - not PBO\n", __FUNCTION__); + printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } @@ -192,13 +97,13 @@ do_blit_readpixels(GLcontext * ctx, if (ctx->_ImageTransferState || !intel_check_blit_format(src, format, type)) { if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - bad format for blit\n", __FUNCTION__); + printf("%s - bad format for blit\n", __FUNCTION__); return GL_FALSE; } if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: bad packing params\n", __FUNCTION__); + printf("%s: bad packing params\n", __FUNCTION__); return GL_FALSE; } @@ -209,67 +114,52 @@ do_blit_readpixels(GLcontext * ctx, if (pack->Invert) { if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); + printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); return GL_FALSE; } else { - rowLength = -rowLength; + if (ctx->ReadBuffer->Name == 0) + rowLength = -rowLength; } dst_offset = (GLintptr) 
_mesa_image_address(2, pack, pixels, width, height, format, type, 0, 0, 0); + if (!_mesa_clip_copytexsubimage(ctx, + &dst_x, &dst_y, + &x, &y, + &width, &height)) { + return GL_TRUE; + } - /* Although the blits go on the command buffer, need to do this and - * fire with lock held to guarentee cliprects are correct. - */ - intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); - - if (intel->driReadDrawable->numClipRects) { - GLboolean all = (width * height * src->cpp == dst->Base.Size && - x == 0 && dst_offset == 0); - - dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst, - all ? INTEL_WRITE_FULL : - INTEL_WRITE_PART); - __DRIdrawablePrivate *dPriv = intel->driReadDrawable; - int nbox = dPriv->numClipRects; - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t rect; - drm_clip_rect_t src_rect; - int i; + intel_prepare_render(intel); - src_rect.x1 = dPriv->x + x; - src_rect.y1 = dPriv->y + dPriv->h - (y + height); - src_rect.x2 = src_rect.x1 + width; - src_rect.y2 = src_rect.y1 + height; + all = (width * height * src->cpp == dst->Base.Size && + x == 0 && dst_offset == 0); + dst_x = 0; + dst_y = 0; + dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? 
INTEL_WRITE_FULL : + INTEL_WRITE_PART); - for (i = 0; i < nbox; i++) { - if (!intel_intersect_cliprects(&rect, &src_rect, &box[i])) - continue; + if (ctx->ReadBuffer->Name == 0) + y = ctx->ReadBuffer->Height - (y + height); - if (!intelEmitCopyBlit(intel, - src->cpp, - src->pitch, src->buffer, 0, src->tiling, - rowLength, dst_buffer, dst_offset, GL_FALSE, - rect.x1, - rect.y1, - rect.x1 - src_rect.x1, - rect.y2 - src_rect.y2, - rect.x2 - rect.x1, rect.y2 - rect.y1, - GL_COPY)) { - UNLOCK_HARDWARE(intel); - return GL_FALSE; - } - } + if (!intelEmitCopyBlit(intel, + src->cpp, + src->pitch, src->buffer, 0, src->tiling, + rowLength, dst_buffer, dst_offset, GL_FALSE, + x, y, + dst_x, dst_y, + width, height, + GL_COPY)) { + return GL_FALSE; } - UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - DONE\n", __FUNCTION__); + printf("%s - DONE\n", __FUNCTION__); return GL_TRUE; } @@ -284,22 +174,14 @@ intelReadPixels(GLcontext * ctx, fprintf(stderr, "%s\n", __FUNCTION__); intelFlush(ctx); + intel_prepare_render(intel_context(ctx)); if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) return; -#ifdef I915 - if (do_texture_readpixels - (ctx, x, y, width, height, format, type, pack, pixels)) - return; -#else - (void)do_blit_readpixels; - (void)do_texture_readpixels; -#endif - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + printf("%s: fallback to swrast\n", __FUNCTION__); /* Update Mesa state before calling down into _swrast_ReadPixels, as * the spans code requires the computed buffer states to be up to date, diff --git a/shared/intel_regions.c b/shared/intel_regions.c index 8097516..f042bcb 100644 --- a/shared/intel_regions.c +++ b/shared/intel_regions.c @@ -42,13 +42,13 @@ #include #include +#include "main/hash.h" #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" #include "intel_buffer_objects.h" #include "intel_bufmgr.h" #include "intel_batchbuffer.h" 
-#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -118,8 +118,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region) if (region->pbo) intel_region_cow(intel, region); - if (region->tiling != I915_TILING_NONE && - intel->intelScreen->kernel_exec_fencing) + if (region->tiling != I915_TILING_NONE) drm_intel_gem_bo_map_gtt(region->buffer); else dri_bo_map(region->buffer, GL_TRUE); @@ -134,8 +133,7 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) { _DBG("%s %p\n", __FUNCTION__, region); if (!--region->map_refcount) { - if (region->tiling != I915_TILING_NONE && - intel->intelScreen->kernel_exec_fencing) + if (region->tiling != I915_TILING_NONE) drm_intel_gem_bo_unmap_gtt(region->buffer); else dri_bo_unmap(region->buffer); @@ -180,36 +178,19 @@ intel_region_alloc(struct intel_context *intel, { dri_bo *buffer; struct intel_region *region; + unsigned long flags = 0; + unsigned long aligned_pitch; - /* If we're tiled, our allocations are in 8 or 32-row blocks, so - * failure to align our height means that we won't allocate enough pages. - * - * If we're untiled, we still have to align to 2 rows high because the - * data port accesses 2x2 blocks even if the bottom row isn't to be - * rendered, so failure to align means we could walk off the end of the - * GTT and fault. - */ - if (tiling == I915_TILING_X) - height = ALIGN(height, 8); - else if (tiling == I915_TILING_Y) - height = ALIGN(height, 32); - else - height = ALIGN(height, 2); - - /* If we're untiled, we have to align to 2 rows high because the - * data port accesses 2x2 blocks even if the bottom row isn't to be - * rendered, so failure to align means we could walk off the end of the - * GTT and fault. + if (expect_accelerated_upload) + flags |= BO_ALLOC_FOR_RENDER; + + buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", + width, height, cpp, + &tiling, &aligned_pitch, flags); + /* We've already chosen a pitch as part of miptree layout. 
It had + * better be the same. */ - height = ALIGN(height, 2); - - if (expect_accelerated_upload) { - buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", - pitch * cpp * height, 64); - } else { - buffer = drm_intel_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64); - } + assert(aligned_pitch == pitch * cpp); region = intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer); @@ -229,10 +210,24 @@ intel_region_alloc_for_handle(struct intel_context *intel, GLuint width, GLuint height, GLuint pitch, GLuint handle, const char *name) { - struct intel_region *region; + struct intel_region *region, *dummy; dri_bo *buffer; int ret; + region = _mesa_HashLookup(intel->intelScreen->named_regions, handle); + if (region != NULL) { + dummy = NULL; + if (region->width != width || region->height != height || + region->cpp != cpp || region->pitch != pitch) { + fprintf(stderr, + "Region for name %d already exists but is not compatible\n", + handle); + return NULL; + } + intel_region_reference(&dummy, region); + return dummy; + } + buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle); region = intel_region_alloc_internal(intel, cpp, @@ -249,6 +244,10 @@ intel_region_alloc_for_handle(struct intel_context *intel, return NULL; } + region->name = handle; + region->screen = intel->intelScreen; + _mesa_HashInsert(intel->intelScreen->named_regions, handle, region); + return region; } @@ -288,10 +287,8 @@ intel_region_release(struct intel_region **region_handle) region->pbo = NULL; dri_bo_unreference(region->buffer); - if (region->classic_map != NULL) { - drmUnmap(region->classic_map, - region->pitch * region->cpp * region->height); - } + if (region->name > 0) + _mesa_HashRemove(region->screen->named_regions, region->name); free(region); } @@ -362,14 +359,14 @@ intel_region_data(struct intel_context *intel, intel_region_cow(intel, dst); } - LOCK_HARDWARE(intel); + intel_prepare_render(intel); + _mesa_copy_rect(intel_region_map(intel, dst) + 
dst_offset, dst->cpp, dst->pitch, dstx, dsty, width, height, src, src_pitch, srcx, srcy); intel_region_unmap(intel, dst); - UNLOCK_HARDWARE(intel); } /* Copy rectangular sub-regions. Need better logic about when to @@ -445,6 +442,7 @@ intel_region_attach_pbo(struct intel_context *intel, region->pbo->region = region; dri_bo_reference(buffer); region->buffer = buffer; + region->tiling = I915_TILING_NONE; } @@ -485,7 +483,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) /* Now blit from the texture buffer to the new buffer: */ - LOCK_HARDWARE(intel); + intel_prepare_render(intel); ok = intelEmitCopyBlit(intel, region->cpp, region->pitch, pbo->buffer, 0, region->tiling, @@ -494,7 +492,6 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) region->pitch, region->height, GL_COPY); assert(ok); - UNLOCK_HARDWARE(intel); } dri_bo * @@ -510,125 +507,3 @@ intel_region_buffer(struct intel_context *intel, return region->buffer; } - -static struct intel_region * -intel_recreate_static(struct intel_context *intel, - const char *name, - struct intel_region *region, - intelRegion *region_desc) -{ - intelScreenPrivate *intelScreen = intel->intelScreen; - int ret; - - if (region == NULL) { - region = calloc(sizeof(*region), 1); - region->refcount = 1; - _DBG("%s creating new region %p\n", __FUNCTION__, region); - } - else { - _DBG("%s %p\n", __FUNCTION__, region); - } - - if (intel->ctx.Visual.rgbBits == 24) - region->cpp = 4; - else - region->cpp = intel->ctx.Visual.rgbBits / 8; - region->pitch = intelScreen->pitch; - region->width = intelScreen->width; - region->height = intelScreen->height; - - if (region->buffer != NULL) { - dri_bo_unreference(region->buffer); - region->buffer = NULL; - } - - if (intel->ttm) { - assert(region_desc->bo_handle != -1); - region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, - name, - region_desc->bo_handle); - - ret = dri_bo_get_tiling(region->buffer, ®ion->tiling, - 
®ion->bit_6_swizzle); - if (ret != 0) { - fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", - region_desc->bo_handle, name, strerror(-ret)); - intel_region_release(®ion); - return NULL; - } - } else { - if (region->classic_map != NULL) { - drmUnmap(region->classic_map, - region->pitch * region->cpp * region->height); - region->classic_map = NULL; - } - ret = drmMap(intel->driFd, region_desc->handle, - region->pitch * region->cpp * region->height, - ®ion->classic_map); - if (ret != 0) { - fprintf(stderr, "Failed to drmMap %s buffer\n", name); - free(region); - return NULL; - } - - region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, - name, - region_desc->offset, - region->pitch * region->cpp * - region->height, - region->classic_map); - - /* The sarea just gives us a boolean for whether it's tiled or not, - * instead of which tiling mode it is. Guess. - */ - if (region_desc->tiled) { - if (intel->gen >= 4 && region_desc == &intelScreen->depth) - region->tiling = I915_TILING_Y; - else - region->tiling = I915_TILING_X; - } else { - region->tiling = I915_TILING_NONE; - } - - region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; - } - - assert(region->buffer != NULL); - - return region; -} - -/** - * Create intel_region structs to describe the static front, back, and depth - * buffers created by the xserver. - * - * Although FBO's mean we now no longer use these as render targets in - * all circumstances, they won't go away until the back and depth - * buffers become private, and the front buffer will remain even then. - * - * Note that these don't allocate video memory, just describe - * allocations alread made by the X server. 
- */ -void -intel_recreate_static_regions(struct intel_context *intel) -{ - intelScreenPrivate *intelScreen = intel->intelScreen; - - intel->front_region = - intel_recreate_static(intel, "front", - intel->front_region, - &intelScreen->front); - - intel->back_region = - intel_recreate_static(intel, "back", - intel->back_region, - &intelScreen->back); - - /* Still assumes front.cpp == depth.cpp. We can kill this when we move to - * private buffers. - */ - intel->depth_region = - intel_recreate_static(intel, "depth", - intel->depth_region, - &intelScreen->depth); -} diff --git a/shared/intel_regions.h b/shared/intel_regions.h index 535fcd7..7ee6a98 100644 --- a/shared/intel_regions.h +++ b/shared/intel_regions.h @@ -52,7 +52,7 @@ struct intel_buffer_object; */ struct intel_region { - dri_bo *buffer; /**< buffer manager's buffer */ + drm_intel_bo *buffer; /**< buffer manager's buffer */ GLuint refcount; /**< Reference count for region */ GLuint cpp; /**< bytes per pixel */ GLuint width; /**< in pixels */ @@ -66,8 +66,10 @@ struct intel_region uint32_t tiling; /**< Which tiling mode the region is in */ uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ - drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ struct intel_buffer_object *pbo; /* zero-copy uploads */ + + uint32_t name; /**< Global name for the bo */ + struct intel_screen *screen; }; @@ -146,4 +148,12 @@ void _mesa_copy_rect(GLubyte * dst, const GLubyte * src, GLuint src_pitch, GLuint src_x, GLuint src_y); +struct __DRIimageRec { + struct intel_region *region; + GLenum internal_format; + GLuint format; + GLenum data_type; + void *data; +}; + #endif diff --git a/shared/intel_screen.c b/shared/intel_screen.c index 789135b..6e4bb64 100644 --- a/shared/intel_screen.c +++ b/shared/intel_screen.c @@ -29,35 +29,28 @@ #include "main/context.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" +#include "main/hash.h" +#include "main/fbobject.h" #include "utils.h" 
-#include "vblank.h" #include "xmlpool.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" #include "intel_bufmgr.h" #include "intel_chipset.h" -#include "intel_extensions.h" #include "intel_fbo.h" -#include "intel_regions.h" -#include "intel_swapbuffers.h" #include "intel_screen.h" -#include "intel_span.h" #include "intel_tex.h" +#include "intel_regions.h" #include "i915_drm.h" -#include "i830_dri.h" #define DRI_CONF_TEXTURE_TILING(def) \ - DRI_CONF_OPT_BEGIN(texture_tiling, bool, def) \ - DRI_CONF_DESC(en, "Enable texture tiling") \ - DRI_CONF_OPT_END \ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC) /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL @@ -69,11 +62,9 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC_END DRI_CONF_OPT_END -#ifdef I915 - DRI_CONF_TEXTURE_TILING(false) -#else - DRI_CONF_TEXTURE_TILING(true) -#endif + DRI_CONF_OPT_BEGIN(texture_tiling, bool, true) + DRI_CONF_DESC(en, "Enable texture tiling") + DRI_CONF_OPT_END DRI_CONF_OPT_BEGIN(early_z, bool, false) DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") @@ -99,157 +90,146 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 12; +const GLuint __driNConfigOptions = 11; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; #endif /*USE_NEW_INTERFACE */ -/** - * Map all the memory regions described by the screen. - * \return GL_TRUE if success, GL_FALSE if error. 
- */ -GLboolean -intelMapScreenRegions(__DRIscreenPrivate * sPriv) +static const __DRItexBufferExtension intelTexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + intelSetTexBuffer, + intelSetTexBuffer2, +}; + +static void +intelDRI2Flush(__DRIdrawable *drawable) { - intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; - - if (0) - _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); - if (intelScreen->tex.size != 0) { - if (drmMap(sPriv->fd, - intelScreen->tex.handle, - intelScreen->tex.size, - (drmAddress *) & intelScreen->tex.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - } + struct intel_context *intel = drawable->driContextPriv->driverPrivate; - return GL_TRUE; -} + if (intel->gen < 4) + INTEL_FIREVERTICES(intel); -void -intelUnmapScreenRegions(intelScreenPrivate * intelScreen) -{ - if (intelScreen->tex.map) { - drmUnmap(intelScreen->tex.map, intelScreen->tex.size); - intelScreen->tex.map = NULL; - } + if (intel->batch->map != intel->batch->ptr) + intel_batchbuffer_flush(intel->batch); } - static void -intelPrintDRIInfo(intelScreenPrivate * intelScreen, - __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv) +intelDRI2Invalidate(__DRIdrawable *drawable) { - fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->front.size, intelScreen->front.offset, - intelScreen->pitch); - fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->back.size, intelScreen->back.offset, - intelScreen->pitch); - fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->depth.size, intelScreen->depth.offset, - intelScreen->pitch); - fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", - intelScreen->tex.size, intelScreen->tex.offset); - fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); + struct intel_context *intel = drawable->driContextPriv->driverPrivate; + + intel->using_dri2_swapbuffers = GL_TRUE; + dri2InvalidateDrawable(drawable); } 
+static const struct __DRI2flushExtensionRec intelFlushExtension = { + { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, + intelDRI2Flush, + intelDRI2Invalidate, +}; -static void -intelPrintSAREA(const drm_i915_sarea_t * sarea) +static __DRIimage * +intel_create_image_from_name(__DRIcontext *context, + int width, int height, int format, + int name, int pitch, void *loaderPrivate) { - fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, - sarea->height); - fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); - fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", - sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle, sarea->front_tiled); - fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", - sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle, sarea->back_tiled); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", - sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle, sarea->depth_tiled); - fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); -} + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + int cpp; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + switch (format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + image->internal_format = GL_RGB; + image->data_type = GL_UNSIGNED_BYTE; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + image->internal_format = GL_RGBA; + image->data_type = GL_UNSIGNED_BYTE; + break; + default: + free(image); + return NULL; + } + image->data = loaderPrivate; + cpp = _mesa_get_format_bytes(image->format); -/** - * A 
number of the screen parameters are obtained/computed from - * information in the SAREA. This function updates those parameters. - */ -static void -intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, - drm_i915_sarea_t * sarea) + image->region = intel_region_alloc_for_handle(intel, cpp, width, height, + pitch, name, "image"); + if (image->region == NULL) { + FREE(image); + return NULL; + } + + return image; +} + +static __DRIimage * +intel_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate) { - intelScreen->width = sarea->width; - intelScreen->height = sarea->height; - intelScreen->pitch = sarea->pitch; - - intelScreen->front.offset = sarea->front_offset; - intelScreen->front.handle = sarea->front_handle; - intelScreen->front.size = sarea->front_size; - intelScreen->front.tiled = sarea->front_tiled; - - intelScreen->back.offset = sarea->back_offset; - intelScreen->back.handle = sarea->back_handle; - intelScreen->back.size = sarea->back_size; - intelScreen->back.tiled = sarea->back_tiled; - - intelScreen->depth.offset = sarea->depth_offset; - intelScreen->depth.handle = sarea->depth_handle; - intelScreen->depth.size = sarea->depth_size; - intelScreen->depth.tiled = sarea->depth_tiled; - - if (intelScreen->driScrnPriv->ddx_version.minor >= 9) { - intelScreen->front.bo_handle = sarea->front_bo_handle; - intelScreen->back.bo_handle = sarea->back_bo_handle; - intelScreen->depth.bo_handle = sarea->depth_bo_handle; - } else { - intelScreen->front.bo_handle = -1; - intelScreen->back.bo_handle = -1; - intelScreen->depth.bo_handle = -1; + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + rb = _mesa_lookup_renderbuffer(&intel->ctx, renderbuffer); + if (!rb) { + _mesa_error(&intel->ctx, + GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); + return NULL; } - intelScreen->tex.offset = sarea->tex_offset; - 
intelScreen->logTextureGranularity = sarea->log_tex_granularity; - intelScreen->tex.handle = sarea->tex_handle; - intelScreen->tex.size = sarea->tex_size; + irb = intel_renderbuffer(rb); + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; - if (0) - intelPrintSAREA(sarea); + image->internal_format = rb->InternalFormat; + image->format = rb->Format; + image->data_type = rb->DataType; + image->data = loaderPrivate; + intel_region_reference(&image->region, irb->region); + + return image; } -static const __DRItexOffsetExtension intelTexOffsetExtension = { - { __DRI_TEX_OFFSET }, - intelSetTexOffset, -}; +static void +intel_destroy_image(__DRIimage *image) +{ + intel_region_release(&image->region); + FREE(image); +} -static const __DRItexBufferExtension intelTexBufferExtension = { - { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, - intelSetTexBuffer, - intelSetTexBuffer2, +static struct __DRIimageExtensionRec intelImageExtension = { + { __DRI_IMAGE, __DRI_IMAGE_VERSION }, + intel_create_image_from_name, + intel_create_image_from_renderbuffer, + intel_destroy_image, }; static const __DRIextension *intelScreenExtensions[] = { &driReadDrawableExtension, - &driCopySubBufferExtension.base, - &driSwapControlExtension.base, - &driFrameTrackingExtension.base, - &driMediaStreamCounterExtension.base, - &intelTexOffsetExtension.base, &intelTexBufferExtension.base, + &intelFlushExtension.base, + &intelImageExtension.base, NULL }; static GLboolean -intel_get_param(__DRIscreenPrivate *psp, int param, int *value) +intel_get_param(__DRIscreen *psp, int param, int *value) { int ret; struct drm_i915_getparam gp; @@ -266,70 +246,25 @@ intel_get_param(__DRIscreenPrivate *psp, int param, int *value) return GL_TRUE; } -static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) +static void +nop_callback(GLuint key, void *data, void *userData) { - intelScreenPrivate *intelScreen; - I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv; - drm_i915_sarea_t *sarea; - - if 
(sPriv->devPrivSize != sizeof(I830DRIRec)) { - fprintf(stderr, - "\nERROR! sizeof(I830DRIRec) does not match passed size from device driver\n"); - return GL_FALSE; - } - - /* Allocate the private area */ - intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate)); - if (!intelScreen) { - fprintf(stderr, "\nERROR! Allocating private area failed\n"); - return GL_FALSE; - } - /* parse information in __driConfigOptions */ - driParseOptionInfo(&intelScreen->optionCache, - __driConfigOptions, __driNConfigOptions); - - intelScreen->driScrnPriv = sPriv; - sPriv->private = (void *) intelScreen; - sarea = (drm_i915_sarea_t *) - (((GLubyte *) sPriv->pSAREA) + gDRIPriv->sarea_priv_offset); - intelScreen->sarea = sarea; - - intelScreen->deviceID = gDRIPriv->deviceID; - - intelUpdateScreenFromSAREA(intelScreen, sarea); - - if (!intelMapScreenRegions(sPriv)) { - fprintf(stderr, "\nERROR! mapping regions\n"); - _mesa_free(intelScreen); - sPriv->private = NULL; - return GL_FALSE; - } - - if (0) - intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); - - intelScreen->drmMinor = sPriv->drm_version.minor; - - /* Determine if IRQs are active? 
*/ - if (!intel_get_param(sPriv, I915_PARAM_IRQ_ACTIVE, - &intelScreen->irq_active)) - return GL_FALSE; - - sPriv->extensions = intelScreenExtensions; - - return GL_TRUE; } - static void -intelDestroyScreen(__DRIscreenPrivate * sPriv) +intelDestroyScreen(__DRIscreen * sPriv) { - intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + struct intel_screen *intelScreen = sPriv->private; dri_bufmgr_destroy(intelScreen->bufmgr); - intelUnmapScreenRegions(intelScreen); driDestroyOptionInfo(&intelScreen->optionCache); + /* Some regions may still have references to them at this point, so + * flush the hash table to prevent _mesa_DeleteHashTable() from + * complaining about the hash not being empty; */ + _mesa_HashDeleteAll(intelScreen->named_regions, nop_callback, NULL); + _mesa_DeleteHashTable(intelScreen->named_regions); + FREE(intelScreen); sPriv->private = NULL; } @@ -339,10 +274,12 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv) * This is called when we need to set up GL rendering to a new X window. 
*/ static GLboolean -intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, - __DRIdrawablePrivate * driDrawPriv, +intelCreateBuffer(__DRIscreen * driScrnPriv, + __DRIdrawable * driDrawPriv, const __GLcontextModes * mesaVis, GLboolean isPixmap) { + struct intel_renderbuffer *rb; + if (isPixmap) { return GL_FALSE; /* not implemented */ } @@ -351,12 +288,12 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, mesaVis->depthBits != 24); gl_format rgbFormat; - struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); + struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer); - if (!intel_fb) + if (!fb) return GL_FALSE; - _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); + _mesa_initialize_window_framebuffer(fb, mesaVis); if (mesaVis->redBits == 5) rgbFormat = MESA_FORMAT_RGB565; @@ -366,16 +303,12 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, rgbFormat = MESA_FORMAT_ARGB8888; /* setup the hardware-based renderbuffers */ - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, - &intel_fb->color_rb[0]->Base); + rb = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base); if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); - - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, - &intel_fb->color_rb[1]->Base); - + rb = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base); } if (mesaVis->depthBits == 24) { @@ -384,116 +317,64 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, struct intel_renderbuffer *depthStencilRb = intel_create_renderbuffer(MESA_FORMAT_S8_Z24); /* note: bind RB to two attachment points */ - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, - &depthStencilRb->Base); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_STENCIL, - &depthStencilRb->Base); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthStencilRb->Base); + 
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb = intel_create_renderbuffer(MESA_FORMAT_X8_Z24); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, - &depthRb->Base); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); } } else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb = intel_create_renderbuffer(MESA_FORMAT_Z16); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); } /* now add any/all software-based renderbuffers we may need */ - _mesa_add_soft_renderbuffers(&intel_fb->Base, + _mesa_add_soft_renderbuffers(fb, GL_FALSE, /* never sw color */ GL_FALSE, /* never sw depth */ swStencil, mesaVis->accumRedBits > 0, GL_FALSE, /* never sw alpha */ GL_FALSE /* never sw aux */ ); - driDrawPriv->driverPrivate = (void *) intel_fb; + driDrawPriv->driverPrivate = fb; return GL_TRUE; } } static void -intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv) -{ - struct intel_framebuffer *intel_fb = driDrawPriv->driverPrivate; - struct intel_renderbuffer *depth_rb; - struct intel_renderbuffer *stencil_rb; - - if (intel_fb) { - if (intel_fb->color_rb[0]) { - intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); - } - - if (intel_fb->color_rb[1]) { - intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); - } - - depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); - if (depth_rb) { - intel_renderbuffer_set_region(depth_rb, NULL); - } - - stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); - if (stencil_rb) { - intel_renderbuffer_set_region(stencil_rb, NULL); - } - } - - _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); -} - - -/** - * Get information about previous buffer swaps. 
- */ -static int -intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +intelDestroyBuffer(__DRIdrawable * driDrawPriv) { - struct intel_framebuffer *intel_fb; - - if ((dPriv == NULL) || (dPriv->driverPrivate == NULL) - || (sInfo == NULL)) { - return -1; - } - - intel_fb = dPriv->driverPrivate; - sInfo->swap_count = intel_fb->swap_count; - sInfo->swap_ust = intel_fb->swap_ust; - sInfo->swap_missed_count = intel_fb->swap_missed_count; - - sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) - ? driCalculateSwapUsage(dPriv, 0, intel_fb->swap_missed_ust) - : 0.0; - - return 0; + struct gl_framebuffer *fb = driDrawPriv->driverPrivate; + + _mesa_reference_framebuffer(&fb, NULL); } - /* There are probably better ways to do this, such as an * init-designated function to register chipids and createcontext * functions. */ extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); static GLboolean intelCreateContext(const __GLcontextModes * mesaVis, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->private; #ifdef I915 if (IS_9XX(intelScreen->deviceID)) { @@ -513,217 +394,33 @@ intelCreateContext(const __GLcontextModes * mesaVis, return GL_FALSE; } - -static __DRIconfig ** -intelFillInModes(__DRIscreenPrivate *psp, - unsigned 
pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer) -{ - __DRIconfig **configs; - __GLcontextModes *m; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - int i; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - uint8_t depth_bits_array[3]; - uint8_t stencil_bits_array[3]; - uint8_t msaa_samples_array[1]; - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - if (depth_bits == 24) - stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; - - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - msaa_samples_array[0] = 0; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 
3 : 1; - - if (pixel_bits == 16) { - configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor, - msaa_samples_array, 1); - } - else { - __DRIconfig **configs_a8r8g8b8; - __DRIconfig **configs_x8r8g8b8; - - configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, 1); - configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, 1); - configs = driConcatConfigs(configs_a8r8g8b8, configs_x8r8g8b8); - } - - if (configs == NULL) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, - __LINE__); - return NULL; - } - - /* Mark the visual as slow if there are "fake" stencil bits. - */ - for (i = 0; configs[i]; i++) { - m = &configs[i]->modes; - if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) { - m->visualRating = GLX_SLOW_CONFIG; - } - } - - return configs; -} - static GLboolean -intel_init_bufmgr(intelScreenPrivate *intelScreen) +intel_init_bufmgr(struct intel_screen *intelScreen) { - GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; - int gem_kernel = 0; - GLboolean gem_supported; - struct drm_i915_getparam gp; - __DRIscreenPrivate *spriv = intelScreen->driScrnPriv; + __DRIscreen *spriv = intelScreen->driScrnPriv; int num_fences = 0; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; - gp.param = I915_PARAM_HAS_GEM; - gp.value = &gem_kernel; - - (void) drmCommandWriteRead(spriv->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); - - /* If we've got a new enough DDX that's initializing GEM and giving us - * object handles for the shared buffers, use that. 
- */ - intelScreen->ttm = GL_FALSE; - if (intelScreen->driScrnPriv->dri2.enabled) - gem_supported = GL_TRUE; - else if (intelScreen->driScrnPriv->ddx_version.minor >= 9 && - gem_kernel && - intelScreen->front.bo_handle != -1) - gem_supported = GL_TRUE; - else - gem_supported = GL_FALSE; - - if (!gem_disable && gem_supported) { - intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); - if (intelScreen->bufmgr != NULL) - intelScreen->ttm = GL_TRUE; - } + intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); /* Otherwise, use the classic buffer manager. */ if (intelScreen->bufmgr == NULL) { - if (gem_disable) { - _mesa_warning(NULL, "GEM disabled. Using classic."); - } else { - _mesa_warning(NULL, - "Failed to initialize GEM. Falling back to classic."); - } - - if (intelScreen->tex.size == 0) { - fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", - __func__, __LINE__); - return GL_FALSE; - } - - intelScreen->bufmgr = - intel_bufmgr_fake_init(spriv->fd, - intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - (unsigned int * volatile) - &intelScreen->sarea->last_dispatch); + fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", + __func__, __LINE__); + return GL_FALSE; } - if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences)) - intelScreen->kernel_exec_fencing = !!num_fences; - else - intelScreen->kernel_exec_fencing = GL_FALSE; - - return GL_TRUE; -} - -/** - * This is the driver specific part of the createNewScreen entry point. - * Called when using legacy DRI. 
- * - * \todo maybe fold this into intelInitDriver - * - * \return the __GLcontextModes supported by this driver - */ -static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp) -{ - intelScreenPrivate *intelScreen; -#ifdef I915 - static const __DRIversion ddx_expected = { 1, 5, 0 }; -#else - static const __DRIversion ddx_expected = { 1, 6, 0 }; -#endif - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = { 1, 5, 0 }; - I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; - - if (!driCheckDriDdxDrmVersions2("i915", - &psp->dri_version, &dri_expected, - &psp->ddx_version, &ddx_expected, - &psp->drm_version, &drm_expected)) { - return NULL; + if (!intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences) || + num_fences == 0) { + fprintf(stderr, "[%s: %u] Kernel 2.6.29 required.\n", __func__, __LINE__); + return GL_FALSE; } - if (!intelInitDriver(psp)) - return NULL; + drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr); - psp->extensions = intelScreenExtensions; + intelScreen->named_regions = _mesa_NewHashTable(); - intelScreen = psp->private; - if (!intel_init_bufmgr(intelScreen)) - return GL_FALSE; - - return (const __DRIconfig **) - intelFillInModes(psp, dri_priv->cpp * 8, - (dri_priv->cpp == 2) ? 16 : 24, - (dri_priv->cpp == 2) ? 0 : 8, 1); -} - -struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen) -{ - /* - * This should probably change to have the screen allocate a dummy - * context at screen creation. For now just use the current context. 
- */ - - GET_CURRENT_CONTEXT(ctx); - if (ctx == NULL) { - _mesa_problem(NULL, "No current context in intelScreenContext\n"); - return NULL; - } - return intel_context(ctx); + return GL_TRUE; } /** @@ -733,23 +430,21 @@ struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen) * \return the __GLcontextModes supported by this driver */ static const -__DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) +__DRIconfig **intelInitScreen2(__DRIscreen *psp) { - intelScreenPrivate *intelScreen; + struct intel_screen *intelScreen; GLenum fb_format[3]; GLenum fb_type[3]; - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ + static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML }; uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1]; int color; __DRIconfig **configs = NULL; /* Allocate the private area */ - intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate)); + intelScreen = CALLOC(sizeof *intelScreen); if (!intelScreen) { fprintf(stderr, "\nERROR! 
Allocating private area failed\n"); return GL_FALSE; @@ -761,8 +456,6 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) intelScreen->driScrnPriv = psp; psp->private = (void *) intelScreen; - intelScreen->drmMinor = psp->drm_version.minor; - /* Determine chipset ID */ if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID, &intelScreen->deviceID)) @@ -771,18 +464,8 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) if (!intel_init_bufmgr(intelScreen)) return GL_FALSE; - intelScreen->irq_active = 1; psp->extensions = intelScreenExtensions; - depth_bits[0] = 0; - stencil_bits[0] = 0; - depth_bits[1] = 16; - stencil_bits[1] = 0; - depth_bits[2] = 24; - stencil_bits[2] = 0; - depth_bits[3] = 24; - stencil_bits[3] = 8; - msaa_samples_array[0] = 0; fb_format[0] = GL_RGB; @@ -797,27 +480,27 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) depth_bits[0] = 0; stencil_bits[0] = 0; + /* Generate a rich set of useful configs that do not include an + * accumulation buffer. + */ for (color = 0; color < ARRAY_SIZE(fb_format); color++) { __DRIconfig **new_configs; int depth_factor; - /* With DRI2 right now, GetBuffers always returns a depth/stencil buffer - * with the same cpp as the drawable. So we can't support depth cpp != - * color cpp currently. + /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil + * buffer that has a different number of bits per pixel than the color + * buffer. This isn't yet supported here.
*/ if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) { depth_bits[1] = 16; stencil_bits[1] = 0; - - depth_factor = 2; } else { depth_bits[1] = 24; - stencil_bits[1] = 0; - depth_bits[2] = 24; - stencil_bits[2] = 8; - - depth_factor = 3; + stencil_bits[1] = 8; } + + depth_factor = 2; + new_configs = driCreateConfigs(fb_format[color], fb_type[color], depth_bits, stencil_bits, @@ -825,7 +508,33 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) back_buffer_modes, ARRAY_SIZE(back_buffer_modes), msaa_samples_array, - ARRAY_SIZE(msaa_samples_array)); + ARRAY_SIZE(msaa_samples_array), + GL_FALSE); + if (configs == NULL) + configs = new_configs; + else + configs = driConcatConfigs(configs, new_configs); + } + + /* Generate the minimum possible set of configs that include an + * accumulation buffer. + */ + for (color = 0; color < ARRAY_SIZE(fb_format); color++) { + __DRIconfig **new_configs; + + if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) { + depth_bits[0] = 16; + stencil_bits[0] = 0; + } else { + depth_bits[0] = 24; + stencil_bits[0] = 8; + } + + new_configs = driCreateConfigs(fb_format[color], fb_type[color], + depth_bits, stencil_bits, 1, + back_buffer_modes + 1, 1, + msaa_samples_array, 1, + GL_TRUE); if (configs == NULL) configs = new_configs; else @@ -842,19 +551,19 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) } const struct __DriverAPIRec driDriverAPI = { - .InitScreen = intelInitScreen, .DestroyScreen = intelDestroyScreen, .CreateContext = intelCreateContext, .DestroyContext = intelDestroyContext, .CreateBuffer = intelCreateBuffer, .DestroyBuffer = intelDestroyBuffer, - .SwapBuffers = intelSwapBuffers, .MakeCurrent = intelMakeCurrent, .UnbindContext = intelUnbindContext, - .GetSwapInfo = intelGetSwapInfo, - .GetDrawableMSC = driDrawableGetMSC32, - .WaitForMSC = driWaitForMSC32, - .CopySubBuffer = intelCopySubBuffer, - .InitScreen2 = intelInitScreen2, }; + +/* This is the table of extensions that the loader will dlsym() for. 
*/ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driDRI2Extension.base, + NULL +}; diff --git a/shared/intel_screen.h b/shared/intel_screen.h index a9b9e10..5863093 100644 --- a/shared/intel_screen.h +++ b/shared/intel_screen.h @@ -34,74 +34,35 @@ #include "i915_drm.h" #include "xmlconfig.h" -/* XXX: change name or eliminate to avoid conflict with "struct - * intel_region"!!! - */ -typedef struct +struct intel_screen { - drm_handle_t handle; - drmSize size; /* region size in bytes */ - char *map; /* memory map */ - int offset; /* from start of video mem, in bytes */ - unsigned int bo_handle; /* buffer object id if available, or -1 */ - /** - * Flags if the region is tiled. - * - * Not included is Y versus X tiling. - */ - GLboolean tiled; -} intelRegion; - -typedef struct -{ - intelRegion front; - intelRegion back; - intelRegion depth; - intelRegion tex; - int deviceID; - int width; - int height; - int pitch; /* common row stride, in pixels */ int logTextureGranularity; - __DRIscreenPrivate *driScrnPriv; - - volatile drm_i915_sarea_t *sarea; - - int drmMinor; - - int irq_active; + __DRIscreen *driScrnPriv; GLboolean no_hw; GLboolean no_vbo; - int ttm; dri_bufmgr *bufmgr; - GLboolean kernel_exec_fencing; + struct _mesa_HashTable *named_regions; /** * Configuration cache with default values for all contexts */ driOptionCache optionCache; -} intelScreenPrivate; - - +}; -extern GLboolean intelMapScreenRegions(__DRIscreenPrivate * sPriv); +extern GLboolean intelMapScreenRegions(__DRIscreen * sPriv); -extern void intelUnmapScreenRegions(intelScreenPrivate * intelScreen); +extern void intelDestroyContext(__DRIcontext * driContextPriv); -extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv); - -extern GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv); +extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv); extern GLboolean -intelMakeCurrent(__DRIcontextPrivate * driContextPriv, - 
__DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); - -extern struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen); +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); #endif diff --git a/shared/intel_span.c b/shared/intel_span.c index 34c3d9d..fb5c01b 100644 --- a/shared/intel_span.c +++ b/shared/intel_span.c @@ -43,252 +43,24 @@ static void intel_set_span_functions(struct intel_context *intel, struct gl_renderbuffer *rb); -#define SPAN_CACHE_SIZE 4096 - -static void -get_span_cache(struct intel_renderbuffer *irb, uint32_t offset) -{ - if (irb->span_cache == NULL) { - irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); - irb->span_cache_offset = -1; - } - - if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) { - irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1); - dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset, - SPAN_CACHE_SIZE, irb->span_cache); - } -} - -static void -clear_span_cache(struct intel_renderbuffer *irb) -{ - irb->span_cache_offset = -1; -} - -static uint32_t -pread_32(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint32_t -pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) | - 0xff000000; -} - -static uint16_t -pread_16(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static uint8_t -pread_8(struct intel_renderbuffer *irb, uint32_t offset) -{ - get_span_cache(irb, offset); - - return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); -} - -static void -pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - 
clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 4, &val); -} - -static void -pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 3, &val); -} - -static void -pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 2, &val); -} - -static void -pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) -{ - clear_span_cache(irb); - - dri_bo_subdata(irb->region->buffer, offset, 1, &val); -} - -static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, - int x, int y) -{ - return (y * irb->region->pitch + x) * irb->region->cpp; -} - -/* - * Deal with tiled surfaces - */ - -static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - x += irb->region->draw_x; - y += irb->region->draw_y; - - tile_stride = (irb->region->pitch * irb->region->cpp) << 3; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x1ff; - y_tile_off = y & 7; - - x_tile_number = xbyte >> 9; - y_tile_number = y >> 3; - - tile_off = (y_tile_off << 9) + x_tile_off; - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number 
* tile_stride; - -#if 0 - printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", - x, y, tile_off, tile_base, - tile_off + tile_base, - irb->region->pitch, tile_stride); -#endif - - return tile_base + tile_off; -} - -static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, - int x, int y) -{ - int tile_stride; - int xbyte; - int x_tile_off, y_tile_off; - int x_tile_number, y_tile_number; - int tile_off, tile_base; - - x += irb->region->draw_x; - y += irb->region->draw_y; - - tile_stride = (irb->region->pitch * irb->region->cpp) << 5; - - xbyte = x * irb->region->cpp; - - x_tile_off = xbyte & 0x7f; - y_tile_off = y & 0x1f; - - x_tile_number = xbyte >> 7; - y_tile_number = y >> 5; - - tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + - (x_tile_off & 0xf); - - switch (irb->region->bit_6_swizzle) { - case I915_BIT_6_SWIZZLE_NONE: - break; - case I915_BIT_6_SWIZZLE_9: - tile_off ^= ((tile_off >> 3) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); - break; - case I915_BIT_6_SWIZZLE_9_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); - break; - case I915_BIT_6_SWIZZLE_9_10_11: - tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ - ((tile_off >> 5) & 64); - break; - default: - fprintf(stderr, "Unknown tile swizzling mode %d\n", - irb->region->bit_6_swizzle); - exit(1); - } - - tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - - return tile_base + tile_off; -} - -/* - break intelWriteRGBASpan_ARGB8888 -*/ - #undef DBG #define DBG 0 #define LOCAL_VARS \ - struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ const GLint yBias = ctx->DrawBuffer->Name ? 
0 : irb->Base.Height - 1;\ - unsigned int num_cliprects; \ - struct drm_clip_rect *cliprects; \ - int x_off, y_off; \ + int minx = 0, miny = 0; \ + int maxx = ctx->DrawBuffer->Width; \ + int maxy = ctx->DrawBuffer->Height; \ int pitch = irb->region->pitch * irb->region->cpp; \ void *buf = irb->region->buffer->virtual; \ GLuint p; \ (void) p; \ (void)buf; (void)pitch; /* unused for non-gttmap. */ \ - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); -/* XXX FBO: this is identical to the macro in spantmp2.h except we get - * the cliprect info from the context, not the driDrawable. - * Move this into spantmp2.h someday. - */ -#define HW_CLIPLOOP() \ - do { \ - int _nc = num_cliprects; \ - while ( _nc-- ) { \ - int minx = cliprects[_nc].x1 - x_off; \ - int miny = cliprects[_nc].y1 - y_off; \ - int maxx = cliprects[_nc].x2 - x_off; \ - int maxy = cliprects[_nc].y2 - y_off; - -#if 0 - }} -#endif +#define HW_CLIPLOOP() +#define HW_ENDCLIPLOOP() #define Y_FLIP(_y) ((_y) * yScale + yBias) @@ -296,114 +68,74 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define HW_UNLOCK() -/* Convenience macros to avoid typing the swizzle argument over and over */ -#define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) -#define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) -#define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) +/* Convenience macros to avoid typing the address argument over and over */ +#define NO_TILE(_X, _Y) (((_Y) * irb->region->pitch + (_X)) * irb->region->cpp) /* r5g6b5 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_RGB -#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 -#define INTEL_READ_VALUE(offset) pread_16(irb, offset) -#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) -#define INTEL_TAG(x) x##_RGB565 -#include "intel_spantmp.h" +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 +#define TAG(x) 
intel_##x##_RGB565 +#define TAG2(x,y) intel_##x##y_RGB565 +#include "spantmp2.h" /* a4r4g4b4 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGRA -#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV -#define INTEL_READ_VALUE(offset) pread_16(irb, offset) -#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) -#define INTEL_TAG(x) x##_ARGB4444 -#include "intel_spantmp.h" +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV +#define TAG(x) intel_##x##_ARGB4444 +#define TAG2(x,y) intel_##x##y_ARGB4444 +#include "spantmp2.h" /* a1r5g5b5 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGRA -#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV -#define INTEL_READ_VALUE(offset) pread_16(irb, offset) -#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) -#define INTEL_TAG(x) x##_ARGB1555 -#include "intel_spantmp.h" +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV +#define TAG(x) intel_##x##_ARGB1555 +#define TAG2(x,y) intel_##x##y##_ARGB1555 +#include "spantmp2.h" /* a8r8g8b8 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGRA -#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV -#define INTEL_READ_VALUE(offset) pread_32(irb, offset) -#define INTEL_WRITE_VALUE(offset, v) pwrite_32(irb, offset, v) -#define INTEL_TAG(x) x##_ARGB8888 -#include "intel_spantmp.h" +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define TAG(x) intel_##x##_ARGB8888 +#define TAG2(x,y) intel_##x##y##_ARGB8888 +#include "spantmp2.h" /* x8r8g8b8 color span and pixel functions */ -#define INTEL_PIXEL_FMT GL_BGR -#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV -#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset) -#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v) -#define INTEL_TAG(x) x##_xRGB8888 -#include "intel_spantmp.h" +#define SPANTMP_PIXEL_FMT GL_BGR +#define 
SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV +#define TAG(x) intel_##x##_xRGB8888 +#define TAG2(x,y) intel_##x##y##_xRGB8888 +#include "spantmp2.h" #define LOCAL_DEPTH_VARS \ - struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\ - unsigned int num_cliprects; \ - struct drm_clip_rect *cliprects; \ - int x_off, y_off; \ + int minx = 0, miny = 0; \ + int maxx = ctx->DrawBuffer->Width; \ + int maxy = ctx->DrawBuffer->Height; \ int pitch = irb->region->pitch * irb->region->cpp; \ void *buf = irb->region->buffer->virtual; \ (void)buf; (void)pitch; /* unused for non-gttmap. */ \ - intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); - #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS /* z16 depthbuffer functions. */ -#define INTEL_VALUE_TYPE GLushort -#define INTEL_WRITE_DEPTH(offset, d) pwrite_16(irb, offset, d) -#define INTEL_READ_DEPTH(offset) pread_16(irb, offset) -#define INTEL_TAG(name) name##_z16 -#include "intel_depthtmp.h" - -/* z24x8 depthbuffer functions. 
*/ -#define INTEL_VALUE_TYPE GLuint -#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d) -#define INTEL_READ_DEPTH(offset) pread_32(irb, offset) -#define INTEL_TAG(name) name##_z24_x8 -#include "intel_depthtmp.h" - - -/** - ** 8-bit stencil function (XXX FBO: This is obsolete) - **/ -/* XXX */ -#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3); -#define TAG(x) intel_gttmap_##x##_z24_s8 -#include "stenciltmp.h" - -/** - ** 8-bit stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3); -#define TAG(x) intel##x##_z24_s8 -#include "stenciltmp.h" - -/** - ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3); -#define TAG(x) intel_XTile_##x##_z24_s8 -#include "stenciltmp.h" - -/** - ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) - **/ -#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d) -#define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3) -#define TAG(x) intel_YTile_##x##_z24_s8 -#include "stenciltmp.h" +#define VALUE_TYPE GLushort +#define WRITE_DEPTH(_x, _y, d) \ + (*(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d) +#define READ_DEPTH(d, _x, _y) \ + d = *(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) +#define TAG(x) intel_##x##_z16 +#include "depthtmp.h" + +/* z24_s8 and z24_x8 depthbuffer functions. 
*/ +#define VALUE_TYPE GLuint +#define WRITE_DEPTH(_x, _y, d) \ + (*(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d) +#define READ_DEPTH(d, _x, _y) \ + d = *(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) +#define TAG(x) intel_##x##_z24_x8 +#include "depthtmp.h" void intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) @@ -413,8 +145,7 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) if (irb == NULL || irb->region == NULL) return; - if (intel->intelScreen->kernel_exec_fencing) - drm_intel_gem_bo_map_gtt(irb->region->buffer); + drm_intel_gem_bo_map_gtt(irb->region->buffer); intel_set_span_functions(intel, rb); } @@ -428,10 +159,7 @@ intel_renderbuffer_unmap(struct intel_context *intel, if (irb == NULL || irb->region == NULL) return; - if (intel->intelScreen->kernel_exec_fencing) - drm_intel_gem_bo_unmap_gtt(irb->region->buffer); - else - clear_span_cache(irb); + drm_intel_gem_bo_unmap_gtt(irb->region->buffer); rb->GetRow = NULL; rb->PutRow = NULL; @@ -517,7 +245,7 @@ intelSpanRenderStart(GLcontext * ctx) GLuint i; intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); + intel_prepare_render(intel); for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { @@ -553,8 +281,6 @@ intelSpanRenderFinish(GLcontext * ctx) intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE); if (ctx->ReadBuffer != ctx->DrawBuffer) intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE); - - UNLOCK_HARDWARE(intel); } @@ -613,187 +339,34 @@ intel_set_span_functions(struct intel_context *intel, struct gl_renderbuffer *rb) { struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - uint32_t tiling; - - /* If in GEM mode, we need to do the tile address swizzling ourselves, - * instead of the fence registers handling it. 
- */ - if (intel->ttm) - tiling = irb->region->tiling; - else - tiling = I915_TILING_NONE; - - if (intel->intelScreen->kernel_exec_fencing) { - switch (irb->Base.Format) { - case MESA_FORMAT_RGB565: - intel_gttmap_InitPointers_RGB565(rb); - break; - case MESA_FORMAT_ARGB4444: - intel_gttmap_InitPointers_ARGB4444(rb); - break; - case MESA_FORMAT_ARGB1555: - intel_gttmap_InitPointers_ARGB1555(rb); - break; - case MESA_FORMAT_XRGB8888: - intel_gttmap_InitPointers_xRGB8888(rb); - break; - case MESA_FORMAT_ARGB8888: - intel_gttmap_InitPointers_ARGB8888(rb); - break; - case MESA_FORMAT_Z16: - intel_gttmap_InitDepthPointers_z16(rb); - break; - case MESA_FORMAT_X8_Z24: - intel_gttmap_InitDepthPointers_z24_x8(rb); - break; - case MESA_FORMAT_S8_Z24: - /* There are a few different ways SW asks us to access the S8Z24 data: - * Z24 depth-only depth reads - * S8Z24 depth reads - * S8Z24 stencil reads. - */ - if (rb->Format == MESA_FORMAT_S8_Z24) { - intel_gttmap_InitDepthPointers_z24_x8(rb); - } else if (rb->Format == MESA_FORMAT_S8) { - intel_gttmap_InitStencilPointers_z24_s8(rb); - } - break; - default: - _mesa_problem(NULL, - "Unexpected MesaFormat %d in intelSetSpanFunctions", - irb->Base.Format); - break; - } - return; - } switch (irb->Base.Format) { case MESA_FORMAT_RGB565: - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_RGB565(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_RGB565(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_RGB565(rb); - break; - } + intel_InitPointers_RGB565(rb); break; case MESA_FORMAT_ARGB4444: - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB4444(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB4444(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB4444(rb); - break; - } + intel_InitPointers_ARGB4444(rb); break; case MESA_FORMAT_ARGB1555: - switch (tiling) { - case I915_TILING_NONE: - default: - 
intelInitPointers_ARGB1555(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB1555(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB1555(rb); - break; - } + intel_InitPointers_ARGB1555(rb); break; case MESA_FORMAT_XRGB8888: - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_xRGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); - break; - } + intel_InitPointers_xRGB8888(rb); break; case MESA_FORMAT_ARGB8888: - /* 8888 RGBA */ - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitPointers_ARGB8888(rb); - break; - case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); - break; - case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); - break; - } + intel_InitPointers_ARGB8888(rb); break; case MESA_FORMAT_Z16: - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z16(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z16(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z16(rb); - break; - } + intel_InitDepthPointers_z16(rb); break; case MESA_FORMAT_X8_Z24: case MESA_FORMAT_S8_Z24: - /* There are a few different ways SW asks us to access the S8Z24 data: - * Z24 depth-only depth reads - * S8Z24 depth reads - * S8Z24 stencil reads. 
- */ - if (rb->Format == MESA_FORMAT_S8_Z24) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_x8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_x8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_x8(rb); - break; - } - } else if (rb->Format == MESA_FORMAT_S8) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; - } - } else { - _mesa_problem(NULL, - "Unexpected ActualFormat in intelSetSpanFunctions"); - } + intel_InitDepthPointers_z24_x8(rb); break; default: _mesa_problem(NULL, - "Unexpected MesaFormat in intelSetSpanFunctions"); + "Unexpected MesaFormat %d in intelSetSpanFunctions", + irb->Base.Format); break; } } diff --git a/shared/intel_spantmp.h b/shared/intel_spantmp.h deleted file mode 100644 index bad0339..0000000 --- a/shared/intel_spantmp.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** - * Wrapper around the spantmp.h macrofest to generate spans code for - * all the tiling styles. - */ - -#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT -#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE -#define TAG(x) INTEL_TAG(intel_gttmap_##x) -#define TAG2(x, y) INTEL_TAG(intel_gttmap_##x##y) -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT -#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE -#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v) -#define GET_VALUE(_x, _y) INTEL_READ_VALUE(NO_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel##x) -#define TAG2(x, y) INTEL_TAG(intel##x)##y -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT -#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE -#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v) -#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel_XTile_##x) -#define TAG2(x, y) INTEL_TAG(intel_XTile_##x)##y -#include "spantmp2.h" - -#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT -#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE -#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(Y_TILE(_x, _y), v) -#define GET_VALUE(_x, _y) INTEL_READ_VALUE(Y_TILE(_x, _y)) -#define TAG(x) INTEL_TAG(intel_YTile_##x) -#define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y -#include "spantmp2.h" - -#undef INTEL_PIXEL_FMT -#undef INTEL_PIXEL_TYPE -#undef INTEL_WRITE_VALUE -#undef INTEL_READ_VALUE -#undef INTEL_TAG diff --git a/shared/intel_state.c b/shared/intel_state.c index 4ee7423..c5ef909 100644 --- a/shared/intel_state.c +++ b/shared/intel_state.c @@ -35,8 +35,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_regions.h" -#include 
"swrast/swrast.h" int intel_translate_shadow_compare_func(GLenum func) @@ -196,25 +194,6 @@ intel_translate_logic_op(GLenum opcode) } } - -static void -intelClearColor(GLcontext *ctx, const GLfloat color[4]) -{ - struct intel_context *intel = intel_context(ctx); - GLubyte clear[4]; - - CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - - /* compute both 32 and 16-bit clear values */ - intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1], - clear[2], clear[3]); - intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]); -} - - /* Fallback to swrast for select and feedback. */ static void @@ -229,5 +208,4 @@ void intelInitStateFuncs(struct dd_function_table *functions) { functions->RenderMode = intelRenderMode; - functions->ClearColor = intelClearColor; } diff --git a/shared/intel_swapbuffers.c b/shared/intel_swapbuffers.c deleted file mode 100644 index 7d035b9..0000000 --- a/shared/intel_swapbuffers.c +++ /dev/null @@ -1,248 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_blit.h" -#include "intel_buffers.h" -#include "intel_swapbuffers.h" -#include "intel_fbo.h" -#include "intel_batchbuffer.h" -#include "drirenderbuffer.h" -#include "vblank.h" -#include "i915_drm.h" - - - -/* - * Correct a drawablePrivate's set of vblank flags WRT the current context. - * When considering multiple crtcs. - */ -GLuint -intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv) -{ - if (!intel->intelScreen->driScrnPriv->dri2.enabled && - intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) { - volatile drm_i915_sarea_t *sarea = intel->sarea; - drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w, - .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h }; - drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y, - .x2 = sarea->planeA_x + sarea->planeA_w, - .y2 = sarea->planeA_y + sarea->planeA_h }; - drm_clip_rect_t planeB_rect = { .x1 = sarea->planeB_x, .y1 = sarea->planeB_y, - .x2 = sarea->planeB_x + sarea->planeB_w, - .y2 = sarea->planeB_y + sarea->planeB_h }; - GLint areaA = driIntersectArea( drw_rect, planeA_rect ); - GLint areaB = driIntersectArea( drw_rect, planeB_rect ); - GLuint flags = dPriv->vblFlags; - - /* Update vblank info - */ - if (areaB > areaA || (areaA == areaB && areaB > 0)) { - flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY; - } else { - flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; - } - 
- /* Do the stupid test: Is one of them actually disabled? - */ - if (sarea->planeA_w == 0 || sarea->planeA_h == 0) { - flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY; - } else if (sarea->planeB_w == 0 || sarea->planeB_h == 0) { - flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; - } - - return flags; - } else { - return dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; - } -} - - -/** - * Called from driSwapBuffers() - */ -void -intelSwapBuffers(__DRIdrawablePrivate * dPriv) -{ - __DRIscreenPrivate *psp = dPriv->driScreenPriv; - - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - GET_CURRENT_CONTEXT(ctx); - struct intel_context *intel; - - if (ctx == NULL) - return; - - intel = intel_context(ctx); - - if (ctx->Visual.doubleBufferMode) { - GLboolean missed_target; - struct intel_framebuffer *intel_fb = dPriv->driverPrivate; - int64_t ust; - - _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ - - /* - * The old swapping ioctl was incredibly racy, just wait for vblank - * and do the swap ourselves. 
- */ - driWaitForVBlank(dPriv, &missed_target); - - /* - * Update each buffer's vbl_pending so we don't get too out of - * sync - */ - intel_get_renderbuffer(&intel_fb->Base, - BUFFER_BACK_LEFT)->vbl_pending = dPriv->vblSeq; - intel_get_renderbuffer(&intel_fb->Base, - BUFFER_FRONT_LEFT)->vbl_pending = dPriv->vblSeq; - - intelCopyBuffer(dPriv, NULL); - - intel_fb->swap_count++; - (*psp->systemTime->getUST) (&ust); - if (missed_target) { - intel_fb->swap_missed_count++; - intel_fb->swap_missed_ust = ust - intel_fb->swap_ust; - } - - intel_fb->swap_ust = ust; - } - drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); - } - else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - - -/** - * Called from driCopySubBuffer() - */ -void -intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - struct intel_context *intel = - (struct intel_context *) dPriv->driContextPriv->driverPrivate; - GLcontext *ctx = &intel->ctx; - - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ - intelCopyBuffer(dPriv, &rect); - } - } - else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - - -/** - * This will be called whenever the currently bound window is moved/resized. - * XXX: actually, it seems to NOT be called when the window is only moved (BP). 
- */ -void -intelWindowMoved(struct intel_context *intel) -{ - GLcontext *ctx = &intel->ctx; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - struct intel_framebuffer *intel_fb = dPriv->driverPrivate; - - if (!intel->intelScreen->driScrnPriv->dri2.enabled && - intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) { - GLuint flags = intelFixupVblank(intel, dPriv); - - /* Check to see if we changed pipes */ - if (flags != dPriv->vblFlags && dPriv->vblFlags && - !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ)) { - int64_t count; - drmVBlank vbl; - int i; - - /* - * Deal with page flipping - */ - vbl.request.type = DRM_VBLANK_ABSOLUTE; - - if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) { - vbl.request.type |= DRM_VBLANK_SECONDARY; - } - - for (i = 0; i < 2; i++) { - if (!intel_fb->color_rb[i] || - (intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <= - (1<<23)) - continue; - - vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending; - drmWaitVBlank(intel->driFd, &vbl); - } - - /* - * Update msc_base from old pipe - */ - driDrawableGetMSC32(dPriv->driScreenPriv, dPriv, &count); - dPriv->msc_base = count; - /* - * Then get new vblank_base and vblSeq values - */ - dPriv->vblFlags = flags; - driGetCurrentVBlank(dPriv); - dPriv->vblank_base = dPriv->vblSeq; - - intel_fb->vbl_waited = dPriv->vblSeq; - - for (i = 0; i < 2; i++) { - if (intel_fb->color_rb[i]) - intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; - } - } - } else { - dPriv->vblFlags &= ~VBLANK_FLAG_SECONDARY; - } - - /* Update Mesa's notion of window size */ - driUpdateFramebufferSize(ctx, dPriv); - intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */ - - /* Update hardware scissor */ - if (ctx->Driver.Scissor != NULL) { - ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height); - } - - /* Re-calculate viewport related state */ - if (ctx->Driver.DepthRange != NULL) - ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far ); -} 
diff --git a/shared/intel_swapbuffers.h b/shared/intel_swapbuffers.h deleted file mode 100644 index 75bb624..0000000 --- a/shared/intel_swapbuffers.h +++ /dev/null @@ -1,52 +0,0 @@ - -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef INTEL_SWAPBUFFERS_H -#define INTEL_SWAPBUFFERS_H - -#include "dri_util.h" -#include "drm.h" - -struct intel_context; -struct intel_framebuffer; - - -extern void -intelSwapBuffers(__DRIdrawablePrivate * dPriv); - -extern void -intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h); - -extern GLuint -intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv); - -extern void -intelWindowMoved(struct intel_context *intel); - - -#endif /* INTEL_SWAPBUFFERS_H */ diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c index 0d7889d..d67f0cb 100644 --- a/shared/intel_syncobj.c +++ b/shared/intel_syncobj.c @@ -50,7 +50,7 @@ intel_new_sync_object(GLcontext *ctx, GLuint id) { struct intel_sync_object *sync; - sync = _mesa_calloc(sizeof(struct intel_sync_object)); + sync = calloc(1, sizeof(struct intel_sync_object)); return &sync->Base; } @@ -61,7 +61,7 @@ intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s) struct intel_sync_object *sync = (struct intel_sync_object *)s; drm_intel_bo_unreference(sync->bo); - _mesa_free(sync); + free(sync); } static void diff --git a/shared/intel_tex.c b/shared/intel_tex.c index 215a534..8bb6ae9 100644 --- a/shared/intel_tex.c +++ b/shared/intel_tex.c @@ -146,7 +146,7 @@ timed_memcpy(void *dest, const void *src, size_t n) double rate; if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) - _mesa_printf("Warning - non-aligned texture copy!\n"); + printf("Warning - non-aligned texture copy!\n"); t1 = fastrdtsc(); ret = do_memcpy(dest, src, n); @@ -154,7 +154,7 @@ timed_memcpy(void *dest, const void *src, size_t n) rate = time_diff(t1, t2); rate /= (double) n; - _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate); + printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate); return ret; } #endif /* DO_DEBUG */ diff --git a/shared/intel_tex.h b/shared/intel_tex.h index 
f3cc0ff..4bb012d 100644 --- a/shared/intel_tex.h +++ b/shared/intel_tex.h @@ -45,8 +45,6 @@ void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); gl_format intelChooseTextureFormat(GLcontext *ctx, GLint internalFormat, GLenum format, GLenum type); -void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, - unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); void intelSetTexBuffer2(__DRIcontext *pDRICtx, diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c index 767d04d..13b8bcf 100644 --- a/shared/intel_tex_copy.c +++ b/shared/intel_tex_copy.c @@ -36,7 +36,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_batchbuffer.h" #include "intel_buffers.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" @@ -109,14 +108,12 @@ do_copy_texsubimage(struct intel_context *intel, return GL_FALSE; } - // intelFlush(ctx); - LOCK_HARDWARE(intel); + /* intelFlush(ctx); */ + intel_prepare_render(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, intelImage->mt->region, INTEL_WRITE_PART); - const GLint orig_x = x; - const GLint orig_y = y; GLuint image_x, image_y; GLshort src_pitch; @@ -126,19 +123,15 @@ do_copy_texsubimage(struct intel_context *intel, intelImage->face, 0, &image_x, &image_y); - /* Update dst for clipped src. Need to also clip the source rect. */ - dstx += x - orig_x; - dsty += y - orig_y; /* Can't blit to tiled buffers with non-tile-aligned offset. 
*/ if (intelImage->mt->region->tiling == I915_TILING_Y) { - UNLOCK_HARDWARE(intel); return GL_FALSE; } if (ctx->ReadBuffer->Name == 0) { /* reading from a window, adjust x, y */ - const __DRIdrawablePrivate *dPriv = intel->driReadDrawable; + const __DRIdrawable *dPriv = intel->driReadDrawable; y = dPriv->y + (dPriv->h - (y + height)); x += dPriv->x; @@ -160,22 +153,20 @@ do_copy_texsubimage(struct intel_context *intel, intelImage->mt->cpp, src_pitch, src->buffer, - src->draw_offset, + 0, src->tiling, intelImage->mt->pitch, dst_bo, 0, intelImage->mt->region->tiling, - x, y, image_x + dstx, image_y + dsty, + src->draw_x + x, src->draw_y + y, + image_x + dstx, image_y + dsty, width, height, GL_COPY)) { - UNLOCK_HARDWARE(intel); return GL_FALSE; } } - UNLOCK_HARDWARE(intel); - return GL_TRUE; } diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c index 87efb72..7be5231 100644 --- a/shared/intel_tex_format.c +++ b/shared/intel_tex_format.c @@ -1,6 +1,5 @@ #include "intel_context.h" #include "intel_tex.h" -#include "intel_chipset.h" #include "main/enums.h" @@ -173,13 +172,13 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, return MESA_FORMAT_SARGB8; case GL_SLUMINANCE_EXT: case GL_SLUMINANCE8_EXT: - if (IS_G4X(intel->intelScreen->deviceID)) + if (intel->has_luminance_srgb) return MESA_FORMAT_SL8; else return MESA_FORMAT_SARGB8; case GL_SLUMINANCE_ALPHA_EXT: case GL_SLUMINANCE8_ALPHA8_EXT: - if (IS_G4X(intel->intelScreen->deviceID)) + if (intel->has_luminance_srgb) return MESA_FORMAT_SLA8; else return MESA_FORMAT_SARGB8; diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c index 66d61f9..bac36ee 100644 --- a/shared/intel_tex_image.c +++ b/shared/intel_tex_image.c @@ -7,7 +7,6 @@ #include "main/convolve.h" #include "main/context.h" #include "main/formats.h" -#include "main/image.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/texgetimage.h" @@ -178,6 +177,7 @@ check_pbo_format(GLint internalFormat, 
switch (internalFormat) { case 4: case GL_RGBA: + case GL_RGBA8: return (format == GL_BGRA && (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) && @@ -187,6 +187,11 @@ check_pbo_format(GLint internalFormat, return (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 && mesa_format == MESA_FORMAT_RGB565); + case 1: + case GL_LUMINANCE: + return (format == GL_LUMINANCE && + type == GL_UNSIGNED_BYTE && + mesa_format == MESA_FORMAT_L8); case GL_YCBCR_MESA: return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE); default: @@ -235,21 +240,20 @@ try_pbo_upload(struct intel_context *intel, if (drm_intel_bo_references(intel->batch->buf, dst_buffer)) intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); + intel_prepare_render(intel); { dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_stride, src_buffer, src_offset, GL_FALSE, - dst_stride, dst_buffer, 0, GL_FALSE, + dst_stride, dst_buffer, 0, + intelImage->mt->region->tiling, 0, 0, dst_x, dst_y, width, height, GL_COPY)) { - UNLOCK_HARDWARE(intel); return GL_FALSE; } } - UNLOCK_HARDWARE(intel); return GL_TRUE; } @@ -469,7 +473,7 @@ intelTexImage(GLcontext * ctx, pixels, unpack, "glTexImage"); } - LOCK_HARDWARE(intel); + intel_prepare_render(intel); if (intelImage->mt) { if (pixels != NULL) { @@ -551,8 +555,6 @@ intelTexImage(GLcontext * ctx, intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } - - UNLOCK_HARDWARE(intel); } @@ -704,35 +706,12 @@ intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, texObj, texImage, GL_TRUE); } - -void -intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, - unsigned long long offset, GLint depth, GLuint pitch) -{ - struct intel_context *intel = pDRICtx->driverPrivate; - struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname); - struct intel_texture_object *intelObj = intel_texture_object(tObj); - - if (!intelObj) - return; - - if 
(intelObj->mt) - intel_miptree_release(intel, &intelObj->mt); - - intelObj->imageOverride = GL_TRUE; - intelObj->depthOverride = depth; - intelObj->pitchOverride = pitch; - - if (offset) - intelObj->textureOffset = offset; -} - void intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, - GLint glx_texture_format, + GLint texture_format, __DRIdrawable *dPriv) { - struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + struct gl_framebuffer *fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; GLcontext *ctx = &intel->ctx; struct intel_texture_object *intelObj; @@ -749,16 +728,17 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, if (!intelObj) return; - intel_update_renderbuffers(pDRICtx, dPriv); + if (dPriv->lastStamp != dPriv->dri2.stamp) + intel_update_renderbuffers(pDRICtx, dPriv); - rb = intel_fb->color_rb[0]; + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); /* If the region isn't set, then intel_update_renderbuffers was unable * to get the buffers for the drawable. */ if (rb->region == NULL) return; - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) internalFormat = GL_RGB; else internalFormat = GL_RGBA; @@ -788,7 +768,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, intelImage->face = target_to_face(target); intelImage->level = level; - if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) texImage->TexFormat = MESA_FORMAT_XRGB8888; else texImage->TexFormat = MESA_FORMAT_ARGB8888; @@ -808,9 +788,57 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) /* The old interface didn't have the format argument, so copy our * implementation's behavior at the time. 
*/ - intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); + intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); } +#if FEATURE_OES_EGL_image +static void +intel_image_target_texture_2d(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLeglImageOES image_handle) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + struct intel_mipmap_tree *mt; + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle, + intel->driContext->loaderPrivate); + if (image == NULL) + return; + + mt = intel_miptree_create_for_region(intel, target, + image->internal_format, + 0, 0, image->region, 1, 0); + if (mt == NULL) + return; + + if (intelImage->mt) { + intel_miptree_release(intel, &intelImage->mt); + assert(!texImage->Data); + } + if (intelObj->mt) + intel_miptree_release(intel, &intelObj->mt); + + intelObj->mt = mt; + _mesa_init_teximage_fields(&intel->ctx, target, texImage, + image->region->width, image->region->height, 1, + 0, image->internal_format); + + intelImage->face = target_to_face(target); + intelImage->level = 0; + texImage->TexFormat = image->format; + texImage->RowStride = image->region->pitch; + intel_miptree_reference(&intelImage->mt, intelObj->mt); + + if (!intel_miptree_match_image(intelObj->mt, &intelImage->base)) + fprintf(stderr, "miptree doesn't match image\n"); +} +#endif void intelInitTextureImageFuncs(struct dd_function_table *functions) @@ -822,4 +850,8 @@ intelInitTextureImageFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = intelCompressedTexImage2D; functions->GetCompressedTexImage = intelGetCompressedTexImage; + +#if FEATURE_OES_EGL_image + functions->EGLImageTargetTexture2D = 
intel_image_target_texture_2d; +#endif } diff --git a/shared/intel_tex_obj.h b/shared/intel_tex_obj.h index 3ad10d3..5f60e0e 100644 --- a/shared/intel_tex_obj.h +++ b/shared/intel_tex_obj.h @@ -46,10 +46,6 @@ struct intel_texture_object * regions will be copied to this region and the old storage freed. */ struct intel_mipmap_tree *mt; - - GLboolean imageOverride; - GLint depthOverride; - GLuint pitchOverride; }; struct intel_texture_image diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c index 1f68208..c35d2e8 100644 --- a/shared/intel_tex_subimage.c +++ b/shared/intel_tex_subimage.c @@ -72,7 +72,7 @@ intelTexSubimage(GLcontext * ctx, if (!pixels) return; - LOCK_HARDWARE(intel); + intel_prepare_render(intel); /* Map buffer if necessary. Need to lock to prevent other contexts * from uploading the buffer under us. @@ -129,8 +129,6 @@ intelTexSubimage(GLcontext * ctx, intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } - - UNLOCK_HARDWARE(intel); } diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c index c9a24ac..ed5c5d8 100644 --- a/shared/intel_tex_validate.c +++ b/shared/intel_tex_validate.c @@ -2,10 +2,8 @@ #include "main/macros.h" #include "intel_context.h" -#include "intel_batchbuffer.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_TEXTURE -- cgit v1.2.3