diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-13 02:36:00 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-13 02:36:00 +0100 |
commit | fedcb3219e8f9a587c693bbb2178ec3e83bf0320 (patch) | |
tree | b37f142039934c27eb13d9ff2344776d7f92bff6 | |
parent | 6e23622cb869c14d82f8c901c4bbea80ded6220e (diff) |
Import i915 and i965 dri drivers from mesa 7.1.0.7.1.0
184 files changed, 26068 insertions, 27037 deletions
diff --git a/configure.ac b/configure.ac index 70d46ac..8e2661f 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-i9xx], 7.0.3, [], mesa-dri-i9xx) +AC_INIT([mesa-dri-i9xx], 7.1.0, [], mesa-dri-i9xx) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,8 +16,8 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0 - libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.1.0 libmesadri < 7.3.0 + libmesadricommon >= 7.1.0 libmesadricommon < 7.3.0]) AC_OUTPUT([ Makefile diff --git a/i915/Makefile.am b/i915/Makefile.am index c921354..e483d0c 100644 --- a/i915/Makefile.am +++ b/i915/Makefile.am @@ -1,21 +1,13 @@ AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING +I915_CFLAGS = -I../shared -I../shared/server -DI915 + i915_dri_la_LTLIBRARIES = i915_dri.la -i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver +i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I915_CFLAGS) i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \ $(DRM_LIBS) $(DRI_LIBS) i915_dri_ladir = @libdir@/dri i915_dri_la_SOURCES = \ - i915_context.c \ - i915_debug.c \ - i915_fragprog.c \ - i915_metaops.c \ - i915_program.c \ - i915_state.c \ - i915_tex.c \ - i915_texprog.c \ - i915_texstate.c \ - i915_vtbl.c \ i830_context.c \ i830_metaops.c \ i830_state.c \ @@ -23,15 +15,43 @@ i915_dri_la_SOURCES = \ i830_tex.c \ i830_texstate.c \ i830_vtbl.c \ - intel_batchbuffer.c \ - intel_context.c \ - intel_ioctl.c \ - intel_pixel.c \ intel_render.c \ - intel_rotate.c \ - intel_screen.c \ - intel_span.c \ + ../shared/intel_regions.c \ + ../shared/intel_buffer_objects.c \ + ../shared/intel_batchbuffer.c \ + ../shared/intel_mipmap_tree.c \ + i915_tex_layout.c \ + ../shared/intel_tex_layout.c \ + ../shared/intel_tex_image.c \ + ../shared/intel_tex_subimage.c \ + ../shared/intel_tex_copy.c \ + ../shared/intel_tex_validate.c \ + ../shared/intel_tex_format.c \ + ../shared/intel_tex.c \ + ../shared/intel_pixel.c \ + ../shared/intel_pixel_bitmap.c \ + ../shared/intel_pixel_copy.c \ + intel_pixel_read.c \ + ../shared/intel_pixel_draw.c \ + ../shared/intel_buffers.c \ + ../shared/intel_blit.c \ + i915_tex.c \ + i915_texstate.c \ + i915_context.c \ + i915_debug.c \ + i915_debug_fp.c \ + i915_fragprog.c \ + i915_metaops.c \ + i915_program.c \ + i915_state.c \ + i915_vtbl.c \ + ../shared/intel_context.c \ + ../shared/intel_decode.c \ + ../shared/intel_ioctl.c \ + ../shared/intel_screen.c \ + ../shared/intel_span.c \ intel_state.c \ - intel_tex.c \ - intel_texmem.c \ - intel_tris.c + intel_tris.c \ + ../shared/intel_fbo.c \ + ../shared/intel_depthstencil.c \ + ../shared/intel_bufmgr_ttm.c diff --git a/i915/i830_context.c b/i915/i830_context.c index 7ca601e..16c8a8d 100644 --- a/i915/i830_context.c +++ b/i915/i830_context.c @@ -32,93 +32,78 @@ #include "tnl/tnl.h" #include "tnl/t_vertex.h" #include "tnl/t_context.h" +#include "tnl/t_pipeline.h" #include "utils.h" +#include "intel_span.h" +#include "intel_pixel.h" +#include "intel_tris.h" /*************************************** * Mesa's Driver Functions ***************************************/ -static const struct dri_extension i830_extensions[] = +static void +i830InitDriverFunctions(struct dd_function_table *functions) { - { "GL_ARB_texture_env_crossbar", NULL }, - { NULL, NULL } -}; - - -static void i830InitDriverFunctions( struct dd_function_table *functions ) -{ - intelInitDriverFunctions( functions ); - i830InitStateFuncs( functions ); - i830InitTextureFuncs( functions ); + intelInitDriverFunctions(functions); + i830InitStateFuncs(functions); + i830InitTextureFuncs(functions); } +extern const struct tnl_pipeline_stage *intel_pipeline[]; -GLboolean i830CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) +GLboolean +i830CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) { struct dd_function_table functions; - i830ContextPtr i830 = (i830ContextPtr) CALLOC_STRUCT(i830_context); - intelContextPtr intel = &i830->intel; + struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct intel_context *intel = &i830->intel; GLcontext *ctx = &intel->ctx; - GLuint i; - if (!i830) return GL_FALSE; + if (!i830) + return GL_FALSE; - i830InitVtbl( i830 ); - i830InitDriverFunctions( &functions ); + i830InitVtbl(i830); + i830InitDriverFunctions(&functions); - if (!intelInitContext( intel, mesaVis, driContextPriv, - sharedContextPrivate, &functions )) { + if (!intelInitContext(intel, mesaVis, driContextPriv, + sharedContextPrivate, &functions)) { FREE(i830); return GL_FALSE; } + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs(ctx); + intelInitTriFuncs(ctx); + + /* Install the customized pipeline: */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, intel_pipeline); + intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS; - intel->nr_heaps = 1; - intel->texture_heaps[0] = - driCreateTextureHeap( 0, intel, - intel->intelScreen->tex.size, - 12, - I830_NR_TEX_REGIONS, - intel->sarea->texList, - (unsigned *) & intel->sarea->texAge, - & intel->swapped, - sizeof( struct i830_texture_object ), - (destroy_texture_object_t *)intelDestroyTexObj ); - - /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are tightly - * FIXME: packed, but they're not in Intel graphics hardware. + /* Advertise the full hardware capabilities. The new memory + * manager should cope much better with overload situations: */ - intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS; - i = driQueryOptioni( &intel->optionCache, "allow_large_textures"); - driCalculateMaxTextureLevels( intel->texture_heaps, - intel->nr_heaps, - &intel->ctx.Const, - 4, - 11, /* max 2D texture size is 2048x2048 */ - 8, /* max 3D texture size is 256^3 */ - 10, /* max CUBE texture size is 1024x1024 */ - 11, /* max RECT. supported */ - 12, - GL_FALSE, - i ); - - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, - 18 * sizeof(GLfloat) ); - - intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf; + ctx->Const.MaxTextureLevels = 12; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 11; + ctx->Const.MaxTextureRectSize = (1 << 11); + ctx->Const.MaxTextureUnits = I830_TEX_UNITS; - driInitExtensions( ctx, i830_extensions, GL_FALSE ); + _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, + 18 * sizeof(GLfloat)); - i830InitState( i830 ); + intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf; + i830InitState(i830); + i830InitMetaFuncs(i830); - _tnl_allow_vertex_fog( ctx, 1 ); - _tnl_allow_pixel_fog( ctx, 0 ); + _tnl_allow_vertex_fog(ctx, 1); + _tnl_allow_pixel_fog(ctx, 0); return GL_TRUE; } - diff --git a/i915/i830_context.h b/i915/i830_context.h index bae777d..a298c14 100644 --- a/i915/i830_context.h +++ b/i915/i830_context.h @@ -49,17 +49,15 @@ */ #define I830_DESTREG_CBUFADDR0 0 #define I830_DESTREG_CBUFADDR1 1 -#define I830_DESTREG_CBUFADDR2 2 -#define I830_DESTREG_DBUFADDR0 3 -#define I830_DESTREG_DBUFADDR1 4 -#define I830_DESTREG_DBUFADDR2 5 -#define I830_DESTREG_DV0 6 -#define I830_DESTREG_DV1 7 -#define I830_DESTREG_SENABLE 8 -#define I830_DESTREG_SR0 9 -#define I830_DESTREG_SR1 10 -#define I830_DESTREG_SR2 11 -#define I830_DEST_SETUP_SIZE 12 +#define I830_DESTREG_DBUFADDR0 2 +#define I830_DESTREG_DBUFADDR1 3 +#define I830_DESTREG_DV0 4 +#define I830_DESTREG_DV1 5 +#define I830_DESTREG_SENABLE 6 +#define I830_DESTREG_SR0 7 +#define I830_DESTREG_SR1 8 +#define I830_DESTREG_SR2 9 +#define I830_DEST_SETUP_SIZE 10 #define I830_CTXREG_STATE1 0 #define I830_CTXREG_STATE2 1 @@ -73,7 +71,7 @@ #define I830_CTXREG_AA 9 #define I830_CTXREG_FOGCOLOR 10 #define I830_CTXREG_BLENDCOLOR0 11 -#define I830_CTXREG_BLENDCOLOR1 12 +#define I830_CTXREG_BLENDCOLOR1 12 #define I830_CTXREG_VF 13 #define I830_CTXREG_VF2 14 #define I830_CTXREG_MCSB0 15 @@ -84,17 +82,16 @@ #define I830_STPREG_ST1 1 #define I830_STP_SETUP_SIZE 2 -#define I830_TEXREG_TM0LI 0 /* load immediate 2 texture map n */ -#define I830_TEXREG_TM0S0 1 -#define I830_TEXREG_TM0S1 2 -#define I830_TEXREG_TM0S2 3 -#define I830_TEXREG_TM0S3 4 -#define I830_TEXREG_TM0S4 5 -#define I830_TEXREG_MCS 6 /* _3DSTATE_MAP_COORD_SETS */ -#define I830_TEXREG_CUBE 7 /* _3DSTATE_MAP_SUBE */ -#define I830_TEX_SETUP_SIZE 8 +#define I830_TEXREG_TM0LI 0 /* load immediate 2 texture map n */ +#define I830_TEXREG_TM0S1 1 +#define I830_TEXREG_TM0S2 2 +#define I830_TEXREG_TM0S3 3 +#define I830_TEXREG_TM0S4 4 +#define I830_TEXREG_MCS 5 /* _3DSTATE_MAP_COORD_SETS */ +#define I830_TEXREG_CUBE 6 /* _3DSTATE_MAP_SUBE */ +#define I830_TEX_SETUP_SIZE 7 -#define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */ +#define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */ struct i830_texture_object { @@ -104,30 +101,39 @@ struct i830_texture_object #define I830_TEX_UNITS 4 -struct i830_hw_state { +struct i830_hw_state +{ GLuint Ctx[I830_CTX_SETUP_SIZE]; GLuint Buffer[I830_DEST_SETUP_SIZE]; GLuint Stipple[I830_STP_SETUP_SIZE]; GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE]; GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE]; GLuint TexBlendWordsUsed[I830_TEX_UNITS]; - GLuint emitted; /* I810_UPLOAD_* */ + + struct intel_region *draw_region; + struct intel_region *depth_region; + + /* Regions aren't actually that appropriate here as the memory may + * be from a PBO or FBO. Will have to do this for draw and depth for + * FBO's... + */ + dri_bo *tex_buffer[I830_TEX_UNITS]; + GLuint tex_offset[I830_TEX_UNITS]; + + GLuint emitted; /* I810_UPLOAD_* */ GLuint active; }; -struct i830_context +struct i830_context { struct intel_context intel; - - DECLARE_RENDERINPUTS(last_index_bitset); + + GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS]; + DECLARE_RENDERINPUTS(last_index_bitset); struct i830_hw_state meta, initial, state, *current; }; -typedef struct i830_context *i830ContextPtr; -typedef struct i830_texture_object *i830TextureObjectPtr; - -#define I830_CONTEXT(ctx) ((i830ContextPtr)(ctx)) @@ -148,71 +154,56 @@ do { \ /* i830_vtbl.c */ -extern void -i830InitVtbl( i830ContextPtr i830 ); +extern void i830InitVtbl(struct i830_context *i830); +extern void +i830_state_draw_region(struct intel_context *intel, + struct i830_hw_state *state, + struct intel_region *color_region, + struct intel_region *depth_region); /* i830_context.c */ -extern GLboolean -i830CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); +extern GLboolean +i830CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); /* i830_tex.c, i830_texstate.c */ -extern void -i830UpdateTextureState( intelContextPtr intel ); - -extern void -i830InitTextureFuncs( struct dd_function_table *functions ); +extern void i830UpdateTextureState(struct intel_context *intel); -extern intelTextureObjectPtr -i830AllocTexObj( struct gl_texture_object *tObj ); +extern void i830InitTextureFuncs(struct dd_function_table *functions); /* i830_texblend.c */ -extern GLuint i830SetTexEnvCombine(i830ContextPtr i830, - const struct gl_tex_env_combine_state * combine, GLint blendUnit, - GLuint texel_op, GLuint *state, const GLfloat *factor ); +extern GLuint i830SetTexEnvCombine(struct i830_context *i830, + const struct gl_tex_env_combine_state + *combine, GLint blendUnit, GLuint texel_op, + GLuint * state, const GLfloat * factor); -extern void -i830EmitTextureBlend( i830ContextPtr i830 ); +extern void i830EmitTextureBlend(struct i830_context *i830); /* i830_state.c */ -extern void -i830InitStateFuncs( struct dd_function_table *functions ); +extern void i830InitStateFuncs(struct dd_function_table *functions); -extern void -i830EmitState( i830ContextPtr i830 ); +extern void i830EmitState(struct i830_context *i830); -extern void -i830InitState( i830ContextPtr i830 ); +extern void i830InitState(struct i830_context *i830); /* i830_metaops.c */ -extern GLboolean -i830TryTextureReadPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels ); - -extern GLboolean -i830TryTextureDrawPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels ); - -extern void -i830ClearWithTris( intelContextPtr intel, GLbitfield mask, - GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch); +extern void i830InitMetaFuncs(struct i830_context *i830); -extern void -i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv, - GLuint srcBuf); +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i830_context * +i830_context(GLcontext * ctx) +{ + return (struct i830_context *) ctx; +} #endif - diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c index c1d7fe3..13e4ab3 100644 --- a/i915/i830_metaops.c +++ b/i915/i830_metaops.c @@ -34,6 +34,7 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_ioctl.h" +#include "intel_regions.h" #include "i830_context.h" #include "i830_reg.h" @@ -41,34 +42,26 @@ /* A large amount of state doesn't need to be uploaded. */ #define ACTIVE (I830_UPLOAD_INVARIENT | \ - I830_UPLOAD_TEXBLEND(0) | \ - I830_UPLOAD_STIPPLE | \ I830_UPLOAD_CTX | \ I830_UPLOAD_BUFFERS | \ - I830_UPLOAD_TEX(0)) + I830_UPLOAD_STIPPLE | \ + I830_UPLOAD_TEXBLEND(0) | \ + I830_UPLOAD_TEX(0)) #define SET_STATE( i830, STATE ) \ do { \ - i830->current->emitted = 0; \ + i830->current->emitted &= ~ACTIVE; \ i830->current = &i830->STATE; \ - i830->current->emitted = 0; \ + i830->current->emitted &= ~ACTIVE; \ } while (0) -/* Operations where the 3D engine is decoupled temporarily from the - * current GL state and used for other purposes than simply rendering - * incoming triangles. - */ -static void set_initial_state( i830ContextPtr i830 ) -{ - memcpy(&i830->meta, &i830->initial, sizeof(i830->meta) ); - i830->meta.active = ACTIVE; - i830->meta.emitted = 0; -} - -static void set_no_depth_stencil_write( i830ContextPtr i830 ) +static void +set_no_stencil_write(struct intel_context *intel) { + struct i830_context *i830 = i830_context(&intel->ctx); + /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ) */ i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST; @@ -76,6 +69,13 @@ static void set_no_depth_stencil_write( i830ContextPtr i830 ) i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST; i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE; + i830->meta.emitted &= ~I830_UPLOAD_CTX; +} + +static void +set_no_depth_write(struct intel_context *intel) +{ + struct i830_context *i830 = i830_context(&intel->ctx); /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) */ @@ -87,35 +87,56 @@ static void set_no_depth_stencil_write( i830ContextPtr i830 ) i830->meta.emitted &= ~I830_UPLOAD_CTX; } -/* Set stencil unit to replace always with the reference value. +/* Set depth unit to replace. */ -static void set_stencil_replace( i830ContextPtr i830, - GLuint s_mask, - GLuint s_clear) +static void +set_depth_replace(struct intel_context *intel) { - /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ) - */ - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE; - + struct i830_context *i830 = i830_context(&intel->ctx); /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) + * ctx->Driver.DepthMask( ctx, GL_TRUE ) */ i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK; i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK; - i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST; - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE; + i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST; + i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DEPTH_WRITE; + + /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) + */ + i830->meta.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK; + i830->meta.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC + (COMPAREFUNC_ALWAYS)); + + i830->meta.emitted &= ~I830_UPLOAD_CTX; +} + + +/* Set stencil unit to replace always with the reference value. + */ +static void +set_stencil_replace(struct intel_context *intel, + GLuint s_mask, GLuint s_clear) +{ + struct i830_context *i830 = i830_context(&intel->ctx); + + /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ) + */ + i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST; + i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE; /* ctx->Driver.StencilMask( ctx, s_mask ) */ i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK((s_mask&0xff))); + STENCIL_WRITE_MASK((s_mask & + 0xff))); /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE ) */ i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK); - i830->meta.Ctx[I830_CTXREG_STENCILTST] |= + i830->meta.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS | STENCIL_FAIL_OP(STENCILOP_REPLACE) | STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_REPLACE) | @@ -125,14 +146,14 @@ static void set_stencil_replace( i830ContextPtr i830, */ i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff)); + STENCIL_TEST_MASK(0xff)); i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK | - ENABLE_STENCIL_TEST_FUNC_MASK); - i830->meta.Ctx[I830_CTXREG_STENCILTST] |= + ENABLE_STENCIL_TEST_FUNC_MASK); + i830->meta.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_REF_VALUE | ENABLE_STENCIL_TEST_FUNC | - STENCIL_REF_VALUE((s_clear&0xff)) | + STENCIL_REF_VALUE((s_clear & 0xff)) | STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS)); @@ -141,38 +162,43 @@ static void set_stencil_replace( i830ContextPtr i830, } -static void set_color_mask( i830ContextPtr i830, GLboolean state ) +static void +set_color_mask(struct intel_context *intel, GLboolean state) { + struct i830_context *i830 = i830_context(&intel->ctx); + const GLuint mask = ((1 << WRITEMASK_RED_SHIFT) | - (1 << WRITEMASK_GREEN_SHIFT) | - (1 << WRITEMASK_BLUE_SHIFT) | - (1 << WRITEMASK_ALPHA_SHIFT)); + (1 << WRITEMASK_GREEN_SHIFT) | + (1 << WRITEMASK_BLUE_SHIFT) | + (1 << WRITEMASK_ALPHA_SHIFT)); i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~mask; if (state) { - i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= - (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask); + i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= + (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask); } - + i830->meta.emitted &= ~I830_UPLOAD_CTX; } /* Installs a one-stage passthrough texture blend pipeline. Is there * more that can be done to turn off texturing? */ -static void set_no_texture( i830ContextPtr i830 ) +static void +set_no_texture(struct intel_context *intel) { + struct i830_context *i830 = i830_context(&intel->ctx); static const struct gl_tex_env_combine_state comb = { GL_NONE, GL_NONE, - { GL_TEXTURE, 0, 0, }, { GL_TEXTURE, 0, 0, }, - { GL_SRC_COLOR, 0, 0 }, { GL_SRC_ALPHA, 0, 0 }, + {GL_TEXTURE, 0, 0,}, {GL_TEXTURE, 0, 0,}, + {GL_SRC_COLOR, 0, 0}, {GL_SRC_ALPHA, 0, 0}, 0, 0, 0, 0 }; i830->meta.TexBlendWordsUsed[0] = - i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0, - i830->meta.TexBlend[0], NULL); + i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0, + i830->meta.TexBlend[0], NULL); i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE; i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0); @@ -181,18 +207,22 @@ static void set_no_texture( i830ContextPtr i830 ) /* Set up a single element blend stage for 'replace' texturing with no * funny ops. */ -static void enable_texture_blend_replace( i830ContextPtr i830 ) +static void +set_texture_blend_replace(struct intel_context *intel) { + struct i830_context *i830 = i830_context(&intel->ctx); static const struct gl_tex_env_combine_state comb = { GL_REPLACE, GL_REPLACE, - { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE }, { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE, }, - { GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR }, { GL_SRC_ALPHA, GL_SRC_ALPHA, GL_SRC_ALPHA }, + {GL_TEXTURE, GL_TEXTURE, GL_TEXTURE,}, {GL_TEXTURE, GL_TEXTURE, + GL_TEXTURE,}, + {GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR}, {GL_SRC_ALPHA, GL_SRC_ALPHA, + GL_SRC_ALPHA}, 0, 0, 1, 1 }; i830->meta.TexBlendWordsUsed[0] = - i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0, - i830->meta.TexBlend[0], NULL); + i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0, + i830->meta.TexBlend[0], NULL); i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE; i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0); @@ -206,717 +236,222 @@ static void enable_texture_blend_replace( i830ContextPtr i830 ) /* Set up an arbitary piece of memory as a rectangular texture * (including the front or back buffer). */ -static void set_tex_rect_source( i830ContextPtr i830, - GLuint offset, - GLuint width, - GLuint height, - GLuint pitch, /* in bytes */ - GLuint textureFormat ) +static GLboolean +set_tex_rect_source(struct intel_context *intel, + dri_bo *buffer, + GLuint offset, + GLuint pitch, GLuint height, GLenum format, GLenum type) { - GLint numLevels = 1; + struct i830_context *i830 = i830_context(&intel->ctx); GLuint *setup = i830->meta.Tex[0]; + GLint numLevels = 1; + GLuint textureFormat; + GLuint cpp; -/* fprintf(stderr, "%s: offset: %x w: %d h: %d pitch %d format %x\n", */ -/* __FUNCTION__, offset, width, height, pitch, textureFormat ); */ + /* A full implementation of this would do the upload through + * glTexImage2d, and get all the conversion operations at that + * point. We are restricted, but still at least have access to the + * fragment program swizzle. + */ + switch (format) { + case GL_BGRA: + switch (type) { + case GL_UNSIGNED_INT_8_8_8_8_REV: + case GL_UNSIGNED_BYTE: + textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888); + cpp = 4; + break; + default: + return GL_FALSE; + } + break; + case GL_RGBA: + switch (type) { + case GL_UNSIGNED_INT_8_8_8_8_REV: + case GL_UNSIGNED_BYTE: + textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888); + cpp = 4; + break; + default: + return GL_FALSE; + } + break; + case GL_BGR: + switch (type) { + case GL_UNSIGNED_SHORT_5_6_5_REV: + textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); + cpp = 2; + break; + default: + return GL_FALSE; + } + break; + case GL_RGB: + switch (type) { + case GL_UNSIGNED_SHORT_5_6_5: + textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); + cpp = 2; + break; + default: + return GL_FALSE; + } + break; - setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | - (LOAD_TEXTURE_MAP0 << 0) | 4); - setup[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | offset); + default: + return GL_FALSE; + } + + i830->meta.tex_buffer[0] = buffer; + i830->meta.tex_offset[0] = offset; + + setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + (LOAD_TEXTURE_MAP0 << 0) | 4); setup[I830_TEXREG_TM0S1] = (((height - 1) << TM0S1_HEIGHT_SHIFT) | - ((width - 1) << TM0S1_WIDTH_SHIFT) | - textureFormat); - setup[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT)); - setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK; - setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK; - setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT; + ((pitch - 1) << TM0S1_WIDTH_SHIFT) | + textureFormat); + setup[I830_TEXREG_TM0S2] = + (((((pitch * cpp) / 4) - + 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); + + setup[I830_TEXREG_TM0S3] = + ((((numLevels - + 1) * + 4) << TM0S3_MIN_MIP_SHIFT) | (FILTER_NEAREST << + TM0S3_MIN_FILTER_SHIFT) | + (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT) | (FILTER_NEAREST << + TM0S3_MAG_FILTER_SHIFT)); + + setup[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(0)); setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD | - MAP_UNIT(0) | - ENABLE_TEXCOORD_PARAMS | - TEXCOORDS_ARE_IN_TEXELUNITS | - TEXCOORDTYPE_CARTESIAN | - ENABLE_ADDR_V_CNTL | - TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) | - ENABLE_ADDR_U_CNTL | - TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP)); + MAP_UNIT(0) | + ENABLE_TEXCOORD_PARAMS | + TEXCOORDS_ARE_IN_TEXELUNITS | + TEXCOORDTYPE_CARTESIAN | + ENABLE_ADDR_V_CNTL | + TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) | + ENABLE_ADDR_U_CNTL | + TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP)); i830->meta.emitted &= ~I830_UPLOAD_TEX(0); + return GL_TRUE; } -/* Select between front and back draw buffers. - */ -static void set_draw_region( i830ContextPtr i830, - const intelRegion *region ) -{ - i830->meta.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i830->meta.Buffer[I830_DESTREG_CBUFADDR2] = region->offset; - i830->meta.emitted &= ~I830_UPLOAD_BUFFERS; -} - -/* Setup an arbitary draw format, useful for targeting - * texture or agp memory. - */ -#if 0 -static void set_draw_format( i830ContextPtr i830, - GLuint format, - GLuint depth_format) -{ - i830->meta.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - format | - DEPTH_IS_Z | - depth_format); -} -#endif - - -static void set_vertex_format( i830ContextPtr i830 ) +static void +set_vertex_format(struct intel_context *intel) { - i830->meta.Ctx[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD | - VFT0_TEX_COUNT(1) | - VFT0_DIFFUSE | - VFT0_SPEC | - VFT0_XYZW); + struct i830_context *i830 = i830_context(&intel->ctx); + i830->meta.Ctx[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD | + VFT0_TEX_COUNT(1) | + VFT0_DIFFUSE | VFT0_XYZ); i830->meta.Ctx[I830_CTXREG_VF2] = (_3DSTATE_VFT1_CMD | - VFT1_TEX0_FMT(TEXCOORDFMT_2D) | - VFT1_TEX1_FMT(TEXCOORDFMT_2D) | - VFT1_TEX2_FMT(TEXCOORDFMT_2D) | - VFT1_TEX3_FMT(TEXCOORDFMT_2D)); + VFT1_TEX0_FMT(TEXCOORDFMT_2D) | + VFT1_TEX1_FMT(TEXCOORDFMT_2D) | + VFT1_TEX2_FMT(TEXCOORDFMT_2D) | + VFT1_TEX3_FMT(TEXCOORDFMT_2D)); i830->meta.emitted &= ~I830_UPLOAD_CTX; } -static void draw_quad(i830ContextPtr i830, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1 ) -{ - GLuint vertex_size = 8; - GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel, - PRIM3D_TRIFAN, - 4*vertex_size, - vertex_size ); - intelVertex tmp; - int i; - - -/* fprintf(stderr, "%s: %f,%f-%f,%f 0x%x%x%x%x %f,%f-%f,%f\n", */ -/* __FUNCTION__, */ -/* x0,y0,x1,y1,red,green,blue,alpha,s0,t0,s1,t1); */ - - - /* initial vertex, left bottom */ - tmp.v.x = x0; - tmp.v.y = y0; - tmp.v.z = 1.0; - tmp.v.w = 1.0; - tmp.v.color.red = red; - tmp.v.color.green = green; - tmp.v.color.blue = blue; - tmp.v.color.alpha = alpha; - tmp.v.specular.red = 0; - tmp.v.specular.green = 0; - tmp.v.specular.blue = 0; - tmp.v.specular.alpha = 0; - tmp.v.u0 = s0; - tmp.v.v0 = t0; - for (i = 0 ; i < 8 ; i++) - vb[i] = tmp.ui[i]; - - /* right bottom */ - vb += 8; - tmp.v.x = x1; - tmp.v.u0 = s1; - for (i = 0 ; i < 8 ; i++) - vb[i] = tmp.ui[i]; - - /* right top */ - vb += 8; - tmp.v.y = y1; - tmp.v.v0 = t1; - for (i = 0 ; i < 8 ; i++) - vb[i] = tmp.ui[i]; - - /* left top */ - vb += 8; - tmp.v.x = x0; - tmp.v.u0 = s0; - for (i = 0 ; i < 8 ; i++) - vb[i] = tmp.ui[i]; - -/* fprintf(stderr, "%s: DV1: %x\n", */ -/* __FUNCTION__, i830->meta.Buffer[I830_DESTREG_DV1]); */ -} - -static void draw_poly(i830ContextPtr i830, - GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha, - GLuint numVerts, - GLfloat verts[][2], - GLfloat texcoords[][2]) +static void +meta_import_pixel_state(struct intel_context *intel) { - GLuint vertex_size = 8; - GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel, - PRIM3D_TRIFAN, - numVerts * vertex_size, - vertex_size ); - intelVertex tmp; - int i, k; - - /* initial constant vertex fields */ - tmp.v.z = 1.0; - tmp.v.w = 1.0; - tmp.v.color.red = red; - tmp.v.color.green = green; - tmp.v.color.blue = blue; - tmp.v.color.alpha = alpha; - tmp.v.specular.red = 0; - tmp.v.specular.green = 0; - tmp.v.specular.blue = 0; - tmp.v.specular.alpha = 0; - - for (k = 0; k < numVerts; k++) { - tmp.v.x = verts[k][0]; - tmp.v.y = verts[k][1]; - tmp.v.u0 = texcoords[k][0]; - tmp.v.v0 = texcoords[k][1]; - - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; - - vb += vertex_size; - } -} - -void -i830ClearWithTris(intelContextPtr intel, GLbitfield mask, - GLboolean allFoo, - GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo) -{ - i830ContextPtr i830 = I830_CONTEXT( intel ); - __DRIdrawablePrivate *dPriv = intel->driDrawable; - intelScreenPrivate *screen = intel->intelScreen; - int x0, y0, x1, y1; - GLint cx, cy, cw, ch; - GLboolean all; - - INTEL_FIREVERTICES(intel); - SET_STATE( i830, meta ); - set_initial_state( i830 ); -/* set_no_texture( i830 ); */ - set_vertex_format( i830 ); - - LOCK_HARDWARE(intel); - - /* get clear bounds after locking */ - cx = intel->ctx.DrawBuffer->_Xmin; - cy = intel->ctx.DrawBuffer->_Ymin; - cw = intel->ctx.DrawBuffer->_Xmax - cx; - ch = intel->ctx.DrawBuffer->_Ymax - cy; - all = (cw == intel->ctx.DrawBuffer->Width && - ch == intel->ctx.DrawBuffer->Height); - - if(!all) { - x0 = cx; - y0 = cy; - x1 = x0 + cw; - y1 = y0 + ch; - } else { - x0 = 0; - y0 = 0; - x1 = x0 + dPriv->w; - y1 = y0 + dPriv->h; - } - - /* Don't do any clipping to screen - these are window coordinates. - * The active cliprects will be applied as for any other geometry. - */ - - if(mask & BUFFER_BIT_FRONT_LEFT) { - set_no_depth_stencil_write( i830 ); - set_color_mask( i830, GL_TRUE ); - set_draw_region( i830, &screen->front ); - draw_quad(i830, x0, x1, y0, y1, - intel->clear_red, intel->clear_green, - intel->clear_blue, intel->clear_alpha, - 0, 0, 0, 0); - } - - if(mask & BUFFER_BIT_BACK_LEFT) { - set_no_depth_stencil_write( i830 ); - set_color_mask( i830, GL_TRUE ); - set_draw_region( i830, &screen->back ); - - draw_quad(i830, x0, x1, y0, y1, - intel->clear_red, intel->clear_green, - intel->clear_blue, intel->clear_alpha, - 0, 0, 0, 0); - } - - if(mask & BUFFER_BIT_STENCIL) { - set_stencil_replace( i830, - intel->ctx.Stencil.WriteMask[0], - intel->ctx.Stencil.Clear); - - set_color_mask( i830, GL_FALSE ); - set_draw_region( i830, &screen->front ); - draw_quad( i830, x0, x1, y0, y1, 0, 0, 0, 0, 0, 0, 0, 0 ); - } + struct i830_context *i830 = i830_context(&intel->ctx); + + i830->meta.Ctx[I830_CTXREG_STATE1] = i830->state.Ctx[I830_CTXREG_STATE1]; + i830->meta.Ctx[I830_CTXREG_STATE2] = i830->state.Ctx[I830_CTXREG_STATE2]; + i830->meta.Ctx[I830_CTXREG_STATE3] = i830->state.Ctx[I830_CTXREG_STATE3]; + i830->meta.Ctx[I830_CTXREG_STATE4] = i830->state.Ctx[I830_CTXREG_STATE4]; + i830->meta.Ctx[I830_CTXREG_STATE5] = i830->state.Ctx[I830_CTXREG_STATE5]; + i830->meta.Ctx[I830_CTXREG_IALPHAB] = i830->state.Ctx[I830_CTXREG_IALPHAB]; + i830->meta.Ctx[I830_CTXREG_STENCILTST] = + i830->state.Ctx[I830_CTXREG_STENCILTST]; + i830->meta.Ctx[I830_CTXREG_ENABLES_1] = + i830->state.Ctx[I830_CTXREG_ENABLES_1]; + i830->meta.Ctx[I830_CTXREG_ENABLES_2] = + i830->state.Ctx[I830_CTXREG_ENABLES_2]; + i830->meta.Ctx[I830_CTXREG_AA] = i830->state.Ctx[I830_CTXREG_AA]; + i830->meta.Ctx[I830_CTXREG_FOGCOLOR] = + i830->state.Ctx[I830_CTXREG_FOGCOLOR]; + i830->meta.Ctx[I830_CTXREG_BLENDCOLOR0] = + i830->state.Ctx[I830_CTXREG_BLENDCOLOR0]; + i830->meta.Ctx[I830_CTXREG_BLENDCOLOR1] = + i830->state.Ctx[I830_CTXREG_BLENDCOLOR1]; + i830->meta.Ctx[I830_CTXREG_MCSB0] = i830->state.Ctx[I830_CTXREG_MCSB0]; + i830->meta.Ctx[I830_CTXREG_MCSB1] = i830->state.Ctx[I830_CTXREG_MCSB1]; + + + i830->meta.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK; + i830->meta.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE; + i830->meta.emitted &= ~I830_UPLOAD_CTX; - UNLOCK_HARDWARE(intel); - INTEL_FIREVERTICES(intel); - SET_STATE( i830, state ); + i830->meta.Buffer[I830_DESTREG_SENABLE] = + i830->state.Buffer[I830_DESTREG_SENABLE]; + i830->meta.Buffer[I830_DESTREG_SR1] = i830->state.Buffer[I830_DESTREG_SR1]; + i830->meta.Buffer[I830_DESTREG_SR2] = i830->state.Buffer[I830_DESTREG_SR2]; + i830->meta.emitted &= ~I830_UPLOAD_BUFFERS; } -#if 0 -GLboolean -i830TryTextureReadPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels ) +/* Select between front and back draw buffers. + */ +static void +meta_draw_region(struct intel_context *intel, + struct intel_region *color_region, + struct intel_region *depth_region) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - intelContextPtr intel = INTEL_CONTEXT(ctx); - intelScreenPrivate *screen = i830->intel.intelScreen; - GLint pitch = pack->RowLength ? pack->RowLength : width; - __DRIdrawablePrivate *dPriv = i830->intel.driDrawable; - int textureFormat; - GLenum glTextureFormat; - int src_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2]; - int destOffset = intelAgpOffsetFromVirtual( &i830->intel, pixels); - int destFormat, depthFormat, destPitch; - drm_clip_rect_t tmp; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - - if ( ctx->_ImageTransferState || - pack->SwapBytes || - pack->LsbFirst || - !pack->Invert) { - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - switch (screen->fbFormat) { - case DV_PF_565: - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - glTextureFormat = GL_RGB; - break; - case DV_PF_555: - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555; - glTextureFormat = GL_RGBA; - break; - case DV_PF_8888: - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - glTextureFormat = GL_RGBA; - break; - default: - fprintf(stderr, "%s: textureFormat failed %x\n", __FUNCTION__, - screen->fbFormat); - return GL_FALSE; - } - - - switch (type) { - case GL_UNSIGNED_SHORT_5_6_5: - if (format != GL_RGB) return GL_FALSE; - destFormat = COLR_BUF_RGB565; - depthFormat = DEPTH_FRMT_16_FIXED; - destPitch = pitch * 2; - break; - case GL_UNSIGNED_INT_8_8_8_8_REV: - if (format != GL_BGRA) return GL_FALSE; - destFormat = COLR_BUF_ARGB8888; - depthFormat = DEPTH_FRMT_24_FIXED_8_OTHER; - destPitch = pitch * 4; - break; - default: - fprintf(stderr, "%s: destFormat failed %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(type)); - return GL_FALSE; - } - - destFormat |= (0x02<<24); - -/* fprintf(stderr, "type: %s destFormat: %x\n", */ -/* _mesa_lookup_enum_by_nr(type), */ -/* destFormat); */ - - intelFlush( ctx ); - - SET_STATE( i830, meta ); - set_initial_state( i830 ); - set_no_depth_stencil_write( i830 ); - - LOCK_HARDWARE( intel ); - { - intelWaitForIdle( intel ); /* required by GL */ - - if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { - UNLOCK_HARDWARE( intel ); - SET_STATE(i830, state); - fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); - return GL_TRUE; - } - -#if 0 - /* FIXME -- Just emit the correct state - */ - if (i830SetParam(i830->driFd, I830_SETPARAM_CBUFFER_PITCH, - destPitch) != 0) { - UNLOCK_HARDWARE( intel ); - SET_STATE(i830, state); - fprintf(stderr, "%s: setparam failed\n", __FUNCTION__); - return GL_FALSE; - } -#endif - - - y = dPriv->h - y - height; - x += dPriv->x; - y += dPriv->y; - - - /* Set the frontbuffer up as a large rectangular texture. - */ - set_tex_rect_source( i830, - src_offset, - screen->width, - screen->height, - screen->front.pitch, - textureFormat ); - - - enable_texture_blend_replace( i830 ); - - - /* Set the 3d engine to draw into the agp memory - */ + struct i830_context *i830 = i830_context(&intel->ctx); - set_draw_region( i830, destOffset ); - set_draw_format( i830, destFormat, depthFormat ); - - - /* Draw a single quad, no cliprects: - */ - i830->intel.numClipRects = 1; - i830->intel.pClipRects = &tmp; - i830->intel.pClipRects[0].x1 = 0; - i830->intel.pClipRects[0].y1 = 0; - i830->intel.pClipRects[0].x2 = width; - i830->intel.pClipRects[0].y2 = height; - - draw_quad( i830, - 0, width, 0, height, - 0, 255, 0, 0, - x, x+width, y, y+height ); - - intelWindowMoved( intel ); - } - UNLOCK_HARDWARE( intel ); - intelFinish( ctx ); /* required by GL */ - - SET_STATE( i830, state ); - return GL_TRUE; + i830_state_draw_region(intel, &i830->meta, color_region, depth_region); } -GLboolean -i830TryTextureDrawPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels ) +/* Operations where the 3D engine is decoupled temporarily from the + * current GL state and used for other purposes than simply rendering + * incoming triangles. + */ +static void +install_meta_state(struct intel_context *intel) { - intelContextPtr intel = INTEL_CONTEXT(ctx); - i830ContextPtr i830 = I830_CONTEXT(ctx); - GLint pitch = unpack->RowLength ? unpack->RowLength : width; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int textureFormat; - GLenum glTextureFormat; - int dst_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2]; - int src_offset = intelAgpOffsetFromVirtual( intel, pixels ); - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* Todo -- upload images that aren't in agp space, then texture - * from them. - */ + struct i830_context *i830 = i830_context(&intel->ctx); + memcpy(&i830->meta, &i830->initial, sizeof(i830->meta)); - if ( !intelIsAgpMemory( intel, pixels, pitch*height ) ) { - fprintf(stderr, "%s: intelIsAgpMemory failed\n", __FUNCTION__); - return GL_FALSE; - } - - /* Todo -- don't want to clobber all the drawing state like we do - * for readpixels -- most of this state can be handled just fine. - */ - if ( ctx->_ImageTransferState || - unpack->SwapBytes || - unpack->LsbFirst || - ctx->Color.AlphaEnabled || - ctx->Depth.Test || - ctx->Fog.Enabled || - ctx->Scissor.Enabled || - ctx->Stencil.Enabled || - !ctx->Color.ColorMask[0] || - !ctx->Color.ColorMask[1] || - !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || - ctx->Color.ColorLogicOpEnabled || - ctx->Texture._EnabledUnits || - ctx->Depth.OcclusionTest) { - fprintf(stderr, "%s: other tests failed\n", __FUNCTION__); - return GL_FALSE; - } - - /* Todo -- remove these restrictions: - */ - if (ctx->Pixel.ZoomX != 1.0F || - ctx->Pixel.ZoomY != -1.0F) - return GL_FALSE; - - - - switch (type) { - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - if (format != GL_BGRA) return GL_FALSE; - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555; - glTextureFormat = GL_RGBA; - break; - case GL_UNSIGNED_SHORT_5_6_5: - if (format != GL_RGB) return GL_FALSE; - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - glTextureFormat = GL_RGB; - break; - case GL_UNSIGNED_SHORT_8_8_MESA: - if (format != GL_YCBCR_MESA) return GL_FALSE; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY -/* | TM0S1_COLORSPACE_CONVERSION */ - ); - glTextureFormat = GL_YCBCR_MESA; - break; - case GL_UNSIGNED_SHORT_8_8_REV_MESA: - if (format != GL_YCBCR_MESA) return GL_FALSE; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL -/* | TM0S1_COLORSPACE_CONVERSION */ - ); - glTextureFormat = GL_YCBCR_MESA; - break; - case GL_UNSIGNED_INT_8_8_8_8_REV: - if (format != GL_BGRA) return GL_FALSE; - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - glTextureFormat = GL_RGBA; - break; - default: - fprintf(stderr, "%s: destFormat failed\n", __FUNCTION__); - return GL_FALSE; - } - - intelFlush( ctx ); - - SET_STATE( i830, meta ); - - LOCK_HARDWARE( intel ); - { - intelWaitForIdle( intel ); /* required by GL */ - - y -= height; /* cope with pixel zoom */ - - if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { - UNLOCK_HARDWARE( intel ); - SET_STATE(i830, state); - fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); - return GL_TRUE; - } - - - y = dPriv->h - y - height; - - set_initial_state( i830 ); - - /* Set the pixel image up as a rectangular texture. - */ - set_tex_rect_source( i830, - src_offset, - width, - height, - pitch, /* XXXX!!!! -- /2 sometimes */ - textureFormat ); - - - enable_texture_blend_replace( i830 ); - - - /* Draw to the current draw buffer: - */ - set_draw_offset( i830, dst_offset ); - - /* Draw a quad, use regular cliprects - */ -/* fprintf(stderr, "x: %d y: %d width %d height %d\n", x, y, width, height); */ + i830->meta.active = ACTIVE; + i830->meta.emitted = 0; - draw_quad( i830, - x, x+width, y, y+height, - 0, 255, 0, 0, - 0, width, 0, height ); + SET_STATE(i830, meta); + set_vertex_format(intel); + set_no_texture(intel); +} - intelWindowMoved( intel ); - } - UNLOCK_HARDWARE( intel ); - intelFinish( ctx ); /* required by GL */ - +static void +leave_meta_state(struct intel_context *intel) +{ + struct i830_context *i830 = i830_context(&intel->ctx); + intel_region_release(&i830->meta.draw_region); + intel_region_release(&i830->meta.depth_region); +/* intel_region_release(intel, &i830->meta.tex_region[0]); */ SET_STATE(i830, state); - - return GL_TRUE; } -#endif -/** - * Copy the window contents named by dPriv to the rotated (or reflected) - * color buffer. - * srcBuf is BUFFER_BIT_FRONT_LEFT or BUFFER_BIT_BACK_LEFT to indicate the source. - */ + void -i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv, - GLuint srcBuf) +i830InitMetaFuncs(struct i830_context *i830) { - i830ContextPtr i830 = I830_CONTEXT( intel ); - intelScreenPrivate *screen = intel->intelScreen; - const GLuint cpp = screen->cpp; - drm_clip_rect_t fullRect; - GLuint textureFormat, srcOffset, srcPitch; - const drm_clip_rect_t *clipRects; - int numClipRects; - int i; - - int xOrig, yOrig; - int origNumClipRects; - drm_clip_rect_t *origRects; - - /* - * set up hardware state - */ - intelFlush( &intel->ctx ); - - SET_STATE( i830, meta ); - set_initial_state( i830 ); - set_no_texture( i830 ); - set_vertex_format( i830 ); - set_no_depth_stencil_write( i830 ); - set_color_mask( i830, GL_FALSE ); - - LOCK_HARDWARE(intel); - - /* save current drawing origin and cliprects (restored at end) */ - xOrig = intel->drawX; - yOrig = intel->drawY; - origNumClipRects = intel->numClipRects; - origRects = intel->pClipRects; - - if (!intel->numClipRects) - goto done; - - /* - * set drawing origin, cliprects for full-screen access to rotated screen - */ - fullRect.x1 = 0; - fullRect.y1 = 0; - fullRect.x2 = screen->rotatedWidth; - fullRect.y2 = screen->rotatedHeight; - intel->drawX = 0; - intel->drawY = 0; - intel->numClipRects = 1; - intel->pClipRects = &fullRect; - - set_draw_region( i830, &screen->rotated ); - - if (cpp == 4) - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - else - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - - if (srcBuf == BUFFER_BIT_FRONT_LEFT) { - srcPitch = screen->front.pitch; /* in bytes */ - srcOffset = screen->front.offset; /* bytes */ - clipRects = dPriv->pClipRects; - numClipRects = dPriv->numClipRects; - } - else { - srcPitch = screen->back.pitch; /* in bytes */ - srcOffset = screen->back.offset; /* bytes */ - clipRects = dPriv->pBackClipRects; - numClipRects = dPriv->numBackClipRects; - } - - /* set the whole screen up as a texture to avoid alignment issues */ - set_tex_rect_source(i830, - srcOffset, - screen->width, - screen->height, - srcPitch, - textureFormat); - - enable_texture_blend_replace(i830); - - /* - * loop over the source window's cliprects - */ - for (i = 0; i < numClipRects; i++) { - int srcX0 = clipRects[i].x1; - int srcY0 = clipRects[i].y1; - int srcX1 = clipRects[i].x2; - int srcY1 = clipRects[i].y2; - GLfloat verts[4][2], tex[4][2]; - int j; - - /* build vertices for four corners of clip rect */ - verts[0][0] = srcX0; verts[0][1] = srcY0; - verts[1][0] = srcX1; verts[1][1] = srcY0; - verts[2][0] = srcX1; verts[2][1] = srcY1; - verts[3][0] = srcX0; verts[3][1] = srcY1; - - /* .. and texcoords */ - tex[0][0] = srcX0; tex[0][1] = srcY0; - tex[1][0] = srcX1; tex[1][1] = srcY0; - tex[2][0] = srcX1; tex[2][1] = srcY1; - tex[3][0] = srcX0; tex[3][1] = srcY1; - - /* transform coords to rotated screen coords */ - - for (j = 0; j < 4; j++) { - matrix23TransformCoordf(&screen->rotMatrix, - &verts[j][0], &verts[j][1]); - } - - /* draw polygon to map source image to dest region */ - draw_poly(i830, 255, 255, 255, 255, 4, verts, tex); - - } /* cliprect loop */ - - intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE ); - - done: - /* restore original drawing origin and cliprects */ - intel->drawX = xOrig; - intel->drawY = yOrig; - intel->numClipRects = origNumClipRects; - intel->pClipRects = origRects; - - UNLOCK_HARDWARE(intel); - - SET_STATE( i830, state ); + i830->intel.vtbl.install_meta_state = install_meta_state; + i830->intel.vtbl.leave_meta_state = leave_meta_state; + i830->intel.vtbl.meta_no_depth_write = set_no_depth_write; + i830->intel.vtbl.meta_no_stencil_write = set_no_stencil_write; + i830->intel.vtbl.meta_stencil_replace = set_stencil_replace; + i830->intel.vtbl.meta_depth_replace = set_depth_replace; + i830->intel.vtbl.meta_color_mask = set_color_mask; + i830->intel.vtbl.meta_no_texture = set_no_texture; + i830->intel.vtbl.meta_texture_blend_replace = set_texture_blend_replace; + i830->intel.vtbl.meta_tex_rect_source = set_tex_rect_source; + i830->intel.vtbl.meta_draw_region = meta_draw_region; + i830->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; } - diff --git a/i915/i830_reg.h b/i915/i830_reg.h index 98cee2f..41280bc 100644 --- a/i915/i830_reg.h +++ b/i915/i830_reg.h @@ -407,7 +407,7 @@ #define LOGICOP_SET 0xf #define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) #define ENABLE_STENCIL_TEST_MASK (1<<17) -#define STENCIL_TEST_MASK(x) ((x)<<8) +#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8) #define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) #define ENABLE_STENCIL_WRITE_MASK (1<<16) #define STENCIL_WRITE_MASK(x) ((x)&0xff) @@ -554,8 +554,8 @@ #define MAPSURF_4BIT_INDEXED (7<<6) #define TM0S1_MT_FORMAT_MASK (0x7 << 3) #define TM0S1_MT_FORMAT_SHIFT 3 -#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ -#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */ +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */ #define MT_8BIT_IDX_ARGB1555 (1<<3) #define MT_8BIT_IDX_ARGB4444 (2<<3) #define MT_8BIT_IDX_AY88 (3<<3) @@ -563,9 +563,9 @@ #define MT_8BIT_IDX_BUMP_88DVDU (5<<3) #define MT_8BIT_IDX_BUMP_655LDVDU (6<<3) #define MT_8BIT_IDX_ARGB8888 (7<<3) -#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ #define MT_8BIT_L8 (1<<3) -#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ #define MT_16BIT_ARGB1555 (1<<3) #define MT_16BIT_ARGB4444 (2<<3) #define MT_16BIT_AY88 (3<<3) @@ -573,16 +573,17 @@ #define MT_16BIT_BUMP_88DVDU (5<<3) #define MT_16BIT_BUMP_655LDVDU (6<<3) #define MT_16BIT_DIB_RGB565_8888 (7<<3) -#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ #define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) /* XXX: Guess from i915_reg.h */ #define MT_32BIT_BUMP_XLDVDU_8888 (6<<3) #define MT_32BIT_DIB_8888 (7<<3) -#define MT_411_YUV411 (0<<3) /* SURFACE_411 */ -#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_411_YUV411 (0<<3) /* SURFACE_411 */ +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ #define MT_422_YCRCB_NORMAL (1<<3) #define MT_422_YCRCB_SWAPUV (2<<3) #define MT_422_YCRCB_SWAPUVY (3<<3) -#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ #define MT_COMPRESS_DXT2_3 (1<<3) #define MT_COMPRESS_DXT4_5 (2<<3) #define MT_COMPRESS_FXT1 (3<<3) diff --git a/i915/i830_state.c b/i915/i830_state.c index 9512519..e44a7df 100644 --- a/i915/i830_state.c +++ b/i915/i830_state.c @@ -34,151 +34,155 @@ #include "texmem.h" +#include "drivers/common/driverfuncs.h" + #include "intel_screen.h" #include "intel_batchbuffer.h" +#include "intel_fbo.h" #include "i830_context.h" #include "i830_reg.h" +#define FILE_DEBUG_FLAG DEBUG_STATE + static void -i830StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref, +i830StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, GLuint mask) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); int test = intel_translate_compare_func(func); mask = mask & 0xff; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(func), ref, mask); + DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(func), ref, mask); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(mask)); + STENCIL_TEST_MASK(mask)); i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK | - ENABLE_STENCIL_TEST_FUNC_MASK); + ENABLE_STENCIL_TEST_FUNC_MASK); i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_REF_VALUE | - ENABLE_STENCIL_TEST_FUNC | - STENCIL_REF_VALUE(ref) | - STENCIL_TEST_FUNC(test)); + ENABLE_STENCIL_TEST_FUNC | + STENCIL_REF_VALUE(ref) | + STENCIL_TEST_FUNC(test)); } static void -i830StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask) +i830StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask); + struct i830_context *i830 = i830_context(ctx); + DBG("%s : mask 0x%x\n", __FUNCTION__, mask); + mask = mask & 0xff; I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(mask)); + STENCIL_WRITE_MASK(mask)); } static void -i830StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail, +i830StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail, GLenum zpass) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); int fop, dfop, dpop; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(fail), - _mesa_lookup_enum_by_nr(zfail), - _mesa_lookup_enum_by_nr(zpass)); + DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(fail), + _mesa_lookup_enum_by_nr(zfail), + _mesa_lookup_enum_by_nr(zpass)); - fop = 0; dfop = 0; dpop = 0; + fop = 0; + dfop = 0; + dpop = 0; - switch(fail) { - case GL_KEEP: - fop = STENCILOP_KEEP; + switch (fail) { + case GL_KEEP: + fop = STENCILOP_KEEP; break; - case GL_ZERO: - fop = STENCILOP_ZERO; + case GL_ZERO: + fop = STENCILOP_ZERO; break; - case GL_REPLACE: - fop = STENCILOP_REPLACE; + case GL_REPLACE: + fop = STENCILOP_REPLACE; break; - case GL_INCR: + case GL_INCR: fop = STENCILOP_INCRSAT; break; - case GL_DECR: + case GL_DECR: fop = STENCILOP_DECRSAT; break; case GL_INCR_WRAP: - fop = STENCILOP_INCR; + fop = STENCILOP_INCR; break; case GL_DECR_WRAP: - fop = STENCILOP_DECR; + fop = STENCILOP_DECR; break; - case GL_INVERT: - fop = STENCILOP_INVERT; + case GL_INVERT: + fop = STENCILOP_INVERT; break; - default: + default: break; } - switch(zfail) { - case GL_KEEP: - dfop = STENCILOP_KEEP; + switch (zfail) { + case GL_KEEP: + dfop = STENCILOP_KEEP; break; - case GL_ZERO: - dfop = STENCILOP_ZERO; + case GL_ZERO: + dfop = STENCILOP_ZERO; break; - case GL_REPLACE: - dfop = STENCILOP_REPLACE; + case GL_REPLACE: + dfop = STENCILOP_REPLACE; break; - case GL_INCR: + case GL_INCR: dfop = STENCILOP_INCRSAT; break; - case GL_DECR: + case GL_DECR: dfop = STENCILOP_DECRSAT; break; case GL_INCR_WRAP: - dfop = STENCILOP_INCR; + dfop = STENCILOP_INCR; break; case GL_DECR_WRAP: - dfop = STENCILOP_DECR; + dfop = STENCILOP_DECR; break; - case GL_INVERT: - dfop = STENCILOP_INVERT; + case GL_INVERT: + dfop = STENCILOP_INVERT; break; - default: + default: break; } - switch(zpass) { - case GL_KEEP: - dpop = STENCILOP_KEEP; + switch (zpass) { + case GL_KEEP: + dpop = STENCILOP_KEEP; break; - case GL_ZERO: - dpop = STENCILOP_ZERO; + case GL_ZERO: + dpop = STENCILOP_ZERO; break; - case GL_REPLACE: - dpop = STENCILOP_REPLACE; + case GL_REPLACE: + dpop = STENCILOP_REPLACE; break; - case GL_INCR: + case GL_INCR: dpop = STENCILOP_INCRSAT; break; - case GL_DECR: + case GL_DECR: dpop = STENCILOP_DECRSAT; break; case GL_INCR_WRAP: - dpop = STENCILOP_INCR; + dpop = STENCILOP_INCR; break; case GL_DECR_WRAP: - dpop = STENCILOP_DECR; + dpop = STENCILOP_DECR; break; - case GL_INVERT: - dpop = STENCILOP_INVERT; + case GL_INVERT: + dpop = STENCILOP_INVERT; break; - default: + default: break; } @@ -186,27 +190,30 @@ i830StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail, I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK); i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS | - STENCIL_FAIL_OP(fop) | - STENCIL_PASS_DEPTH_FAIL_OP(dfop) | - STENCIL_PASS_DEPTH_PASS_OP(dpop)); + STENCIL_FAIL_OP(fop) | + STENCIL_PASS_DEPTH_FAIL_OP + (dfop) | + STENCIL_PASS_DEPTH_PASS_OP + (dpop)); } -static void i830AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref) +static void +i830AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); int test = intel_translate_compare_func(func); GLubyte refByte; GLuint refInt; UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref); - refInt = (GLuint)refByte; + refInt = (GLuint) refByte; I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE2] &= ~ALPHA_TEST_REF_MASK; i830->state.Ctx[I830_CTXREG_STATE2] |= (ENABLE_ALPHA_TEST_FUNC | - ENABLE_ALPHA_REF_VALUE | - ALPHA_TEST_FUNC(test) | - ALPHA_REF_VALUE(refInt)); + ENABLE_ALPHA_REF_VALUE | + ALPHA_TEST_FUNC(test) | + ALPHA_REF_VALUE(refInt)); } /** @@ -219,45 +226,49 @@ static void i830AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref) * This function is substantially different from the old i830-specific driver. * I'm not sure which is correct. */ -static void i830EvalLogicOpBlendState(GLcontext *ctx) +static void +i830EvalLogicOpBlendState(GLcontext * ctx) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); I830_STATECHANGE(i830, I830_UPLOAD_CTX); if (RGBA_LOGICOP_ENABLED(ctx)) { i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND | - ENABLE_LOGIC_OP_MASK); + ENABLE_LOGIC_OP_MASK); i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND | - ENABLE_LOGIC_OP); - } else if (ctx->Color.BlendEnabled) { + ENABLE_LOGIC_OP); + } + else if (ctx->Color.BlendEnabled) { i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND | - ENABLE_LOGIC_OP_MASK); + ENABLE_LOGIC_OP_MASK); i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (ENABLE_COLOR_BLEND | - DISABLE_LOGIC_OP); - } else { + DISABLE_LOGIC_OP); + } + else { i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND | - ENABLE_LOGIC_OP_MASK); + ENABLE_LOGIC_OP_MASK); i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND | - DISABLE_LOGIC_OP); + DISABLE_LOGIC_OP); } } -static void i830BlendColor(GLcontext *ctx, const GLfloat color[4]) +static void +i830BlendColor(GLcontext * ctx, const GLfloat color[4]) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); GLubyte r, g, b, a; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]); I830_STATECHANGE(i830, I830_UPLOAD_CTX); - i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = (a<<24) | (r<<16) | (g<<8) | b; + i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = + (a << 24) | (r << 16) | (g << 8) | b; } /** @@ -266,9 +277,10 @@ static void i830BlendColor(GLcontext *ctx, const GLfloat color[4]) * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) * change the interpretation of the blend function. */ -static void i830_set_blend_state( GLcontext * ctx ) +static void +i830_set_blend_state(GLcontext * ctx) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); int funcA; int funcRGB; int eqnA; @@ -277,71 +289,72 @@ static void i830_set_blend_state( GLcontext * ctx ) int s1; - funcRGB = SRC_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcRGB ) ) - | DST_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstRGB ) ); + funcRGB = + SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcRGB)) + | DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstRGB)); - switch(ctx->Color.BlendEquationRGB) { + switch (ctx->Color.BlendEquationRGB) { case GL_FUNC_ADD: - eqnRGB = BLENDFUNC_ADD; + eqnRGB = BLENDFUNC_ADD; break; case GL_MIN: eqnRGB = BLENDFUNC_MIN; funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE); break; - case GL_MAX: + case GL_MAX: eqnRGB = BLENDFUNC_MAX; funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE); break; - case GL_FUNC_SUBTRACT: - eqnRGB = BLENDFUNC_SUB; + case GL_FUNC_SUBTRACT: + eqnRGB = BLENDFUNC_SUB; break; case GL_FUNC_REVERSE_SUBTRACT: - eqnRGB = BLENDFUNC_RVRSE_SUB; + eqnRGB = BLENDFUNC_RVRSE_SUB; break; default: - fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB ); + fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); return; } - funcA = SRC_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcA ) ) - | DST_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstA ) ); + funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcA)) + | DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstA)); - switch(ctx->Color.BlendEquationA) { + switch (ctx->Color.BlendEquationA) { case GL_FUNC_ADD: - eqnA = BLENDFUNC_ADD; + eqnA = BLENDFUNC_ADD; break; - case GL_MIN: + case GL_MIN: eqnA = BLENDFUNC_MIN; funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE); break; - case GL_MAX: + case GL_MAX: eqnA = BLENDFUNC_MAX; funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE); break; - case GL_FUNC_SUBTRACT: - eqnA = BLENDFUNC_SUB; + case GL_FUNC_SUBTRACT: + eqnA = BLENDFUNC_SUB; break; case GL_FUNC_REVERSE_SUBTRACT: - eqnA = BLENDFUNC_RVRSE_SUB; + eqnA = BLENDFUNC_RVRSE_SUB; break; default: - fprintf( stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA ); + fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); return; } iab = eqnA | funcA - | _3DSTATE_INDPT_ALPHA_BLEND_CMD - | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR - | ENABLE_ALPHA_BLENDFUNC; + | _3DSTATE_INDPT_ALPHA_BLEND_CMD + | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR + | ENABLE_ALPHA_BLENDFUNC; s1 = eqnRGB | funcRGB - | _3DSTATE_MODES_1_CMD - | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR - | ENABLE_COLR_BLND_FUNC; + | _3DSTATE_MODES_1_CMD + | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR + | ENABLE_COLR_BLND_FUNC; - if ( (eqnA | funcA) != (eqnRGB | funcRGB) ) + if ((eqnA | funcA) != (eqnRGB | funcRGB)) iab |= ENABLE_INDPT_ALPHA_BLEND; else iab |= DISABLE_INDPT_ALPHA_BLEND; @@ -361,70 +374,68 @@ static void i830_set_blend_state( GLcontext * ctx ) i830EvalLogicOpBlendState(ctx); if (0) { - fprintf(stderr, "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n", - __FUNCTION__, __LINE__, - i830->state.Ctx[I830_CTXREG_STATE1], - i830->state.Ctx[I830_CTXREG_IALPHAB], - (ctx->Color.BlendEnabled) ? "en" : "dis"); + fprintf(stderr, + "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n", + __FUNCTION__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1], + i830->state.Ctx[I830_CTXREG_IALPHAB], + (ctx->Color.BlendEnabled) ? "en" : "dis"); } } -static void i830BlendEquationSeparate(GLcontext *ctx, GLenum modeRGB, - GLenum modeA) +static void +i830BlendEquationSeparate(GLcontext * ctx, GLenum modeRGB, GLenum modeA) { - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s -> %s, %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(modeRGB), - _mesa_lookup_enum_by_nr(modeA)); + DBG("%s -> %s, %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(modeRGB), + _mesa_lookup_enum_by_nr(modeA)); (void) modeRGB; (void) modeA; - i830_set_blend_state( ctx ); + i830_set_blend_state(ctx); } -static void i830BlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB, - GLenum dfactorRGB, GLenum sfactorA, - GLenum dfactorA ) +static void +i830BlendFuncSeparate(GLcontext * ctx, GLenum sfactorRGB, + GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA) { - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(sfactorRGB), - _mesa_lookup_enum_by_nr(dfactorRGB), - _mesa_lookup_enum_by_nr(sfactorA), - _mesa_lookup_enum_by_nr(dfactorA)); + DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(sfactorRGB), + _mesa_lookup_enum_by_nr(dfactorRGB), + _mesa_lookup_enum_by_nr(sfactorA), + _mesa_lookup_enum_by_nr(dfactorA)); (void) sfactorRGB; (void) dfactorRGB; (void) sfactorA; (void) dfactorA; - i830_set_blend_state( ctx ); + i830_set_blend_state(ctx); } -static void i830DepthFunc(GLcontext *ctx, GLenum func) +static void +i830DepthFunc(GLcontext * ctx, GLenum func) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); int test = intel_translate_compare_func(func); - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK; i830->state.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC | - DEPTH_TEST_FUNC(test)); + DEPTH_TEST_FUNC(test)); } -static void i830DepthMask(GLcontext *ctx, GLboolean flag) +static void +i830DepthMask(GLcontext * ctx, GLboolean flag) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag); + struct i830_context *i830 = i830_context(ctx); + DBG("%s flag (%d)\n", __FUNCTION__, flag); + I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK; @@ -441,14 +452,15 @@ static void i830DepthMask(GLcontext *ctx, GLboolean flag) * The i830 supports a 4x4 stipple natively, GL wants 32x32. * Fortunately stipple is usually a repeating pattern. */ -static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask ) +static void +i830PolygonStipple(GLcontext * ctx, const GLubyte * mask) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); const GLubyte *m = mask; GLubyte p[4]; - int i,j,k; + int i, j, k; int active = (ctx->Polygon.StippleFlag && - i830->intel.reduced_primitive == GL_TRIANGLES); + i830->intel.reduced_primitive == GL_TRIANGLES); GLuint newMask; if (active) { @@ -456,23 +468,26 @@ static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask ) i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE; } - p[0] = mask[12] & 0xf; p[0] |= p[0] << 4; - p[1] = mask[8] & 0xf; p[1] |= p[1] << 4; - p[2] = mask[4] & 0xf; p[2] |= p[2] << 4; - p[3] = mask[0] & 0xf; p[3] |= p[3] << 4; - - for (k = 0 ; k < 8 ; k++) - for (j = 3 ; j >= 0; j--) - for (i = 0 ; i < 4 ; i++, m++) - if (*m != p[j]) { - i830->intel.hw_stipple = 0; - return; - } + p[0] = mask[12] & 0xf; + p[0] |= p[0] << 4; + p[1] = mask[8] & 0xf; + p[1] |= p[1] << 4; + p[2] = mask[4] & 0xf; + p[2] |= p[2] << 4; + p[3] = mask[0] & 0xf; + p[3] |= p[3] << 4; + + for (k = 0; k < 8; k++) + for (j = 3; j >= 0; j--) + for (i = 0; i < 4; i++, m++) + if (*m != p[j]) { + i830->intel.hw_stipple = 0; + return; + } newMask = (((p[0] & 0xf) << 0) | - ((p[1] & 0xf) << 4) | - ((p[2] & 0xf) << 8) | - ((p[3] & 0xf) << 12)); + ((p[1] & 0xf) << 4) | + ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12)); if (newMask == 0xffff || newMask == 0x0) { @@ -493,49 +508,54 @@ static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* ============================================================= * Hardware clipping */ -static void i830Scissor(GLcontext *ctx, GLint x, GLint y, - GLsizei w, GLsizei h) +static void +i830Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - intelScreenPrivate *screen = i830->intel.intelScreen; + struct i830_context *i830 = i830_context(ctx); int x1, y1, x2, y2; - if (!i830->intel.driDrawable) + if (!ctx->DrawBuffer) return; - x1 = x; - y1 = i830->intel.driDrawable->h - (y + h); - x2 = x + w - 1; - y2 = y1 + h - 1; + DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__, - x, y, w, h); - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 < 0) x2 = 0; - if (y2 < 0) y2 = 0; - - if (x2 >= screen->width) x2 = screen->width-1; - if (y2 >= screen->height) y2 = screen->height-1; - if (x1 >= screen->width) x1 = screen->width-1; - if (y1 >= screen->height) y1 = screen->height-1; + if (ctx->DrawBuffer->Name == 0) { + x1 = x; + y1 = ctx->DrawBuffer->Height - (y + h); + x2 = x + w - 1; + y2 = y1 + h - 1; + DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2); + } + else { + /* FBO - not inverted + */ + x1 = x; + y1 = y; + x2 = x + w - 1; + y2 = y + h - 1; + DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2); + } + x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); + y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1); + x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); + y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); + + DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2); I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); i830->state.Buffer[I830_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff); i830->state.Buffer[I830_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff); } -static void i830LogicOp(GLcontext *ctx, GLenum opcode) +static void +i830LogicOp(GLcontext * ctx, GLenum opcode) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - int tmp = intel_translate_logic_op( opcode ); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + struct i830_context *i830 = i830_context(ctx); + int tmp = intel_translate_logic_op(opcode); + DBG("%s\n", __FUNCTION__); + I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE4] &= ~LOGICOP_MASK; i830->state.Ctx[I830_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp); @@ -543,14 +563,14 @@ static void i830LogicOp(GLcontext *ctx, GLenum opcode) -static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused) +static void +i830CullFaceFrontFace(GLcontext * ctx, GLenum unused) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); GLuint mode; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + if (!ctx->Polygon.CullFlag) { mode = CULLMODE_NONE; } @@ -558,9 +578,9 @@ static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused) mode = CULLMODE_CW; if (ctx->Polygon.CullFaceMode == GL_FRONT) - mode ^= (CULLMODE_CW ^ CULLMODE_CCW); + mode ^= (CULLMODE_CW ^ CULLMODE_CCW); if (ctx->Polygon.FrontFace != GL_CCW) - mode ^= (CULLMODE_CW ^ CULLMODE_CCW); + mode ^= (CULLMODE_CW ^ CULLMODE_CCW); } else { mode = CULLMODE_BOTH; @@ -571,18 +591,18 @@ static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused) i830->state.Ctx[I830_CTXREG_STATE3] |= ENABLE_CULL_MODE | mode; } -static void i830LineWidth( GLcontext *ctx, GLfloat widthf ) +static void +i830LineWidth(GLcontext * ctx, GLfloat widthf) { - i830ContextPtr i830 = I830_CONTEXT( ctx ); + struct i830_context *i830 = i830_context(ctx); int width; int state5; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - - width = (int)(widthf * 2); - CLAMP_SELF(width, 1, 15); + DBG("%s\n", __FUNCTION__); + width = (int) (widthf * 2); + CLAMP_SELF(width, 1, 15); + state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK; state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(width)); @@ -592,19 +612,19 @@ static void i830LineWidth( GLcontext *ctx, GLfloat widthf ) } } -static void i830PointSize(GLcontext *ctx, GLfloat size) +static void +i830PointSize(GLcontext * ctx, GLfloat size) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - GLint point_size = (int)size; - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + struct i830_context *i830 = i830_context(ctx); + GLint point_size = (int) size; + DBG("%s\n", __FUNCTION__); + CLAMP_SELF(point_size, 1, 256); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE5] &= ~FIXED_POINT_WIDTH_MASK; i830->state.Ctx[I830_CTXREG_STATE5] |= (ENABLE_FIXED_POINT_WIDTH | - FIXED_POINT_WIDTH(point_size)); + FIXED_POINT_WIDTH(point_size)); } @@ -612,23 +632,21 @@ static void i830PointSize(GLcontext *ctx, GLfloat size) * Color masks */ -static void i830ColorMask(GLcontext *ctx, - GLboolean r, GLboolean g, - GLboolean b, GLboolean a) +static void +i830ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) { - i830ContextPtr i830 = I830_CONTEXT( ctx ); + struct i830_context *i830 = i830_context(ctx); GLuint tmp = 0; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a); + DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a); tmp = ((i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK) | - ENABLE_COLOR_MASK | - ENABLE_COLOR_WRITE | - ((!r) << WRITEMASK_RED_SHIFT) | - ((!g) << WRITEMASK_GREEN_SHIFT) | - ((!b) << WRITEMASK_BLUE_SHIFT) | - ((!a) << WRITEMASK_ALPHA_SHIFT)); + ENABLE_COLOR_MASK | + ENABLE_COLOR_WRITE | + ((!r) << WRITEMASK_RED_SHIFT) | + ((!g) << WRITEMASK_GREEN_SHIFT) | + ((!b) << WRITEMASK_BLUE_SHIFT) | ((!a) << WRITEMASK_ALPHA_SHIFT)); if (tmp != i830->state.Ctx[I830_CTXREG_ENABLES_2]) { I830_STATECHANGE(i830, I830_UPLOAD_CTX); @@ -636,9 +654,10 @@ static void i830ColorMask(GLcontext *ctx, } } -static void update_specular( GLcontext *ctx ) +static void +update_specular(GLcontext * ctx) { - i830ContextPtr i830 = I830_CONTEXT( ctx ); + struct i830_context *i830 = i830_context(ctx); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK; @@ -649,22 +668,22 @@ static void update_specular( GLcontext *ctx ) i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_SPEC_ADD; } -static void i830LightModelfv(GLcontext *ctx, GLenum pname, - const GLfloat *param) +static void +i830LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param) { - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) { - update_specular( ctx ); + update_specular(ctx); } } /* In Mesa 3.5 we can reliably do native flatshading. */ -static void i830ShadeModel(GLcontext *ctx, GLenum mode) +static void +i830ShadeModel(GLcontext * ctx, GLenum mode) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); I830_STATECHANGE(i830, I830_UPLOAD_CTX); @@ -673,58 +692,62 @@ static void i830ShadeModel(GLcontext *ctx, GLenum mode) i830->state.Ctx[I830_CTXREG_STATE3] &= ~SHADE_MODE_MASK; if (mode == GL_FLAT) { - i830->state.Ctx[I830_CTXREG_STATE3] |= (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) | - FOG_SHADE_MODE(SHADE_MODE_FLAT) | - SPEC_SHADE_MODE(SHADE_MODE_FLAT) | - COLOR_SHADE_MODE(SHADE_MODE_FLAT)); - } else { - i830->state.Ctx[I830_CTXREG_STATE3] |= (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | - FOG_SHADE_MODE(SHADE_MODE_LINEAR) | - SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | - COLOR_SHADE_MODE(SHADE_MODE_LINEAR)); + i830->state.Ctx[I830_CTXREG_STATE3] |= + (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) | FOG_SHADE_MODE(SHADE_MODE_FLAT) + | SPEC_SHADE_MODE(SHADE_MODE_FLAT) | + COLOR_SHADE_MODE(SHADE_MODE_FLAT)); + } + else { + i830->state.Ctx[I830_CTXREG_STATE3] |= + (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | + FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + COLOR_SHADE_MODE(SHADE_MODE_LINEAR)); } } /* ============================================================= * Fog */ -static void i830Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param) +static void +i830Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) { - i830ContextPtr i830 = I830_CONTEXT(ctx); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + struct i830_context *i830 = i830_context(ctx); - if (pname == GL_FOG_COLOR) { - GLuint color = (((GLubyte)(ctx->Fog.Color[0]*255.0F) << 16) | - ((GLubyte)(ctx->Fog.Color[1]*255.0F) << 8) | - ((GLubyte)(ctx->Fog.Color[2]*255.0F) << 0)); + DBG("%s\n", __FUNCTION__); + + if (pname == GL_FOG_COLOR) { + GLuint color = (((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) | + ((GLubyte) (ctx->Fog.Color[1] * 255.0F) << 8) | + ((GLubyte) (ctx->Fog.Color[2] * 255.0F) << 0)); I830_STATECHANGE(i830, I830_UPLOAD_CTX); - i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD | color); + i830->state.Ctx[I830_CTXREG_FOGCOLOR] = + (_3DSTATE_FOG_COLOR_CMD | color); } } /* ============================================================= */ -static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) +static void +i830Enable(GLcontext * ctx, GLenum cap, GLboolean state) { - i830ContextPtr i830 = I830_CONTEXT(ctx); + struct i830_context *i830 = i830_context(ctx); - switch(cap) { + switch (cap) { case GL_LIGHTING: case GL_COLOR_SUM: - update_specular( ctx ); + update_specular(ctx); break; case GL_ALPHA_TEST: I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_ALPHA_TEST_MASK; if (state) - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST; else - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST; break; @@ -737,18 +760,18 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) /* Logicop doesn't seem to work at 16bpp: */ - if (i830->intel.intelScreen->cpp == 2) - FALLBACK( &i830->intel, I830_FALLBACK_LOGICOP, state ); + if (i830->intel.ctx.Visual.rgbBits == 16) + FALLBACK(&i830->intel, I830_FALLBACK_LOGICOP, state); break; - + case GL_DITHER: I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DITHER; if (state) - i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER; + i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER; else - i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER; + i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER; break; case GL_DEPTH_TEST: @@ -756,46 +779,44 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK; if (state) - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST; else - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST; /* Also turn off depth writes when GL_DEPTH_TEST is disabled: */ - i830DepthMask( ctx, ctx->Depth.Mask ); + i830DepthMask(ctx, ctx->Depth.Mask); break; case GL_SCISSOR_TEST: I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); - + if (state) - i830->state.Buffer[I830_DESTREG_SENABLE] = - (_3DSTATE_SCISSOR_ENABLE_CMD | - ENABLE_SCISSOR_RECT); + i830->state.Buffer[I830_DESTREG_SENABLE] = + (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT); else - i830->state.Buffer[I830_DESTREG_SENABLE] = - (_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); + i830->state.Buffer[I830_DESTREG_SENABLE] = + (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); break; case GL_LINE_SMOOTH: I830_STATECHANGE(i830, I830_UPLOAD_CTX); - + i830->state.Ctx[I830_CTXREG_AA] &= ~AA_LINE_ENABLE; if (state) - i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE; + i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE; else - i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE; + i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE; break; case GL_FOG: I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_FOG_MASK; if (state) - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG; else - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG; break; case GL_CULL_FACE: @@ -806,20 +827,32 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) break; case GL_STENCIL_TEST: - if (i830->intel.hw_stencil) { - I830_STATECHANGE(i830, I830_UPLOAD_CTX); - - if (state) { - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST; - i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE; - } else { - i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST; - i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE; - i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST; - i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE; - } - } else { - FALLBACK( &i830->intel, I830_FALLBACK_STENCIL, state ); + { + GLboolean hw_stencil = GL_FALSE; + if (ctx->DrawBuffer) { + struct intel_renderbuffer *irbStencil + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (irbStencil && irbStencil->region); + } + if (hw_stencil) { + I830_STATECHANGE(i830, I830_UPLOAD_CTX); + + if (state) { + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE; + } + else { + i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_2] &= + ~ENABLE_STENCIL_WRITE; + i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST; + i830->state.Ctx[I830_CTXREG_ENABLES_2] |= + DISABLE_STENCIL_WRITE; + } + } + else { + FALLBACK(&i830->intel, I830_FALLBACK_STENCIL, state); + } } break; @@ -828,13 +861,12 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) * I'll do more testing later to find out exactly which hardware * supports it. Disabled for now. */ - if (i830->intel.hw_stipple && - i830->intel.reduced_primitive == GL_TRIANGLES) - { - I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE); - i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE; - if (state) - i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE; + if (i830->intel.hw_stipple && + i830->intel.reduced_primitive == GL_TRIANGLES) { + I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE); + i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE; + if (state) + i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE; } break; @@ -844,206 +876,172 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state) } -static void i830_init_packets( i830ContextPtr i830 ) +static void +i830_init_packets(struct i830_context *i830) { - intelScreenPrivate *screen = i830->intel.intelScreen; - /* Zero all state */ memset(&i830->state, 0, sizeof(i830->state)); /* Set default blend state */ i830->state.TexBlend[0][0] = (_3DSTATE_MAP_BLEND_OP_CMD(0) | - TEXPIPE_COLOR | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - DISABLE_TEX_CNTRL_STAGE | - TEXOP_SCALE_1X | - TEXOP_MODIFY_PARMS | - TEXOP_LAST_STAGE | - TEXBLENDOP_ARG1); + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | + TEXOP_MODIFY_PARMS | + TEXOP_LAST_STAGE | TEXBLENDOP_ARG1); i830->state.TexBlend[0][1] = (_3DSTATE_MAP_BLEND_OP_CMD(0) | - TEXPIPE_ALPHA | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - TEXOP_SCALE_1X | - TEXOP_MODIFY_PARMS | - TEXBLENDOP_ARG1); + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | + TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); i830->state.TexBlend[0][2] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) | - TEXPIPE_COLOR | - TEXBLEND_ARG1 | - TEXBLENDARG_MODIFY_PARMS | - TEXBLENDARG_DIFFUSE); + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | + TEXBLENDARG_DIFFUSE); i830->state.TexBlend[0][3] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) | - TEXPIPE_ALPHA | - TEXBLEND_ARG1 | - TEXBLENDARG_MODIFY_PARMS | - TEXBLENDARG_DIFFUSE); + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | + TEXBLENDARG_DIFFUSE); i830->state.TexBlendWordsUsed[0] = 4; - i830->state.Ctx[I830_CTXREG_VF] = 0; + i830->state.Ctx[I830_CTXREG_VF] = 0; i830->state.Ctx[I830_CTXREG_VF2] = 0; i830->state.Ctx[I830_CTXREG_AA] = (_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | - AA_LINE_REGION_WIDTH_1_0 | - AA_LINE_DISABLE); + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | + AA_LINE_REGION_WIDTH_1_0 | + AA_LINE_DISABLE); i830->state.Ctx[I830_CTXREG_ENABLES_1] = (_3DSTATE_ENABLES_1_CMD | - DISABLE_LOGIC_OP | - DISABLE_STENCIL_TEST | - DISABLE_DEPTH_BIAS | - DISABLE_SPEC_ADD | - DISABLE_FOG | - DISABLE_ALPHA_TEST | - DISABLE_COLOR_BLEND | - DISABLE_DEPTH_TEST); - + DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | + DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | + DISABLE_FOG | + DISABLE_ALPHA_TEST | + DISABLE_COLOR_BLEND | + DISABLE_DEPTH_TEST); + +#if 000 /* XXX all the stencil enable state is set in i830Enable(), right? */ if (i830->intel.hw_stencil) { i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD | - ENABLE_STENCIL_WRITE | - ENABLE_TEX_CACHE | - ENABLE_DITHER | - ENABLE_COLOR_MASK | - /* set no color comps disabled */ - ENABLE_COLOR_WRITE | - ENABLE_DEPTH_WRITE); - } else { + ENABLE_STENCIL_WRITE | + ENABLE_TEX_CACHE | + ENABLE_DITHER | + ENABLE_COLOR_MASK | + /* set no color comps disabled */ + ENABLE_COLOR_WRITE | + ENABLE_DEPTH_WRITE); + } + else +#endif + { i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD | - DISABLE_STENCIL_WRITE | - ENABLE_TEX_CACHE | - ENABLE_DITHER | - ENABLE_COLOR_MASK | - /* set no color comps disabled */ - ENABLE_COLOR_WRITE | - ENABLE_DEPTH_WRITE); + DISABLE_STENCIL_WRITE | + ENABLE_TEX_CACHE | + ENABLE_DITHER | + ENABLE_COLOR_MASK | + /* set no color comps disabled */ + ENABLE_COLOR_WRITE | + ENABLE_DEPTH_WRITE); } i830->state.Ctx[I830_CTXREG_STATE1] = (_3DSTATE_MODES_1_CMD | - ENABLE_COLR_BLND_FUNC | - BLENDFUNC_ADD | - ENABLE_SRC_BLND_FACTOR | - SRC_BLND_FACT(BLENDFACT_ONE) | - ENABLE_DST_BLND_FACTOR | - DST_BLND_FACT(BLENDFACT_ZERO) ); + ENABLE_COLR_BLND_FUNC | + BLENDFUNC_ADD | + ENABLE_SRC_BLND_FACTOR | + SRC_BLND_FACT(BLENDFACT_ONE) | + ENABLE_DST_BLND_FACTOR | + DST_BLND_FACT(BLENDFACT_ZERO)); i830->state.Ctx[I830_CTXREG_STATE2] = (_3DSTATE_MODES_2_CMD | - ENABLE_GLOBAL_DEPTH_BIAS | - GLOBAL_DEPTH_BIAS(0) | - ENABLE_ALPHA_TEST_FUNC | - ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS) | - ALPHA_REF_VALUE(0) ); + ENABLE_GLOBAL_DEPTH_BIAS | + GLOBAL_DEPTH_BIAS(0) | + ENABLE_ALPHA_TEST_FUNC | + ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS) + | ALPHA_REF_VALUE(0)); i830->state.Ctx[I830_CTXREG_STATE3] = (_3DSTATE_MODES_3_CMD | - ENABLE_DEPTH_TEST_FUNC | - DEPTH_TEST_FUNC(COMPAREFUNC_LESS) | - ENABLE_ALPHA_SHADE_MODE | - ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | - ENABLE_FOG_SHADE_MODE | - FOG_SHADE_MODE(SHADE_MODE_LINEAR) | - ENABLE_SPEC_SHADE_MODE | - SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | - ENABLE_COLOR_SHADE_MODE | - COLOR_SHADE_MODE(SHADE_MODE_LINEAR) | - ENABLE_CULL_MODE | - CULLMODE_NONE); + ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC(COMPAREFUNC_LESS) | + ENABLE_ALPHA_SHADE_MODE | + ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) + | ENABLE_FOG_SHADE_MODE | + FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_SPEC_SHADE_MODE | + SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_COLOR_SHADE_MODE | + COLOR_SHADE_MODE(SHADE_MODE_LINEAR) + | ENABLE_CULL_MODE | CULLMODE_NONE); i830->state.Ctx[I830_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD | - ENABLE_LOGIC_OP_FUNC | - LOGIC_OP_FUNC(LOGICOP_COPY) | - ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff) | - ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(0xff)); + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(0xff)); i830->state.Ctx[I830_CTXREG_STENCILTST] = (_3DSTATE_STENCIL_TEST_CMD | - ENABLE_STENCIL_PARMS | - STENCIL_FAIL_OP(STENCILOP_KEEP) | - STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_KEEP) | - STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_KEEP) | - ENABLE_STENCIL_TEST_FUNC | - STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS) | - ENABLE_STENCIL_REF_VALUE | - STENCIL_REF_VALUE(0) ); - - i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD | - FLUSH_TEXTURE_CACHE | - ENABLE_SPRITE_POINT_TEX | - SPRITE_POINT_TEX_OFF | - ENABLE_FIXED_LINE_WIDTH | - FIXED_LINE_WIDTH(0x2) | /* 1.0 */ - ENABLE_FIXED_POINT_WIDTH | - FIXED_POINT_WIDTH(1) ); + ENABLE_STENCIL_PARMS | + STENCIL_FAIL_OP(STENCILOP_KEEP) + | + STENCIL_PASS_DEPTH_FAIL_OP + (STENCILOP_KEEP) | + STENCIL_PASS_DEPTH_PASS_OP + (STENCILOP_KEEP) | + ENABLE_STENCIL_TEST_FUNC | + STENCIL_TEST_FUNC + (COMPAREFUNC_ALWAYS) | + ENABLE_STENCIL_REF_VALUE | + STENCIL_REF_VALUE(0)); + + i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD | FLUSH_TEXTURE_CACHE | ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */ + ENABLE_FIXED_POINT_WIDTH | + FIXED_POINT_WIDTH(1)); i830->state.Ctx[I830_CTXREG_IALPHAB] = (_3DSTATE_INDPT_ALPHA_BLEND_CMD | - DISABLE_INDPT_ALPHA_BLEND | - ENABLE_ALPHA_BLENDFUNC | - ABLENDFUNC_ADD); + DISABLE_INDPT_ALPHA_BLEND | + ENABLE_ALPHA_BLENDFUNC | + ABLENDFUNC_ADD); i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD | - FOG_COLOR_RED(0) | - FOG_COLOR_GREEN(0) | - FOG_COLOR_BLUE(0)); + FOG_COLOR_RED(0) | + FOG_COLOR_GREEN(0) | + FOG_COLOR_BLUE(0)); i830->state.Ctx[I830_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD; i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = 0; i830->state.Ctx[I830_CTXREG_MCSB0] = _3DSTATE_MAP_COORD_SETBIND_CMD; i830->state.Ctx[I830_CTXREG_MCSB1] = (TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | - TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | - TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | - TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); - - - i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE; - - i830->state.Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(screen->front.pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); + TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); - i830->state.Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - i830->state.Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(screen->depth.pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_DBUFADDR2] = screen->depth.offset; - + i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE; i830->state.Buffer[I830_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD; - - switch (screen->fbFormat) { - case DV_PF_555: - case DV_PF_565: - i830->state.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - screen->fbFormat | - DEPTH_IS_Z | - DEPTH_FRMT_16_FIXED); - break; - case DV_PF_8888: - i830->state.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - screen->fbFormat | - DEPTH_IS_Z | - DEPTH_FRMT_24_FIXED_8_OTHER); - break; - } - i830->state.Buffer[I830_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); + DISABLE_SCISSOR_RECT); i830->state.Buffer[I830_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD; i830->state.Buffer[I830_DESTREG_SR1] = 0; i830->state.Buffer[I830_DESTREG_SR2] = 0; } -void i830InitStateFuncs( struct dd_function_table *functions ) +void +i830InitStateFuncs(struct dd_function_table *functions) { functions->AlphaFunc = i830AlphaFunc; functions->BlendColor = i830BlendColor; @@ -1068,25 +1066,21 @@ void i830InitStateFuncs( struct dd_function_table *functions ) functions->StencilOpSeparate = i830StencilOpSeparate; } -void i830InitState( i830ContextPtr i830 ) +void +i830InitState(struct i830_context *i830) { GLcontext *ctx = &i830->intel.ctx; - i830_init_packets( i830 ); + i830_init_packets(i830); - intelInitState( ctx ); + _mesa_init_driver_state(ctx); - memcpy( &i830->initial, &i830->state, sizeof(i830->state) ); + memcpy(&i830->initial, &i830->state, sizeof(i830->state)); i830->current = &i830->state; i830->state.emitted = 0; - i830->state.active = (I830_UPLOAD_TEXBLEND(0) | - I830_UPLOAD_STIPPLE | - I830_UPLOAD_CTX | - I830_UPLOAD_BUFFERS); + i830->state.active = (I830_UPLOAD_INVARIENT | + I830_UPLOAD_TEXBLEND(0) | + I830_UPLOAD_STIPPLE | + I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS); } - - - - - diff --git a/i915/i830_tex.c b/i915/i830_tex.c index 3c4aedb..79b0fcf 100644 --- a/i915/i830_tex.c +++ b/i915/i830_tex.c @@ -45,261 +45,13 @@ - -/** - * Set the texture wrap modes. - * - * The i830M (and related graphics cores) do not support GL_CLAMP. The Intel - * drivers for "other operating systems" implement GL_CLAMP as - * GL_CLAMP_TO_EDGE, so the same is done here. - * - * \param t Texture object whose wrap modes are to be set - * \param swrap Wrap mode for the \a s texture coordinate - * \param twrap Wrap mode for the \a t texture coordinate - */ -static void i830SetTexWrapping(i830TextureObjectPtr tex, - GLenum swrap, - GLenum twrap) -{ - tex->Setup[I830_TEXREG_MCS] &= ~(TEXCOORD_ADDR_U_MASK|TEXCOORD_ADDR_V_MASK); - - switch( swrap ) { - case GL_REPEAT: - tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP); - break; - case GL_CLAMP: - case GL_CLAMP_TO_EDGE: - tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP); - break; - case GL_CLAMP_TO_BORDER: - tex->Setup[I830_TEXREG_MCS] |= - TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP_BORDER); - break; - case GL_MIRRORED_REPEAT: - tex->Setup[I830_TEXREG_MCS] |= - TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_MIRROR); - break; - default: - break; - } - - switch( twrap ) { - case GL_REPEAT: - tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP); - break; - case GL_CLAMP: - case GL_CLAMP_TO_EDGE: - tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP); - break; - case GL_CLAMP_TO_BORDER: - tex->Setup[I830_TEXREG_MCS] |= - TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP_BORDER); - break; - case GL_MIRRORED_REPEAT: - tex->Setup[I830_TEXREG_MCS] |= - TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_MIRROR); - break; - default: - break; - } -} - - -/** - * Set the texture magnification and minification modes. - * - * \param t Texture whose filter modes are to be set - * \param minf Texture minification mode - * \param magf Texture magnification mode - * \param bias LOD bias for this texture unit. - */ - -static void i830SetTexFilter( i830TextureObjectPtr t, GLenum minf, GLenum magf, - GLfloat maxanisotropy ) -{ - int minFilt = 0, mipFilt = 0, magFilt = 0; - - if(INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - - if ( maxanisotropy > 1.0 ) { - minFilt = FILTER_ANISOTROPIC; - magFilt = FILTER_ANISOTROPIC; - } - else { - switch (minf) { - case GL_NEAREST: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_NONE; - break; - case GL_LINEAR: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_NONE; - break; - case GL_NEAREST_MIPMAP_NEAREST: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_NEAREST; - break; - case GL_LINEAR_MIPMAP_NEAREST: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_LINEAR; - break; - case GL_LINEAR_MIPMAP_LINEAR: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_LINEAR; - break; - default: - break; - } - - switch (magf) { - case GL_NEAREST: - magFilt = FILTER_NEAREST; - break; - case GL_LINEAR: - magFilt = FILTER_LINEAR; - break; - default: - break; - } - } - - t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_FILTER_MASK; - t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIP_FILTER_MASK; - t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAG_FILTER_MASK; - t->Setup[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) | - (mipFilt << TM0S3_MIP_FILTER_SHIFT) | - (magFilt << TM0S3_MAG_FILTER_SHIFT)); -} - -static void i830SetTexBorderColor(i830TextureObjectPtr t, GLubyte color[4]) -{ - if(INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - - t->Setup[I830_TEXREG_TM0S4] = - INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]); -} - - -/** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. - */ - -intelTextureObjectPtr i830AllocTexObj( struct gl_texture_object *texObj ) -{ - i830TextureObjectPtr t = CALLOC_STRUCT( i830_texture_object ); - if ( !t ) - return NULL; - - texObj->DriverData = t; - t->intel.base.tObj = texObj; - t->intel.dirty = I830_UPLOAD_TEX_ALL; - make_empty_list( &t->intel.base ); - - t->Setup[I830_TEXREG_TM0LI] = 0; /* not used */ - t->Setup[I830_TEXREG_TM0S0] = 0; - t->Setup[I830_TEXREG_TM0S1] = 0; - t->Setup[I830_TEXREG_TM0S2] = 0; - t->Setup[I830_TEXREG_TM0S3] = 0; - t->Setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD | - MAP_UNIT(0) | - ENABLE_TEXCOORD_PARAMS | - TEXCOORDS_ARE_NORMAL | - TEXCOORDTYPE_CARTESIAN | - ENABLE_ADDR_V_CNTL | - TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) | - ENABLE_ADDR_U_CNTL | - TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP)); - - - i830SetTexWrapping( t, texObj->WrapS, texObj->WrapT ); - i830SetTexFilter( t, texObj->MinFilter, texObj->MagFilter, - texObj->MaxAnisotropy ); - i830SetTexBorderColor( t, texObj->_BorderChan ); - - return &t->intel; -} - - -static void i830TexParameter( GLcontext *ctx, GLenum target, - struct gl_texture_object *tObj, - GLenum pname, const GLfloat *params ) -{ - i830TextureObjectPtr t = (i830TextureObjectPtr) tObj->DriverData; - if (!t) - return; - - switch (pname) { - case GL_TEXTURE_MIN_FILTER: - case GL_TEXTURE_MAG_FILTER: - case GL_TEXTURE_MAX_ANISOTROPY_EXT: - i830SetTexFilter( t, tObj->MinFilter, tObj->MagFilter, - tObj->MaxAnisotropy); - break; - - case GL_TEXTURE_WRAP_S: - case GL_TEXTURE_WRAP_T: - i830SetTexWrapping( t, tObj->WrapS, tObj->WrapT ); - break; - - case GL_TEXTURE_BORDER_COLOR: - i830SetTexBorderColor( t, tObj->_BorderChan ); - break; - - case GL_TEXTURE_BASE_LEVEL: - case GL_TEXTURE_MAX_LEVEL: - case GL_TEXTURE_MIN_LOD: - case GL_TEXTURE_MAX_LOD: - /* The i830 and its successors can do a lot of this without - * reloading the textures. A project for someone? - */ - intelFlush( ctx ); - driSwapOutTextureObject( (driTextureObject *) t ); - break; - - default: - return; - } - - t->intel.dirty = I830_UPLOAD_TEX_ALL; -} - - -static void i830TexEnv( GLcontext *ctx, GLenum target, - GLenum pname, const GLfloat *param ) +static void +i830TexEnv(GLcontext * ctx, GLenum target, + GLenum pname, const GLfloat * param) { - i830ContextPtr i830 = I830_CONTEXT( ctx ); - GLuint unit = ctx->Texture.CurrentUnit; switch (pname) { - case GL_TEXTURE_ENV_COLOR: -#if 0 - { - GLubyte r, g, b, a; - GLuint col; - - UNCLAMPED_FLOAT_TO_UBYTE(r, param[RCOMP]); - UNCLAMPED_FLOAT_TO_UBYTE(g, param[GCOMP]); - UNCLAMPED_FLOAT_TO_UBYTE(b, param[BCOMP]); - UNCLAMPED_FLOAT_TO_UBYTE(a, param[ACOMP]); - - col = ((a << 24) | (r << 16) | (g << 8) | b); - - if (col != i830->state.TexEnv[unit][I830_TEXENVREG_COL1]) { - I830_STATECHANGE(i830, I830_UPLOAD_TEXENV); - i830->state.TexEnv[unit][I830_TEXENVREG_COL1] = col; - } - - break; - } -#endif + case GL_TEXTURE_ENV_COLOR: case GL_TEXTURE_ENV_MODE: case GL_COMBINE_RGB: case GL_COMBINE_ALPHA: @@ -319,38 +71,32 @@ static void i830TexEnv( GLcontext *ctx, GLenum target, case GL_ALPHA_SCALE: break; - case GL_TEXTURE_LOD_BIAS: { - int b = (int) ((*param) * 16.0); - if (b > 63) b = 63; - if (b < -64) b = -64; - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->state.Tex[unit][I830_TEXREG_TM0S3] &= ~TM0S3_LOD_BIAS_MASK; - i830->state.Tex[unit][I830_TEXREG_TM0S3] |= - ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); - break; - } + case GL_TEXTURE_LOD_BIAS:{ + struct i830_context *i830 = i830_context(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + int b = (int) ((*param) * 16.0); + if (b > 63) + b = 63; + if (b < -64) + b = -64; + I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); + i830->lodbias_tm0s3[unit] = + ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); + break; + } default: break; } } -static void i830BindTexture( GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj ) -{ - i830TextureObjectPtr tex; - - if (!texObj->DriverData) - i830AllocTexObj( texObj ); - - tex = (i830TextureObjectPtr)texObj->DriverData; -} -void i830InitTextureFuncs( struct dd_function_table *functions ) +void +i830InitTextureFuncs(struct dd_function_table *functions) { - functions->BindTexture = i830BindTexture; - functions->TexEnv = i830TexEnv; - functions->TexParameter = i830TexParameter; +/* + functions->TexEnv = i830TexEnv; +*/ } diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c index 49e0347..58f220e 100644 --- a/i915/i830_texblend.c +++ b/i915/i830_texblend.c @@ -46,46 +46,42 @@ /* ================================================================ * Texture combine functions */ -static GLuint pass_through( GLuint *state, GLuint blendUnit ) +static GLuint +pass_through(GLuint * state, GLuint blendUnit) { state[0] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) | - TEXPIPE_COLOR | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - DISABLE_TEX_CNTRL_STAGE | - TEXOP_SCALE_1X | - TEXOP_MODIFY_PARMS | - TEXBLENDOP_ARG1); + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); state[1] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) | - TEXPIPE_ALPHA | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - TEXOP_SCALE_1X | - TEXOP_MODIFY_PARMS | - TEXBLENDOP_ARG1); + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); state[2] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) | - TEXPIPE_COLOR | - TEXBLEND_ARG1 | - TEXBLENDARG_MODIFY_PARMS | - TEXBLENDARG_CURRENT); + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT); state[3] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) | - TEXPIPE_ALPHA | - TEXBLEND_ARG1 | - TEXBLENDARG_MODIFY_PARMS | - TEXBLENDARG_CURRENT); + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT); return 4; } -static GLuint emit_factor( GLuint blendUnit, GLuint *state, GLuint count, - const GLfloat *factor ) +static GLuint +emit_factor(GLuint blendUnit, GLuint * state, GLuint count, + const GLfloat * factor) { GLubyte r, g, b, a; GLuint col; - + if (0) fprintf(stderr, "emit constant %d: %.2f %.2f %.2f %.2f\n", - blendUnit, factor[0], factor[1], factor[2], factor[3]); + blendUnit, factor[0], factor[1], factor[2], factor[3]); UNCLAMPED_FLOAT_TO_UBYTE(r, factor[0]); UNCLAMPED_FLOAT_TO_UBYTE(g, factor[1]); @@ -94,21 +90,27 @@ static GLuint emit_factor( GLuint blendUnit, GLuint *state, GLuint count, col = ((a << 24) | (r << 16) | (g << 8) | b); - state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit); + state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit); state[count++] = col; return count; } -static __inline__ GLuint GetTexelOp(GLint unit) +static INLINE GLuint +GetTexelOp(GLint unit) { - switch(unit) { - case 0: return TEXBLENDARG_TEXEL0; - case 1: return TEXBLENDARG_TEXEL1; - case 2: return TEXBLENDARG_TEXEL2; - case 3: return TEXBLENDARG_TEXEL3; - default: return TEXBLENDARG_TEXEL0; + switch (unit) { + case 0: + return TEXBLENDARG_TEXEL0; + case 1: + return TEXBLENDARG_TEXEL1; + case 2: + return TEXBLENDARG_TEXEL2; + case 3: + return TEXBLENDARG_TEXEL3; + default: + return TEXBLENDARG_TEXEL0; } } @@ -132,12 +134,10 @@ static __inline__ GLuint GetTexelOp(GLint unit) * partial support for the extension? */ GLuint -i830SetTexEnvCombine(i830ContextPtr i830, - const struct gl_tex_env_combine_state * combine, - GLint blendUnit, - GLuint texel_op, - GLuint *state, - const GLfloat *factor ) +i830SetTexEnvCombine(struct i830_context * i830, + const struct gl_tex_env_combine_state * combine, + GLint blendUnit, + GLuint texel_op, GLuint * state, const GLfloat * factor) { const GLuint numColorArgs = combine->_NumArgsRGB; const GLuint numAlphaArgs = combine->_NumArgsA; @@ -162,7 +162,7 @@ i830SetTexEnvCombine(i830ContextPtr i830, TEXPIPE_ALPHA | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS, }; - if(INTEL_DEBUG&DEBUG_TEXTURE) + if (INTEL_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "%s\n", __FUNCTION__); @@ -188,23 +188,23 @@ i830SetTexEnvCombine(i830ContextPtr i830, } - switch(combine->ModeRGB) { - case GL_REPLACE: + switch (combine->ModeRGB) { + case GL_REPLACE: blendop = TEXBLENDOP_ARG1; break; - case GL_MODULATE: + case GL_MODULATE: blendop = TEXBLENDOP_MODULATE; break; - case GL_ADD: + case GL_ADD: blendop = TEXBLENDOP_ADD; break; case GL_ADD_SIGNED: - blendop = TEXBLENDOP_ADDSIGNED; + blendop = TEXBLENDOP_ADDSIGNED; break; case GL_INTERPOLATE: - blendop = TEXBLENDOP_BLEND; + blendop = TEXBLENDOP_BLEND; break; - case GL_SUBTRACT: + case GL_SUBTRACT: blendop = TEXBLENDOP_SUBTRACT; break; case GL_DOT3_RGB_EXT: @@ -215,55 +215,54 @@ i830SetTexEnvCombine(i830ContextPtr i830, case GL_DOT3_RGBA: blendop = TEXBLENDOP_DOT3; break; - default: - return pass_through( state, blendUnit ); + default: + return pass_through(state, blendUnit); } blendop |= (rgb_shift << TEXOP_SCALE_SHIFT); /* Handle RGB args */ - for(i = 0; i < 3; i++) { - switch(combine->SourceRGB[i]) { - case GL_TEXTURE: - args_RGB[i] = texel_op; - break; + for (i = 0; i < 3; i++) { + switch (combine->SourceRGB[i]) { + case GL_TEXTURE: + args_RGB[i] = texel_op; + break; case GL_TEXTURE0: case GL_TEXTURE1: case GL_TEXTURE2: case GL_TEXTURE3: - args_RGB[i] = GetTexelOp( combine->SourceRGB[i] - GL_TEXTURE0 ); - break; + args_RGB[i] = GetTexelOp(combine->SourceRGB[i] - GL_TEXTURE0); + break; case GL_CONSTANT: - args_RGB[i] = TEXBLENDARG_FACTOR_N; - need_factor = 1; - break; + args_RGB[i] = TEXBLENDARG_FACTOR_N; + need_factor = 1; + break; case GL_PRIMARY_COLOR: - args_RGB[i] = TEXBLENDARG_DIFFUSE; - break; + args_RGB[i] = TEXBLENDARG_DIFFUSE; + break; case GL_PREVIOUS: - args_RGB[i] = TEXBLENDARG_CURRENT; - break; - default: - return pass_through( state, blendUnit ); + args_RGB[i] = TEXBLENDARG_CURRENT; + break; + default: + return pass_through(state, blendUnit); } - switch(combine->OperandRGB[i]) { - case GL_SRC_COLOR: - args_RGB[i] |= 0; - break; - case GL_ONE_MINUS_SRC_COLOR: - args_RGB[i] |= TEXBLENDARG_INV_ARG; - break; - case GL_SRC_ALPHA: - args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA; - break; - case GL_ONE_MINUS_SRC_ALPHA: - args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | - TEXBLENDARG_INV_ARG); - break; - default: - return pass_through( state, blendUnit ); + switch (combine->OperandRGB[i]) { + case GL_SRC_COLOR: + args_RGB[i] |= 0; + break; + case GL_ONE_MINUS_SRC_COLOR: + args_RGB[i] |= TEXBLENDARG_INV_ARG; + break; + case GL_SRC_ALPHA: + args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | TEXBLENDARG_INV_ARG); + break; + default: + return pass_through(state, blendUnit); } } @@ -275,76 +274,76 @@ i830SetTexEnvCombine(i830ContextPtr i830, * Note - the global factor is set up with alpha == .5, so * the alpha part of the DOT4 calculation should be zero. */ - if ( combine->ModeRGB == GL_DOT3_RGBA_EXT || - combine->ModeRGB == GL_DOT3_RGBA ) { + if (combine->ModeRGB == GL_DOT3_RGBA_EXT || + combine->ModeRGB == GL_DOT3_RGBA) { ablendop = TEXBLENDOP_DOT4; - args_A[0] = TEXBLENDARG_FACTOR; /* the global factor */ + args_A[0] = TEXBLENDARG_FACTOR; /* the global factor */ args_A[1] = TEXBLENDARG_FACTOR; args_A[2] = TEXBLENDARG_FACTOR; } else { - switch(combine->ModeA) { - case GL_REPLACE: - ablendop = TEXBLENDOP_ARG1; - break; - case GL_MODULATE: - ablendop = TEXBLENDOP_MODULATE; - break; - case GL_ADD: - ablendop = TEXBLENDOP_ADD; - break; + switch (combine->ModeA) { + case GL_REPLACE: + ablendop = TEXBLENDOP_ARG1; + break; + case GL_MODULATE: + ablendop = TEXBLENDOP_MODULATE; + break; + case GL_ADD: + ablendop = TEXBLENDOP_ADD; + break; case GL_ADD_SIGNED: - ablendop = TEXBLENDOP_ADDSIGNED; - break; + ablendop = TEXBLENDOP_ADDSIGNED; + break; case GL_INTERPOLATE: - ablendop = TEXBLENDOP_BLEND; - break; - case GL_SUBTRACT: - ablendop = TEXBLENDOP_SUBTRACT; - break; + ablendop = TEXBLENDOP_BLEND; + break; + case GL_SUBTRACT: + ablendop = TEXBLENDOP_SUBTRACT; + break; default: - return pass_through( state, blendUnit ); + return pass_through(state, blendUnit); } ablendop |= (alpha_shift << TEXOP_SCALE_SHIFT); /* Handle A args */ - for(i = 0; i < 3; i++) { - switch(combine->SourceA[i]) { - case GL_TEXTURE: - args_A[i] = texel_op; - break; - case GL_TEXTURE0: - case GL_TEXTURE1: - case GL_TEXTURE2: - case GL_TEXTURE3: - args_A[i] = GetTexelOp( combine->SourceA[i] - GL_TEXTURE0 ); - break; - case GL_CONSTANT: - args_A[i] = TEXBLENDARG_FACTOR_N; - need_factor = 1; - break; - case GL_PRIMARY_COLOR: - args_A[i] = TEXBLENDARG_DIFFUSE; - break; - case GL_PREVIOUS: - args_A[i] = TEXBLENDARG_CURRENT; - break; - default: - return pass_through( state, blendUnit ); - } - - switch(combine->OperandA[i]) { - case GL_SRC_ALPHA: - args_A[i] |= 0; - break; - case GL_ONE_MINUS_SRC_ALPHA: - args_A[i] |= TEXBLENDARG_INV_ARG; - break; - default: - return pass_through( state, blendUnit ); - } + for (i = 0; i < 3; i++) { + switch (combine->SourceA[i]) { + case GL_TEXTURE: + args_A[i] = texel_op; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + args_A[i] = GetTexelOp(combine->SourceA[i] - GL_TEXTURE0); + break; + case GL_CONSTANT: + args_A[i] = TEXBLENDARG_FACTOR_N; + need_factor = 1; + break; + case GL_PRIMARY_COLOR: + args_A[i] = TEXBLENDARG_DIFFUSE; + break; + case GL_PREVIOUS: + args_A[i] = TEXBLENDARG_CURRENT; + break; + default: + return pass_through(state, blendUnit); + } + + switch (combine->OperandA[i]) { + case GL_SRC_ALPHA: + args_A[i] |= 0; + break; + case GL_ONE_MINUS_SRC_ALPHA: + args_A[i] |= TEXBLENDARG_INV_ARG; + break; + default: + return pass_through(state, blendUnit); + } } } @@ -363,86 +362,86 @@ i830SetTexEnvCombine(i830ContextPtr i830, used = 0; state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) | - TEXPIPE_COLOR | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - DISABLE_TEX_CNTRL_STAGE | - TEXOP_MODIFY_PARMS | - blendop); + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | TEXOP_MODIFY_PARMS | blendop); state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) | - TEXPIPE_ALPHA | - ENABLE_TEXOUTPUT_WRT_SEL | - TEXOP_OUTPUT_CURRENT | - TEXOP_MODIFY_PARMS | - ablendop); + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | TEXOP_MODIFY_PARMS | ablendop); - for ( i = 0 ; i < numColorArgs ; i++ ) { + for (i = 0; i < numColorArgs; i++) { state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) | - tex_blend_rgb[i] | args_RGB[i]); + tex_blend_rgb[i] | args_RGB[i]); } - for ( i = 0 ; i < numAlphaArgs ; i++ ) { + for (i = 0; i < numAlphaArgs; i++) { state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) | - tex_blend_a[i] | args_A[i]); + tex_blend_a[i] | args_A[i]); } - if (need_factor) - return emit_factor( blendUnit, state, used, factor ); - else + if (need_factor) + return emit_factor(blendUnit, state, used, factor); + else return used; } -static void emit_texblend( i830ContextPtr i830, GLuint unit, GLuint blendUnit, - GLboolean last_stage ) +static void +emit_texblend(struct i830_context *i830, GLuint unit, GLuint blendUnit, + GLboolean last_stage) { struct gl_texture_unit *texUnit = &i830->intel.ctx.Texture.Unit[unit]; GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz; - if (0) fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); + if (0) + fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); /* Update i830->state.TexBlend - */ - tmp_sz = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit, - GetTexelOp(unit), tmp, - texUnit->EnvColor ); + */ + tmp_sz = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit, + GetTexelOp(unit), tmp, texUnit->EnvColor); - if (last_stage) + if (last_stage) tmp[0] |= TEXOP_LAST_STAGE; if (tmp_sz != i830->state.TexBlendWordsUsed[blendUnit] || - memcmp( tmp, i830->state.TexBlend[blendUnit], tmp_sz * sizeof(GLuint))) { - - I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(blendUnit) ); - memcpy( i830->state.TexBlend[blendUnit], tmp, tmp_sz * sizeof(GLuint)); + memcmp(tmp, i830->state.TexBlend[blendUnit], + tmp_sz * sizeof(GLuint))) { + + I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(blendUnit)); + memcpy(i830->state.TexBlend[blendUnit], tmp, tmp_sz * sizeof(GLuint)); i830->state.TexBlendWordsUsed[blendUnit] = tmp_sz; } I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(blendUnit), GL_TRUE); } -static void emit_passthrough( i830ContextPtr i830 ) +static void +emit_passthrough(struct i830_context *i830) { GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz; GLuint unit = 0; - tmp_sz = pass_through( tmp, unit ); + tmp_sz = pass_through(tmp, unit); tmp[0] |= TEXOP_LAST_STAGE; if (tmp_sz != i830->state.TexBlendWordsUsed[unit] || - memcmp( tmp, i830->state.TexBlend[unit], tmp_sz * sizeof(GLuint))) { - - I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(unit) ); - memcpy( i830->state.TexBlend[unit], tmp, tmp_sz * sizeof(GLuint)); + memcmp(tmp, i830->state.TexBlend[unit], tmp_sz * sizeof(GLuint))) { + + I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(unit)); + memcpy(i830->state.TexBlend[unit], tmp, tmp_sz * sizeof(GLuint)); i830->state.TexBlendWordsUsed[unit] = tmp_sz; } I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(unit), GL_TRUE); } -void i830EmitTextureBlend( i830ContextPtr i830 ) +void +i830EmitTextureBlend(struct i830_context *i830) { GLcontext *ctx = &i830->intel.ctx; GLuint unit, last_stage = 0, blendunit = 0; @@ -450,16 +449,15 @@ void i830EmitTextureBlend( i830ContextPtr i830 ) I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND_ALL, GL_FALSE); if (ctx->Texture._EnabledUnits) { - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) - if (ctx->Texture.Unit[unit]._ReallyEnabled) - last_stage = unit; + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) + if (ctx->Texture.Unit[unit]._ReallyEnabled) + last_stage = unit; - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) - if (ctx->Texture.Unit[unit]._ReallyEnabled) - emit_texblend( i830, unit, blendunit++, last_stage == unit ); + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) + if (ctx->Texture.Unit[unit]._ReallyEnabled) + emit_texblend(i830, unit, blendunit++, last_stage == unit); } else { - emit_passthrough( i830 ); + emit_passthrough(i830); } } - diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c index ba972da..4e9b022 100644 --- a/i915/i830_texstate.c +++ b/i915/i830_texstate.c @@ -25,459 +25,327 @@ * **************************************************************************/ -#include "glheader.h" -#include "macros.h" #include "mtypes.h" -#include "simple_list.h" #include "enums.h" #include "texformat.h" -#include "texstore.h" +#include "dri_bufmgr.h" -#include "mm.h" - -#include "intel_screen.h" -#include "intel_ioctl.h" +#include "intel_mipmap_tree.h" #include "intel_tex.h" #include "i830_context.h" #include "i830_reg.h" -static const GLint initial_offsets[6][2] = { {0,0}, - {0,2}, - {1,0}, - {1,2}, - {1,1}, - {1,3} }; - -static const GLint step_offsets[6][2] = { {0,2}, - {0,2}, - {-1,2}, - {-1,2}, - {-1,1}, - {-1,1} }; -#define I830_TEX_UNIT_ENABLED(unit) (1<<unit) -static GLboolean i830SetTexImages( i830ContextPtr i830, - struct gl_texture_object *tObj ) +static GLuint +translate_texture_format(GLuint mesa_format) { - GLuint total_height, pitch, i, textureFormat; - i830TextureObjectPtr t = (i830TextureObjectPtr) tObj->DriverData; - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint firstLevel, lastLevel, numLevels; - - switch( baseImage->TexFormat->MesaFormat ) { + switch (mesa_format) { case MESA_FORMAT_L8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_L8; - break; - + return MAPSURF_8BIT | MT_8BIT_L8; case MESA_FORMAT_I8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_I8; - break; - + return MAPSURF_8BIT | MT_8BIT_I8; case MESA_FORMAT_A8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_I8; /* Kludge -- check with conform, glean */ - break; - + return MAPSURF_8BIT | MT_8BIT_I8; /* Kludge! */ case MESA_FORMAT_AL88: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_AY88; - break; - + return MAPSURF_16BIT | MT_16BIT_AY88; case MESA_FORMAT_RGB565: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - break; - + return MAPSURF_16BIT | MT_16BIT_RGB565; case MESA_FORMAT_ARGB1555: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555; - break; - + return MAPSURF_16BIT | MT_16BIT_ARGB1555; case MESA_FORMAT_ARGB4444: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444; - break; - + return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - t->intel.texelBytes = 4; - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - break; - + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL | - TM0S1_COLORSPACE_CONVERSION); - break; - + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY | /* ??? */ - TM0S1_COLORSPACE_CONVERSION); - break; - + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_COMPRESSED | MT_COMPRESS_FXT1; - break; - + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: - /* - * DXTn pitches are Width/4 * blocksize in bytes - * for DXT1: blocksize=8 so Width/4*8 = Width * 2 - * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4 - */ - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); - break; + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); case MESA_FORMAT_RGBA_DXT3: - t->intel.texelBytes = 4; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); - break; + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); case MESA_FORMAT_RGBA_DXT5: - t->intel.texelBytes = 4; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); - break; - + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format\n", __FUNCTION__); + fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); abort(); + return 0; } - - /* Compute which mipmap levels we really want to send to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. - * Yes, this looks overly complicated, but it's all needed. - */ - driCalculateTextureFirstLastLevel( (driTextureObject *) t ); +} - /* Figure out the amount of memory required to hold all the mipmap - * levels. Choose the smallest pitch to accomodate the largest - * mipmap: - */ - firstLevel = t->intel.base.firstLevel; - lastLevel = t->intel.base.lastLevel; - numLevels = lastLevel - firstLevel + 1; - /* All images must be loaded at this pitch. Count the number of - * lines required: - */ - switch (tObj->Target) { - case GL_TEXTURE_CUBE_MAP: { - const GLuint dim = tObj->Image[0][firstLevel]->Width; - GLuint face; - - pitch = dim * t->intel.texelBytes; - pitch *= 2; /* double pitch for cube layouts */ - pitch = (pitch + 3) & ~3; - - total_height = dim * 4; - - for ( face = 0 ; face < 6 ; face++) { - GLuint x = initial_offsets[face][0] * dim; - GLuint y = initial_offsets[face][1] * dim; - GLuint d = dim; - - t->intel.base.dirty_images[face] = ~0; - - assert(tObj->Image[face][firstLevel]->Width == dim); - assert(tObj->Image[face][firstLevel]->Height == dim); - - for (i = 0; i < numLevels; i++) { - t->intel.image[face][i].image = tObj->Image[face][firstLevel + i]; - if (!t->intel.image[face][i].image) { - fprintf(stderr, "no image %d %d\n", face, i); - break; /* can't happen */ - } - - t->intel.image[face][i].offset = - y * pitch + x * t->intel.texelBytes; - t->intel.image[face][i].internalFormat = baseImage->_BaseFormat; - - d >>= 1; - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - } - } - break; - } +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static GLuint +translate_wrap_mode(GLenum wrap) +{ + switch (wrap) { + case GL_REPEAT: + return TEXCOORDMODE_WRAP; + case GL_CLAMP: + case GL_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP; /* not really correct */ + case GL_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: + return TEXCOORDMODE_MIRROR; default: - pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes; - pitch = (pitch + 3) & ~3; - t->intel.base.dirty_images[0] = ~0; - - for ( total_height = i = 0 ; i < numLevels ; i++ ) { - t->intel.image[0][i].image = tObj->Image[0][firstLevel + i]; - if (!t->intel.image[0][i].image) - break; - - t->intel.image[0][i].offset = total_height * pitch; - t->intel.image[0][i].internalFormat = baseImage->_BaseFormat; - if (t->intel.image[0][i].image->IsCompressed) - { - if (t->intel.image[0][i].image->Height > 4) - total_height += t->intel.image[0][i].image->Height/4; - else - total_height += 1; - } - else - total_height += MAX2(2, t->intel.image[0][i].image->Height); - } - break; + return TEXCOORDMODE_WRAP; } - - t->intel.Pitch = pitch; - t->intel.base.totalSize = total_height*pitch; - t->intel.max_level = i-1; - t->Setup[I830_TEXREG_TM0S1] = - (((tObj->Image[0][firstLevel]->Height - 1) << TM0S1_HEIGHT_SHIFT) | - ((tObj->Image[0][firstLevel]->Width - 1) << TM0S1_WIDTH_SHIFT) | - textureFormat); - t->Setup[I830_TEXREG_TM0S2] = - (((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | - TM0S2_CUBE_FACE_ENA_MASK; - t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK; - t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK; - t->Setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT; - t->intel.dirty = I830_UPLOAD_TEX_ALL; - - return intelUploadTexImages( &i830->intel, &t->intel, 0 ); } -static void i830_import_tex_unit( i830ContextPtr i830, - i830TextureObjectPtr t, - GLuint unit ) +/* Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. + */ +static GLboolean +i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { - if(INTEL_DEBUG&DEBUG_TEXTURE) - fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit); - - if (i830->intel.CurrentTexObj[unit]) - i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit); - - i830->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t; - t->intel.base.bound |= (1 << unit); - - I830_STATECHANGE( i830, I830_UPLOAD_TEX(unit) ); - - i830->state.Tex[unit][I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | - (LOAD_TEXTURE_MAP0 << unit) | 4); - i830->state.Tex[unit][I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | - t->intel.TextureOffset); - - i830->state.Tex[unit][I830_TEXREG_TM0S1] = t->Setup[I830_TEXREG_TM0S1]; - i830->state.Tex[unit][I830_TEXREG_TM0S2] = t->Setup[I830_TEXREG_TM0S2]; - - i830->state.Tex[unit][I830_TEXREG_TM0S3] &= TM0S3_LOD_BIAS_MASK; - i830->state.Tex[unit][I830_TEXREG_TM0S3] |= (t->Setup[I830_TEXREG_TM0S3] & - ~TM0S3_LOD_BIAS_MASK); - - i830->state.Tex[unit][I830_TEXREG_TM0S4] = t->Setup[I830_TEXREG_TM0S4]; - i830->state.Tex[unit][I830_TEXREG_MCS] = (t->Setup[I830_TEXREG_MCS] & - ~MAP_UNIT_MASK); - i830->state.Tex[unit][I830_TEXREG_CUBE] = t->Setup[I830_TEXREG_CUBE]; - i830->state.Tex[unit][I830_TEXREG_MCS] |= MAP_UNIT(unit); - - t->intel.dirty &= ~I830_UPLOAD_TEX(unit); -} - + GLcontext *ctx = &intel->ctx; + struct i830_context *i830 = i830_context(ctx); + struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = tUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage; + GLuint *state = i830->state.Tex[unit], format, pitch; + GLint lodbias; + memset(state, 0, sizeof(state)); -static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit ) -{ - i830ContextPtr i830 = I830_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData; + /*We need to refcount these. */ - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (i830->state.tex_buffer[unit] != NULL) { + dri_bo_unreference(i830->state.tex_buffer[unit]); + i830->state.tex_buffer[unit] = NULL; + } - /* Fallback if there's a texture border */ - if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) { - fprintf(stderr, "Texture border\n"); + if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit)) return GL_FALSE; - } - /* Upload teximages (not pipelined) + /* Get first image here, since intelObj->firstLevel will get set in + * the intel_finalize_mipmap_tree() call above. */ - if (t->intel.base.dirty_images[0]) { - if (!i830SetTexImages( i830, tObj )) { - return GL_FALSE; + firstImage = tObj->Image[0][intelObj->firstLevel]; + + if (intelObj->imageOverride) { + i830->state.tex_buffer[unit] = NULL; + i830->state.tex_offset[unit] = intelObj->textureOffset; + + switch (intelObj->depthOverride) { + case 32: + format = MAPSURF_32BIT | MT_32BIT_ARGB8888; + break; + case 24: + default: + format = MAPSURF_32BIT | MT_32BIT_XRGB8888; + break; + case 16: + format = MAPSURF_16BIT | MT_16BIT_RGB565; + break; } - } - - /* Update state if this is a different texture object to last - * time. - */ - if (i830->intel.CurrentTexObj[unit] != &t->intel || - (t->intel.dirty & I830_UPLOAD_TEX(unit))) { - i830_import_tex_unit( i830, t, unit); - } - - I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); - - return GL_TRUE; -} - -static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit ) -{ - i830ContextPtr i830 = I830_CONTEXT(ctx); - GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS]; - mcs &= ~TEXCOORDS_ARE_NORMAL; - mcs |= TEXCOORDS_ARE_IN_TEXELUNITS; + pitch = intelObj->pitchOverride; + } else { + dri_bo_reference(intelObj->mt->region->buffer); + i830->state.tex_buffer[unit] = intelObj->mt->region->buffer; + i830->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt, + 0, intelObj-> + firstLevel); - if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS]) - || (0 != i830->state.Tex[unit][I830_TEXREG_CUBE])) { - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->state.Tex[unit][I830_TEXREG_MCS] = mcs; - i830->state.Tex[unit][I830_TEXREG_CUBE] = 0; + format = translate_texture_format(firstImage->TexFormat->MesaFormat); + pitch = intelObj->mt->pitch * intelObj->mt->cpp; } - return GL_TRUE; -} + state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + (LOAD_TEXTURE_MAP0 << unit) | 4); +/* state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */ +/* t->intel.TextureOffset); */ -static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit ) -{ - i830ContextPtr i830 = I830_CONTEXT(ctx); - GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS]; - mcs &= ~TEXCOORDS_ARE_IN_TEXELUNITS; - mcs |= TEXCOORDS_ARE_NORMAL; + state[I830_TEXREG_TM0S1] = + (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | + ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); + + state[I830_TEXREG_TM0S2] = + ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); - if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS]) - || (0 != i830->state.Tex[unit][I830_TEXREG_CUBE])) { - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->state.Tex[unit][I830_TEXREG_MCS] = mcs; - i830->state.Tex[unit][I830_TEXREG_CUBE] = 0; + { + if (tObj->Target == GL_TEXTURE_CUBE_MAP) + state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) | + CUBE_NEGX_ENABLE | + CUBE_POSX_ENABLE | + CUBE_NEGY_ENABLE | + CUBE_POSY_ENABLE | + CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE); + else + state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit)); } - return GL_TRUE; -} - -static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit ) -{ - i830ContextPtr i830 = I830_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData; - GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS]; - const GLuint cube = CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE - | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE - | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE; - GLuint face; - - mcs &= ~TEXCOORDS_ARE_IN_TEXELUNITS; - mcs |= TEXCOORDS_ARE_NORMAL; - - if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS]) - || (cube != i830->state.Tex[unit][I830_TEXREG_CUBE])) { - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->state.Tex[unit][I830_TEXREG_MCS] = mcs; - i830->state.Tex[unit][I830_TEXREG_CUBE] = cube; - } - /* Upload teximages (not pipelined) - */ - if ( t->intel.base.dirty_images[0] || t->intel.base.dirty_images[1] || - t->intel.base.dirty_images[2] || t->intel.base.dirty_images[3] || - t->intel.base.dirty_images[4] || t->intel.base.dirty_images[5] ) { - i830SetTexImages( i830, tObj ); - } - /* upload (per face) */ - for (face = 0; face < 6; face++) { - if (t->intel.base.dirty_images[face]) { - if (!intelUploadTexImages( &i830->intel, &t->intel, face )) { - return GL_FALSE; - } + { + GLuint minFilt, mipFilt, magFilt; + + switch (tObj->MinFilter) { + case GL_NEAREST: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_NONE; + break; + case GL_LINEAR: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_LINEAR; + break; + default: + return GL_FALSE; } - } - - return GL_TRUE; -} + if (tObj->MaxAnisotropy > 1.0) { + minFilt = FILTER_ANISOTROPIC; + magFilt = FILTER_ANISOTROPIC; + } + else { + switch (tObj->MagFilter) { + case GL_NEAREST: + magFilt = FILTER_NEAREST; + break; + case GL_LINEAR: + magFilt = FILTER_LINEAR; + break; + default: + return GL_FALSE; + } + } + lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0); + if (lodbias < -64) + lodbias = -64; + if (lodbias > 63) + lodbias = 63; + + state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) & + TM0S3_LOD_BIAS_MASK); +#if 0 + /* YUV conversion: + */ + if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || + firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) + state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; +#endif + + state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel - + intelObj->firstLevel) * + 4) << TM0S3_MIN_MIP_SHIFT; + + state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) | + (mipFilt << TM0S3_MIP_FILTER_SHIFT) | + (magFilt << TM0S3_MAG_FILTER_SHIFT)); + } -static GLboolean disable_tex( GLcontext *ctx, GLuint unit ) -{ - i830ContextPtr i830 = I830_CONTEXT(ctx); + { + GLenum ws = tObj->WrapS; + GLenum wt = tObj->WrapT; - /* This is happening too often. I need to conditionally send diffuse - * state to the card. Perhaps a diffuse dirty flag of some kind. - * Will need to change this logic if more than 2 texture units are - * used. We need to only do this up to the last unit enabled, or unit - * one if nothing is enabled. - */ - if ( i830->intel.CurrentTexObj[unit] != NULL ) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. + /* 3D textures not available on i830 */ - - i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << 0); - i830->intel.CurrentTexObj[unit] = NULL; + if (tObj->Target == GL_TEXTURE_3D) + return GL_FALSE; + + state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD | + MAP_UNIT(unit) | + ENABLE_TEXCOORD_PARAMS | + ss3 | + ENABLE_ADDR_V_CNTL | + TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt)) + | ENABLE_ADDR_U_CNTL | + TEXCOORD_ADDR_U_MODE(translate_wrap_mode + (ws))); } - return GL_TRUE; -} -static GLboolean i830UpdateTexUnit( GLcontext *ctx, GLuint unit ) -{ - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], + tObj->_BorderChan[1], + tObj->_BorderChan[2], + tObj->_BorderChan[3]); - if (texUnit->_ReallyEnabled && - INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024) - return GL_FALSE; - switch(texUnit->_ReallyEnabled) { - case TEXTURE_1D_BIT: - case TEXTURE_2D_BIT: - return (enable_tex_common( ctx, unit ) && - enable_tex_2d( ctx, unit )); - case TEXTURE_RECT_BIT: - return (enable_tex_common( ctx, unit ) && - enable_tex_rect( ctx, unit )); - case TEXTURE_CUBE_BIT: - return (enable_tex_common( ctx, unit ) && - enable_tex_cube( ctx, unit )); - case 0: - return disable_tex( ctx, unit ); - default: - return GL_FALSE; - } + I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); + /* memcmp was already disabled, but definitely won't work as the + * region might now change and that wouldn't be detected: + */ + I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); + return GL_TRUE; } -void i830UpdateTextureState( intelContextPtr intel ) -{ - i830ContextPtr i830 = I830_CONTEXT(intel); - GLcontext *ctx = &intel->ctx; - GLboolean ok; - - if (0) fprintf(stderr, "%s\n", __FUNCTION__); - I830_ACTIVESTATE(i830, I830_UPLOAD_TEX_ALL, GL_FALSE); - ok = (i830UpdateTexUnit( ctx, 0 ) && - i830UpdateTexUnit( ctx, 1 ) && - i830UpdateTexUnit( ctx, 2 ) && - i830UpdateTexUnit( ctx, 3 )); +void +i830UpdateTextureState(struct intel_context *intel) +{ + struct i830_context *i830 = i830_context(&intel->ctx); + GLboolean ok = GL_TRUE; + GLuint i; + + for (i = 0; i < I830_TEX_UNITS && ok; i++) { + switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) { + case TEXTURE_1D_BIT: + case TEXTURE_2D_BIT: + case TEXTURE_CUBE_BIT: + ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_NORMAL); + break; + case TEXTURE_RECT_BIT: + ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_IN_TEXELUNITS); + break; + case 0:{ + struct i830_context *i830 = i830_context(&intel->ctx); + if (i830->state.active & I830_UPLOAD_TEX(i)) + I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(i), GL_FALSE); + + if (i830->state.tex_buffer[i] != NULL) { + dri_bo_unreference(i830->state.tex_buffer[i]); + i830->state.tex_buffer[i] = NULL; + } + break; + } + case TEXTURE_3D_BIT: + default: + ok = GL_FALSE; + break; + } + } - FALLBACK( intel, I830_FALLBACK_TEXTURE, !ok ); + FALLBACK(intel, I830_FALLBACK_TEXTURE, !ok); if (ok) - i830EmitTextureBlend( i830 ); + i830EmitTextureBlend(i830); } - - - diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c index d40cf70..c5a85fe 100644 --- a/i915/i830_vtbl.c +++ b/i915/i830_vtbl.c @@ -25,17 +25,19 @@ * **************************************************************************/ +#include "glapi.h" #include "i830_context.h" #include "i830_reg.h" - #include "intel_batchbuffer.h" - +#include "intel_regions.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" -static GLboolean i830_check_vertex_size( intelContextPtr intel, - GLuint expected ); +#define FILE_DEBUG_FLAG DEBUG_STATE + +static GLboolean i830_check_vertex_size(struct intel_context *intel, + GLuint expected); #define SZ_TO_HW(sz) ((sz-2)&0x3) #define EMIT_SZ(sz) (EMIT_1F + (sz) - 1) @@ -59,10 +61,16 @@ do { \ #define VRTX_TEX_SET_FMT(n, x) ((x)<<((n)*2)) #define TEXBIND_SET(n, x) ((x)<<((n)*4)) -static void i830_render_start( intelContextPtr intel ) +static void +i830_render_prevalidate(struct intel_context *intel) +{ +} + +static void +i830_render_start(struct intel_context *intel) { GLcontext *ctx = &intel->ctx; - i830ContextPtr i830 = I830_CONTEXT(intel); + struct i830_context *i830 = i830_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; DECLARE_RENDERINPUTS(index_bitset); @@ -70,7 +78,7 @@ static void i830_render_start( intelContextPtr intel ) GLuint v2 = _3DSTATE_VFT1_CMD; GLuint mcsb1 = 0; - RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + RENDERINPUTS_COPY(index_bitset, tnl->render_inputs_bitset); /* Important: */ @@ -80,196 +88,215 @@ static void i830_render_start( intelContextPtr intel ) /* EMIT_ATTR's must be in order as they tell t_vertex.c how to * build up a hardware vertex. */ - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW ); + if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) { + EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW); intel->coloroffset = 4; } else { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ ); + EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ); intel->coloroffset = 3; } - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { - EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH ); + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_POINTSIZE)) { + EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH); } - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE ); - + EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE); + intel->specoffset = 0; - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || - RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1) || + RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) { + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1)) { intel->specoffset = intel->coloroffset + 1; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC ); + EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC); } else - EMIT_PAD( 3 ); + EMIT_PAD(3); - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC ); + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) + EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC); else - EMIT_PAD( 1 ); + EMIT_PAD(1); } - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) { int i, count = 0; for (i = 0; i < I830_TEX_UNITS; i++) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_TEX(i))) { GLuint sz = VB->TexCoordPtr[i]->size; GLuint emit; - GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] & + GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] & ~TEXCOORDTYPE_MASK); - switch (sz) { - case 1: - case 2: - emit = EMIT_2F; - sz = 2; - mcs |= TEXCOORDTYPE_CARTESIAN; - break; - case 3: - emit = EMIT_3F; - sz = 3; - mcs |= TEXCOORDTYPE_VECTOR; - break; - case 4: - emit = EMIT_3F_XYW; - sz = 3; - mcs |= TEXCOORDTYPE_HOMOGENEOUS; - break; - default: - continue; - }; - - - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, emit, 0 ); - v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz)); - mcsb1 |= (count+8)<<(i*4); - - if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) { - I830_STATECHANGE(i830, I830_UPLOAD_TEX(i)); - i830->state.Tex[i][I830_TEXREG_MCS] = mcs; - } - - count++; - } + switch (sz) { + case 1: + case 2: + emit = EMIT_2F; + sz = 2; + mcs |= TEXCOORDTYPE_CARTESIAN; + break; + case 3: + emit = EMIT_3F; + sz = 3; + mcs |= TEXCOORDTYPE_VECTOR; + break; + case 4: + emit = EMIT_3F_XYW; + sz = 3; + mcs |= TEXCOORDTYPE_HOMOGENEOUS; + break; + default: + continue; + }; + + + EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, emit, 0); + v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz)); + mcsb1 |= (count + 8) << (i * 4); + + if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) { + I830_STATECHANGE(i830, I830_UPLOAD_TEX(i)); + i830->state.Tex[i][I830_TEXREG_MCS] = mcs; + } + + count++; + } } v0 |= VFT0_TEX_COUNT(count); } - + /* Only need to change the vertex emit code if there has been a * statechange to a new hardware vertex format: */ if (v0 != i830->state.Ctx[I830_CTXREG_VF] || v2 != i830->state.Ctx[I830_CTXREG_VF2] || mcsb1 != i830->state.Ctx[I830_CTXREG_MCSB1] || - !RENDERINPUTS_EQUAL( index_bitset, i830->last_index_bitset )) { - - I830_STATECHANGE( i830, I830_UPLOAD_CTX ); + !RENDERINPUTS_EQUAL(index_bitset, i830->last_index_bitset)) { + int k; + + I830_STATECHANGE(i830, I830_UPLOAD_CTX); /* Must do this *after* statechange, so as not to affect * buffered vertices reliant on the old state: */ - intel->vertex_size = - _tnl_install_attrs( ctx, - intel->vertex_attrs, - intel->vertex_attr_count, - intel->ViewportMatrix.m, 0 ); + intel->vertex_size = + _tnl_install_attrs(ctx, + intel->vertex_attrs, + intel->vertex_attr_count, + intel->ViewportMatrix.m, 0); intel->vertex_size >>= 2; i830->state.Ctx[I830_CTXREG_VF] = v0; i830->state.Ctx[I830_CTXREG_VF2] = v2; i830->state.Ctx[I830_CTXREG_MCSB1] = mcsb1; - RENDERINPUTS_COPY( i830->last_index_bitset, index_bitset ); + RENDERINPUTS_COPY(i830->last_index_bitset, index_bitset); - assert(i830_check_vertex_size( intel, intel->vertex_size )); + k = i830_check_vertex_size(intel, intel->vertex_size); + assert(k); } } -static void i830_reduced_primitive_state( intelContextPtr intel, - GLenum rprim ) +static void +i830_reduced_primitive_state(struct intel_context *intel, GLenum rprim) { - i830ContextPtr i830 = I830_CONTEXT(intel); - GLuint st1 = i830->state.Stipple[I830_STPREG_ST1]; - - st1 &= ~ST1_ENABLE; - - switch (rprim) { - case GL_TRIANGLES: - if (intel->ctx.Polygon.StippleFlag && - intel->hw_stipple) - st1 |= ST1_ENABLE; - break; - case GL_LINES: - case GL_POINTS: - default: - break; - } - - i830->intel.reduced_primitive = rprim; - - if (st1 != i830->state.Stipple[I830_STPREG_ST1]) { - I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE); - i830->state.Stipple[I830_STPREG_ST1] = st1; - } + struct i830_context *i830 = i830_context(&intel->ctx); + GLuint st1 = i830->state.Stipple[I830_STPREG_ST1]; + + st1 &= ~ST1_ENABLE; + + switch (rprim) { + case GL_TRIANGLES: + if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple) + st1 |= ST1_ENABLE; + break; + case GL_LINES: + case GL_POINTS: + default: + break; + } + + i830->intel.reduced_primitive = rprim; + + if (st1 != i830->state.Stipple[I830_STPREG_ST1]) { + INTEL_FIREVERTICES(intel); + + I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE); + i830->state.Stipple[I830_STPREG_ST1] = st1; + } } /* Pull apart the vertex format registers and figure out how large a * vertex is supposed to be. */ -static GLboolean i830_check_vertex_size( intelContextPtr intel, - GLuint expected ) +static GLboolean +i830_check_vertex_size(struct intel_context *intel, GLuint expected) { - i830ContextPtr i830 = I830_CONTEXT(intel); + struct i830_context *i830 = i830_context(&intel->ctx); int vft0 = i830->current->Ctx[I830_CTXREG_VF]; int vft1 = i830->current->Ctx[I830_CTXREG_VF2]; int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT; int i, sz = 0; switch (vft0 & VFT0_XYZW_MASK) { - case VFT0_XY: sz = 2; break; - case VFT0_XYZ: sz = 3; break; - case VFT0_XYW: sz = 3; break; - case VFT0_XYZW: sz = 4; break; - default: + case VFT0_XY: + sz = 2; + break; + case VFT0_XYZ: + sz = 3; + break; + case VFT0_XYW: + sz = 3; + break; + case VFT0_XYZW: + sz = 4; + break; + default: fprintf(stderr, "no xyzw specified\n"); return 0; } - if (vft0 & VFT0_SPEC) sz++; - if (vft0 & VFT0_DIFFUSE) sz++; - if (vft0 & VFT0_DEPTH_OFFSET) sz++; - if (vft0 & VFT0_POINT_WIDTH) sz++; - - for (i = 0 ; i < nrtex ; i++) { + if (vft0 & VFT0_SPEC) + sz++; + if (vft0 & VFT0_DIFFUSE) + sz++; + if (vft0 & VFT0_DEPTH_OFFSET) + sz++; + if (vft0 & VFT0_POINT_WIDTH) + sz++; + + for (i = 0; i < nrtex; i++) { switch (vft1 & VFT1_TEX0_MASK) { - case TEXCOORDFMT_2D: sz += 2; break; - case TEXCOORDFMT_3D: sz += 3; break; - case TEXCOORDFMT_4D: sz += 4; break; - case TEXCOORDFMT_1D: sz += 1; break; + case TEXCOORDFMT_2D: + sz += 2; + break; + case TEXCOORDFMT_3D: + sz += 3; + break; + case TEXCOORDFMT_4D: + sz += 4; + break; + case TEXCOORDFMT_1D: + sz += 1; + break; } vft1 >>= VFT1_TEX1_SHIFT; } - - if (sz != expected) + + if (sz != expected) fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected); - + return sz == expected; } -static void i830_emit_invarient_state( intelContextPtr intel ) +static void +i830_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH( 40 ); - - OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); - OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); - OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2)); - OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3)); + BEGIN_BATCH(40, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -282,37 +309,35 @@ static void i830_emit_invarient_state( intelContextPtr intel ) OUT_BATCH(_3DSTATE_FOG_MODE_CMD); OUT_BATCH(FOGFUNC_ENABLE | - FOG_LINEAR_CONST | - FOGSRC_INDEX_Z | - ENABLE_FOG_DENSITY); + FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | - MAP_UNIT(0) | - DISABLE_TEX_STREAM_BUMP | - ENABLE_TEX_STREAM_COORD_SET | - TEX_STREAM_COORD_SET(0) | - ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0)); + MAP_UNIT(0) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0)); OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | - MAP_UNIT(1) | - DISABLE_TEX_STREAM_BUMP | - ENABLE_TEX_STREAM_COORD_SET | - TEX_STREAM_COORD_SET(1) | - ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1)); + MAP_UNIT(1) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(1) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1)); OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | - MAP_UNIT(2) | - DISABLE_TEX_STREAM_BUMP | - ENABLE_TEX_STREAM_COORD_SET | - TEX_STREAM_COORD_SET(2) | - ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2)); + MAP_UNIT(2) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(2) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2)); OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | - MAP_UNIT(3) | - DISABLE_TEX_STREAM_BUMP | - ENABLE_TEX_STREAM_COORD_SET | - TEX_STREAM_COORD_SET(3) | - ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3)); + MAP_UNIT(3) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(3) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3)); OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0)); @@ -324,21 +349,13 @@ static void i830_emit_invarient_state( intelContextPtr intel ) OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - ENABLE_TRI_STRIP_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - TRI_STRIP_PROVOKE_VRTX(2)); - - OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); - - OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); - OUT_BATCH(0); - OUT_BATCH(0); + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); @@ -349,45 +366,46 @@ static void i830_emit_invarient_state( intelContextPtr intel ) OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD); - OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ + OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ ADVANCE_BATCH(); } #define emit( intel, state, size ) \ -do { \ - int k; \ - BEGIN_BATCH( size / sizeof(GLuint)); \ - for (k = 0 ; k < size / sizeof(GLuint) ; k++) \ - OUT_BATCH(state[k]); \ - ADVANCE_BATCH(); \ -} while (0); - -static GLuint get_state_size( struct i830_hw_state *state ) + intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS ) + +static GLuint +get_dirty(struct i830_hw_state *state) { - GLuint dirty = state->active & ~state->emitted; + return state->active & ~state->emitted; +} + +static GLuint +get_state_size(struct i830_hw_state *state) +{ + GLuint dirty = get_dirty(state); GLuint sz = 0; GLuint i; if (dirty & I830_UPLOAD_INVARIENT) sz += 40 * sizeof(int); - if (dirty & I830_UPLOAD_CTX) + if (dirty & I830_UPLOAD_CTX) sz += sizeof(state->Ctx); - if (dirty & I830_UPLOAD_BUFFERS) + if (dirty & I830_UPLOAD_BUFFERS) sz += sizeof(state->Buffer); - if (dirty & I830_UPLOAD_STIPPLE) + if (dirty & I830_UPLOAD_STIPPLE) sz += sizeof(state->Stipple); for (i = 0; i < I830_TEX_UNITS; i++) { - if ((dirty & I830_UPLOAD_TEX(i))) - sz += sizeof(state->Tex[i]); + if ((dirty & I830_UPLOAD_TEX(i))) + sz += sizeof(state->Tex[i]); - if (dirty & I830_UPLOAD_TEXBLEND(i)) - sz += state->TexBlendWordsUsed[i] * 4; + if (dirty & I830_UPLOAD_TEXBLEND(i)) + sz += state->TexBlendWordsUsed[i] * 4; } return sz; @@ -396,139 +414,307 @@ static GLuint get_state_size( struct i830_hw_state *state ) /* Push the state into the sarea and/or texture memory. */ -static void i830_emit_state( intelContextPtr intel ) +static void +i830_emit_state(struct intel_context *intel) { - i830ContextPtr i830 = I830_CONTEXT(intel); + struct i830_context *i830 = i830_context(&intel->ctx); struct i830_hw_state *state = i830->current; - int i; - GLuint dirty = state->active & ~state->emitted; - GLuint counter = intel->batch.counter; + int i, ret, count; + GLuint dirty; + GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; - if (intel->batch.space < get_state_size(state)) { - intelFlushBatch(intel, GL_TRUE); - dirty = state->active & ~state->emitted; - counter = intel->batch.counter; + /* We don't hold the lock at this point, so want to make sure that + * there won't be a buffer wrap between the state emits and the primitive + * emit header. + * + * It might be better to talk about explicit places where + * scheduling is allowed, rather than assume that it is whenever a + * batchbuffer fills up. + * + * Set the space as LOOP_CLIPRECTS now, since that's what our primitives + * will be emitted under. + */ + intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, + LOOP_CLIPRECTS); + count = 0; + again: + dirty = get_dirty(state); + + ret = 0; + if (dirty & I830_UPLOAD_BUFFERS) { + ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); + ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); } + + for (i = 0; i < I830_TEX_UNITS; i++) + if (dirty & I830_UPLOAD_TEX(i)) { + if (state->tex_buffer[i]) { + ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); + } + } + + if (ret) { + if (count == 0) { + count++; + intel_batchbuffer_flush(intel->batch); + goto again; + } else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state"); + assert(0); + } + } + + + /* Do this here as we may have flushed the batchbuffer above, + * causing more state to be dirty! + */ + dirty = get_dirty(state); + state->emitted |= dirty; + assert(get_dirty(state) == 0); if (dirty & I830_UPLOAD_INVARIENT) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_INVARIENT:\n"); - i830_emit_invarient_state( intel ); + DBG("I830_UPLOAD_INVARIENT:\n"); + i830_emit_invarient_state(intel); } if (dirty & I830_UPLOAD_CTX) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_CTX:\n"); - emit( i830, state->Ctx, sizeof(state->Ctx) ); + DBG("I830_UPLOAD_CTX:\n"); + emit(intel, state->Ctx, sizeof(state->Ctx)); + } if (dirty & I830_UPLOAD_BUFFERS) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_BUFFERS:\n"); - emit( i830, state->Buffer, sizeof(state->Buffer) ); - } + DBG("I830_UPLOAD_BUFFERS:\n"); + BEGIN_BATCH(I830_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS); + OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]); + OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]); + OUT_RELOC(state->draw_region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + state->draw_region->draw_offset); + + if (state->depth_region) { + OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]); + OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]); + OUT_RELOC(state->depth_region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + state->depth_region->draw_offset); + } + OUT_BATCH(state->Buffer[I830_DESTREG_DV0]); + OUT_BATCH(state->Buffer[I830_DESTREG_DV1]); + OUT_BATCH(state->Buffer[I830_DESTREG_SENABLE]); + OUT_BATCH(state->Buffer[I830_DESTREG_SR0]); + OUT_BATCH(state->Buffer[I830_DESTREG_SR1]); + OUT_BATCH(state->Buffer[I830_DESTREG_SR2]); + ADVANCE_BATCH(); + } + if (dirty & I830_UPLOAD_STIPPLE) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_STIPPLE:\n"); - emit( i830, state->Stipple, sizeof(state->Stipple) ); + DBG("I830_UPLOAD_STIPPLE:\n"); + emit(intel, state->Stipple, sizeof(state->Stipple)); } for (i = 0; i < I830_TEX_UNITS; i++) { - if ((dirty & I830_UPLOAD_TEX(i))) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEX(%d):\n", i); - emit( i830, state->Tex[i], sizeof(state->Tex[i])); - } + if ((dirty & I830_UPLOAD_TEX(i))) { + DBG("I830_UPLOAD_TEX(%d):\n", i); + + BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1, IGNORE_CLIPRECTS); + OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]); + + if (state->tex_buffer[i]) { + OUT_RELOC(state->tex_buffer[i], + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + state->tex_offset[i] | TM0S0_USE_FENCE); + } + else if (state == &i830->meta) { + assert(i == 0); + OUT_BATCH(0); + } + else { + OUT_BATCH(state->tex_offset[i]); + } + + OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]); + OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]); + OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S3]); + OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S4]); + OUT_BATCH(state->Tex[i][I830_TEXREG_MCS]); + OUT_BATCH(state->Tex[i][I830_TEXREG_CUBE]); + } if (dirty & I830_UPLOAD_TEXBLEND(i)) { - if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEXBLEND(%d):\n", i); - emit( i830, state->TexBlend[i], - state->TexBlendWordsUsed[i] * 4 ); + DBG("I830_UPLOAD_TEXBLEND(%d): %d words\n", i, + state->TexBlendWordsUsed[i]); + emit(intel, state->TexBlend[i], state->TexBlendWordsUsed[i] * 4); } } - state->emitted |= dirty; - intel->batch.last_emit_state = counter; - assert(counter == intel->batch.counter); + intel->batch->dirty_state &= ~dirty; + assert(get_dirty(state) == 0); + assert((intel->batch->dirty_state & (1<<1)) == 0); } -static void i830_destroy_context( intelContextPtr intel ) +static void +i830_destroy_context(struct intel_context *intel) { + GLuint i; + struct i830_context *i830 = i830_context(&intel->ctx); + + for (i = 0; i < I830_TEX_UNITS; i++) { + if (i830->state.tex_buffer[i] != NULL) { + dri_bo_unreference(i830->state.tex_buffer[i]); + i830->state.tex_buffer[i] = NULL; + } + } + _tnl_free_vertices(&intel->ctx); } -static void -i830_set_color_region(intelContextPtr intel, const intelRegion *region) + +void +i830_state_draw_region(struct intel_context *intel, + struct i830_hw_state *state, + struct intel_region *color_region, + struct intel_region *depth_region) { - i830ContextPtr i830 = I830_CONTEXT(intel); - I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS ); - i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_CBUFADDR2] = region->offset; + struct i830_context *i830 = i830_context(&intel->ctx); + GLuint value; + + ASSERT(state == &i830->state || state == &i830->meta); + + if (state->draw_region != color_region) { + intel_region_release(&state->draw_region); + intel_region_reference(&state->draw_region, color_region); + } + if (state->depth_region != depth_region) { + intel_region_release(&state->depth_region); + intel_region_reference(&state->depth_region, depth_region); + } + + /* + * Set stride/cpp values + */ + if (color_region) { + state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; + state->Buffer[I830_DESTREG_CBUFADDR1] = + (BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(color_region->pitch * color_region->cpp) | + BUF_3D_USE_FENCE); + } + + if (depth_region) { + state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; + state->Buffer[I830_DESTREG_DBUFADDR1] = + (BUF_3D_ID_DEPTH | + BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | + BUF_3D_USE_FENCE); + } + + /* + * Compute/set I830_DESTREG_DV1 value + */ + value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ + + if (color_region && color_region->cpp == 4) { + value |= DV_PF_8888; + } + else { + value |= DV_PF_565; + } + if (depth_region && depth_region->cpp == 4) { + value |= DEPTH_FRMT_24_FIXED_8_OTHER; + } + else { + value |= DEPTH_FRMT_16_FIXED; + } + state->Buffer[I830_DESTREG_DV1] = value; + + I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); + + } static void -i830_set_z_region(intelContextPtr intel, const intelRegion *region) +i830_set_draw_region(struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_regions) { - i830ContextPtr i830 = I830_CONTEXT(intel); - I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS ); - i830->state.Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_DBUFADDR2] = region->offset; + struct i830_context *i830 = i830_context(&intel->ctx); + i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region); } - +#if 0 static void i830_update_color_z_regions(intelContextPtr intel, - const intelRegion *colorRegion, - const intelRegion *depthRegion) + const intelRegion * colorRegion, + const intelRegion * depthRegion) { i830ContextPtr i830 = I830_CONTEXT(intel); i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE); + (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | + BUF_3D_USE_FENCE); i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset; i830->state.Buffer[I830_DESTREG_DBUFADDR1] = (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE); i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset; } +#endif /* This isn't really handled at the moment. */ -static void i830_lost_hardware( intelContextPtr intel ) +static void +i830_new_batch(struct intel_context *intel) { - I830_CONTEXT(intel)->state.emitted = 0; + struct i830_context *i830 = i830_context(&intel->ctx); + i830->state.emitted = 0; + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!intel->no_batch_wrap); } -static void i830_emit_flush( intelContextPtr intel ) +static GLuint +i830_flush_cmd(void) { - BATCH_LOCALS; - - BEGIN_BATCH(2); - OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE ); - OUT_BATCH( 0 ); - ADVANCE_BATCH(); + return MI_FLUSH | FLUSH_MAP_CACHE; } +static void +i830_assert_not_dirty( struct intel_context *intel ) +{ + struct i830_context *i830 = i830_context(&intel->ctx); + struct i830_hw_state *state = i830->current; + assert(!get_dirty(state)); +} +static void +i830_note_unlock( struct intel_context *intel ) +{ + /* nothing */ +} -void i830InitVtbl( i830ContextPtr i830 ) +void +i830InitVtbl(struct i830_context *i830) { - i830->intel.vtbl.alloc_tex_obj = i830AllocTexObj; i830->intel.vtbl.check_vertex_size = i830_check_vertex_size; - i830->intel.vtbl.clear_with_tris = i830ClearWithTris; - i830->intel.vtbl.rotate_window = i830RotateWindow; i830->intel.vtbl.destroy = i830_destroy_context; i830->intel.vtbl.emit_state = i830_emit_state; - i830->intel.vtbl.lost_hardware = i830_lost_hardware; + i830->intel.vtbl.new_batch = i830_new_batch; i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state; - i830->intel.vtbl.set_color_region = i830_set_color_region; - i830->intel.vtbl.set_z_region = i830_set_z_region; - i830->intel.vtbl.update_color_z_regions = i830_update_color_z_regions; + i830->intel.vtbl.set_draw_region = i830_set_draw_region; i830->intel.vtbl.update_texture_state = i830UpdateTextureState; - i830->intel.vtbl.emit_flush = i830_emit_flush; + i830->intel.vtbl.flush_cmd = i830_flush_cmd; i830->intel.vtbl.render_start = i830_render_start; + i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; + i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; + i830->intel.vtbl.note_unlock = i830_note_unlock; } diff --git a/i915/i915_context.c b/i915/i915_context.c index 2bc1cae..bd9f1d5 100644 --- a/i915/i915_context.c +++ b/i915/i915_context.c @@ -36,151 +36,150 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" -#include "vbo/vbo.h" - #include "utils.h" #include "i915_reg.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "intel_tris.h" +#include "intel_span.h" +#include "intel_pixel.h" + /*************************************** * Mesa's Driver Functions ***************************************/ -static const struct dri_extension i915_extensions[] = -{ - { "GL_ARB_depth_texture", NULL }, - { "GL_ARB_fragment_program", NULL }, - { "GL_ARB_shadow", NULL }, - { "GL_ARB_texture_env_crossbar", NULL }, - { "GL_EXT_shadow_funcs", NULL }, - /* ARB extn won't work if not enabled */ - { "GL_SGIX_depth_texture", NULL }, - { NULL, NULL } +static const struct dri_extension i915_extensions[] = { + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_texture_non_power_of_two", NULL}, + {"GL_EXT_shadow_funcs", NULL}, + /* ARB extn won't work if not enabled */ + {"GL_SGIX_depth_texture", NULL}, + {NULL, NULL} }; /* Override intel default. */ -static void i915InvalidateState( GLcontext *ctx, GLuint new_state ) +static void +i915InvalidateState(GLcontext * ctx, GLuint new_state) { - _swrast_InvalidateState( ctx, new_state ); - _swsetup_InvalidateState( ctx, new_state ); - _vbo_InvalidateState( ctx, new_state ); - _tnl_InvalidateState( ctx, new_state ); - _tnl_invalidate_vertex_state( ctx, new_state ); - INTEL_CONTEXT(ctx)->NewGLState |= new_state; + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); + intel_context(ctx)->NewGLState |= new_state; /* Todo: gather state values under which tracked parameters become * invalidated, add callbacks for things like * ProgramLocalParameters, etc. */ { - struct i915_fragment_program *p = - (struct i915_fragment_program *)ctx->FragmentProgram._Current; + struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; if (p && p->nr_params) - p->params_uptodate = 0; + p->params_uptodate = 0; } - if (new_state & (_NEW_FOG|_NEW_HINT|_NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) i915_update_fog(ctx); } -static void i915InitDriverFunctions( struct dd_function_table *functions ) +static void +i915InitDriverFunctions(struct dd_function_table *functions) { - intelInitDriverFunctions( functions ); - i915InitStateFunctions( functions ); - i915InitTextureFuncs( functions ); - i915InitFragProgFuncs( functions ); + intelInitDriverFunctions(functions); + i915InitStateFunctions(functions); + i915InitTextureFuncs(functions); + i915InitFragProgFuncs(functions); functions->UpdateState = i915InvalidateState; } +extern const struct tnl_pipeline_stage *intel_pipeline[]; -GLboolean i915CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) +GLboolean +i915CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) { struct dd_function_table functions; - i915ContextPtr i915 = (i915ContextPtr) CALLOC_STRUCT(i915_context); - intelContextPtr intel = &i915->intel; + struct i915_context *i915 = + (struct i915_context *) CALLOC_STRUCT(i915_context); + struct intel_context *intel = &i915->intel; GLcontext *ctx = &intel->ctx; - GLuint i; - if (!i915) return GL_FALSE; + if (!i915) + return GL_FALSE; + + if (0) + _mesa_printf("\ntexmem-0-3 branch\n\n"); - i915InitVtbl( i915 ); + i915InitVtbl(i915); + i915InitMetaFuncs(i915); - i915InitDriverFunctions( &functions ); + i915InitDriverFunctions(&functions); - if (!intelInitContext( intel, mesaVis, driContextPriv, - sharedContextPrivate, &functions )) { + if (!intelInitContext(intel, mesaVis, driContextPriv, + sharedContextPrivate, &functions)) { FREE(i915); return GL_FALSE; } + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs(ctx); + intelInitTriFuncs(ctx); + + /* Install the customized pipeline: */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, intel_pipeline); + ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; - intel->nr_heaps = 1; - intel->texture_heaps[0] = - driCreateTextureHeap( 0, intel, - intel->intelScreen->tex.size, - 12, - I830_NR_TEX_REGIONS, - intel->sarea->texList, - (unsigned *) & intel->sarea->texAge, - & intel->swapped, - sizeof( struct i915_texture_object ), - (destroy_texture_object_t *)intelDestroyTexObj ); - - /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are - * tightly packed, but they're not in Intel graphics - * hardware. + + /* Advertise the full hardware capabilities. The new memory + * manager should cope much better with overload situations: */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = (1 << 11); ctx->Const.MaxTextureUnits = I915_TEX_UNITS; - i = driQueryOptioni( &intel->optionCache, "allow_large_textures"); - driCalculateMaxTextureLevels( intel->texture_heaps, - intel->nr_heaps, - &intel->ctx.Const, - 4, - 11, /* max 2D texture size is 2048x2048 */ - 8, /* 3D texture */ - 11, /* cube texture. */ - 11, /* rect texture */ - 12, - GL_FALSE, - i ); /* GL_ARB_fragment_program limits - don't think Mesa actually * validates programs against these, and in any case one ARB * instruction can translate to more than one HW instruction, so * we'll still have to check and fallback each time. */ - ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */ + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */ ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT; ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN; ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN; - ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN + - I915_MAX_TEX_INSN); - ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN + + I915_MAX_TEX_INSN); + ctx->Const.FragmentProgram.MaxNativeTexIndirections = + I915_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */ + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE; - - driInitExtensions( ctx, i915_extensions, GL_FALSE ); + driInitExtensions(ctx, i915_extensions, GL_FALSE); - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, - 36 * sizeof(GLfloat) ); + _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, + 36 * sizeof(GLfloat)); intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf; - i915InitState( i915 ); + i915InitState(i915); return GL_TRUE; } - diff --git a/i915/i915_context.h b/i915/i915_context.h index ec15501..c6958dd 100644 --- a/i915/i915_context.h +++ b/i915/i915_context.h @@ -29,6 +29,7 @@ #define I915CONTEXT_INC #include "intel_context.h" +#include "i915_reg.h" #define I915_FALLBACK_TEXTURE 0x1000 #define I915_FALLBACK_COLORMASK 0x2000 @@ -46,6 +47,7 @@ #define I915_UPLOAD_CONSTANTS 0x10 #define I915_UPLOAD_FOG 0x20 #define I915_UPLOAD_INVARIENT 0x40 +#define I915_UPLOAD_DEFAULTS 0x80 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -55,10 +57,8 @@ */ #define I915_DESTREG_CBUFADDR0 0 #define I915_DESTREG_CBUFADDR1 1 -#define I915_DESTREG_CBUFADDR2 2 #define I915_DESTREG_DBUFADDR0 3 #define I915_DESTREG_DBUFADDR1 4 -#define I915_DESTREG_DBUFADDR2 5 #define I915_DESTREG_DV0 6 #define I915_DESTREG_DV1 7 #define I915_DESTREG_SENABLE 8 @@ -89,7 +89,6 @@ #define I915_STPREG_ST1 1 #define I915_STP_SETUP_SIZE 2 -#define I915_TEXREG_MS2 0 #define I915_TEXREG_MS3 1 #define I915_TEXREG_MS4 2 #define I915_TEXREG_SS2 3 @@ -97,23 +96,34 @@ #define I915_TEXREG_SS4 5 #define I915_TEX_SETUP_SIZE 6 +#define I915_DEFREG_C0 0 +#define I915_DEFREG_C1 1 +#define I915_DEFREG_S0 2 +#define I915_DEFREG_S1 3 +#define I915_DEFREG_Z0 4 +#define I915_DEFREG_Z1 5 +#define I915_DEF_SETUP_SIZE 6 + + #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) #define I915_PROGRAM_SIZE 192 +#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN) /* Hardware version of a parsed fragment program. "Derived" from the * mesa fragment_program struct. */ -struct i915_fragment_program { +struct i915_fragment_program +{ struct gl_fragment_program FragProg; GLboolean translated; GLboolean params_uptodate; GLboolean on_hardware; - GLboolean error; /* If program is malformed for any reason. */ + GLboolean error; /* If program is malformed for any reason. */ GLuint nr_tex_indirect; GLuint nr_tex_insn; @@ -135,52 +145,40 @@ struct i915_fragment_program { GLuint constant_flags[I915_MAX_CONSTANT]; GLuint nr_constants; - GLuint *csr; /* Cursor, points into program. - */ + GLuint *csr; /* Cursor, points into program. + */ - GLuint *decl; /* Cursor, points into declarations. - */ - - GLuint decl_s; /* flags for which s regs need to be decl'd */ - GLuint decl_t; /* flags for which t regs need to be decl'd */ + GLuint *decl; /* Cursor, points into declarations. + */ - GLuint temp_flag; /* Tracks temporary regs which are in - * use. - */ + GLuint decl_s; /* flags for which s regs need to be decl'd */ + GLuint decl_t; /* flags for which t regs need to be decl'd */ - GLuint utemp_flag; /* Tracks TYPE_U temporary regs which are in - * use. - */ + GLuint temp_flag; /* Tracks temporary regs which are in + * use. + */ + GLuint utemp_flag; /* Tracks TYPE_U temporary regs which are in + * use. + */ + /* Track which R registers are "live" for each instruction. + * A register is live between the time it's written to and the last time + * it's read. */ + GLuint usedRegs[I915_MAX_INSN]; + /* Helpers for i915_fragprog.c: */ GLuint wpos_tex; GLboolean depth_written; - struct { - GLuint reg; /* Hardware constant idx */ - const GLfloat *values; /* Pointer to tracked values */ + struct + { + GLuint reg; /* Hardware constant idx */ + const GLfloat *values; /* Pointer to tracked values */ } param[I915_MAX_CONSTANT]; GLuint nr_params; - - - - - /* Helpers for i915_texprog.c: - */ - GLuint src_texture; /* Reg containing sampled texture color, - * else UREG_BAD. - */ - - GLuint src_previous; /* Reg containing color from previous - * stage. May need to be decl'd. - */ - - GLuint last_tex_stage; /* Number of last enabled texture unit */ - - struct vertex_buffer *VB; }; @@ -188,67 +186,68 @@ struct i915_fragment_program { -struct i915_texture_object -{ - struct intel_texture_object intel; - GLenum lastTarget; - GLboolean refs_border_color; - GLuint Setup[I915_TEX_SETUP_SIZE]; -}; #define I915_TEX_UNITS 8 -struct i915_hw_state { +struct i915_hw_state +{ GLuint Ctx[I915_CTX_SETUP_SIZE]; GLuint Buffer[I915_DEST_SETUP_SIZE]; GLuint Stipple[I915_STP_SETUP_SIZE]; GLuint Fog[I915_FOG_SETUP_SIZE]; + GLuint Defaults[I915_DEF_SETUP_SIZE]; GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE]; GLuint Constant[I915_CONSTANT_SIZE]; GLuint ConstantSize; GLuint Program[I915_PROGRAM_SIZE]; GLuint ProgramSize; - GLuint active; /* I915_UPLOAD_* */ - GLuint emitted; /* I915_UPLOAD_* */ + + /* Region pointers for relocation: + */ + struct intel_region *draw_region; + struct intel_region *depth_region; +/* struct intel_region *tex_region[I915_TEX_UNITS]; */ + + /* Regions aren't actually that appropriate here as the memory may + * be from a PBO or FBO. Will have to do this for draw and depth for + * FBO's... + */ + dri_bo *tex_buffer[I915_TEX_UNITS]; + GLuint tex_offset[I915_TEX_UNITS]; + + + GLuint active; /* I915_UPLOAD_* */ + GLuint emitted; /* I915_UPLOAD_* */ }; #define I915_FOG_PIXEL 2 #define I915_FOG_VERTEX 1 #define I915_FOG_NONE 0 -struct i915_context +struct i915_context { struct intel_context intel; GLuint last_ReallyEnabled; GLuint vertex_fog; + GLuint lodbias_ss2[MAX_TEXTURE_UNITS]; + - struct i915_fragment_program tex_program; struct i915_fragment_program *current_program; struct i915_hw_state meta, initial, state, *current; }; -typedef struct i915_context *i915ContextPtr; -typedef struct i915_texture_object *i915TextureObjectPtr; - -#define I915_CONTEXT(ctx) ((i915ContextPtr)(ctx)) - - - #define I915_STATECHANGE(i915, flag) \ do { \ - if (0) fprintf(stderr, "I915_STATECHANGE %x in %s\n", flag, __FUNCTION__); \ INTEL_FIREVERTICES( &(i915)->intel ); \ (i915)->state.emitted &= ~(flag); \ } while (0) #define I915_ACTIVESTATE(i915, flag, mode) \ do { \ - if (0) fprintf(stderr, "I915_ACTIVESTATE %x %d in %s\n", \ - flag, mode, __FUNCTION__); \ INTEL_FIREVERTICES( &(i915)->intel ); \ if (mode) \ (i915)->state.active |= (flag); \ @@ -260,7 +259,13 @@ do { \ /*====================================================================== * i915_vtbl.c */ -extern void i915InitVtbl( i915ContextPtr i915 ); +extern void i915InitVtbl(struct i915_context *i915); + +extern void +i915_state_draw_region(struct intel_context *intel, + struct i915_hw_state *state, + struct intel_region *color_region, + struct intel_region *depth_region); @@ -289,70 +294,58 @@ do { \ /*====================================================================== * i915_context.c */ -extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - - -/*====================================================================== - * i915_texprog.c - */ -extern void i915ValidateTextureProgram( i915ContextPtr i915 ); +extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); /*====================================================================== * i915_debug.c */ -extern void i915_disassemble_program( const GLuint *program, GLuint sz ); -extern void i915_print_ureg( const char *msg, GLuint ureg ); +extern void i915_disassemble_program(const GLuint * program, GLuint sz); +extern void i915_print_ureg(const char *msg, GLuint ureg); /*====================================================================== * i915_state.c */ -extern void i915InitStateFunctions( struct dd_function_table *functions ); -extern void i915InitState( i915ContextPtr i915 ); -extern void i915_update_fog(GLcontext *ctxx); +extern void i915InitStateFunctions(struct dd_function_table *functions); +extern void i915InitState(struct i915_context *i915); +extern void i915_update_fog(GLcontext * ctx); /*====================================================================== * i915_tex.c */ -extern void i915UpdateTextureState( intelContextPtr intel ); -extern void i915InitTextureFuncs( struct dd_function_table *functions ); -extern intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj ); +extern void i915UpdateTextureState(struct intel_context *intel); +extern void i915InitTextureFuncs(struct dd_function_table *functions); /*====================================================================== * i915_metaops.c */ -extern GLboolean -i915TryTextureReadPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels ); - -extern GLboolean -i915TryTextureDrawPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels ); +void i915InitMetaFuncs(struct i915_context *i915); -extern void -i915ClearWithTris( intelContextPtr intel, GLbitfield mask, - GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch); - - -extern void -i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv, - GLuint srcBuf); /*====================================================================== * i915_fragprog.c */ -extern void i915ValidateFragmentProgram( i915ContextPtr i915 ); -extern void i915InitFragProgFuncs( struct dd_function_table *functions ); - -#endif +extern void i915ValidateFragmentProgram(struct i915_context *i915); +extern void i915InitFragProgFuncs(struct dd_function_table *functions); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context(GLcontext * ctx) +{ + return (struct i915_context *) ctx; +} + + +#define I915_CONTEXT(ctx) i915_context(ctx) + + + +#endif diff --git a/i915/i915_debug.c b/i915/i915_debug.c index 054b561..8eb1c5b 100644 --- a/i915/i915_debug.c +++ b/i915/i915_debug.c @@ -25,275 +25,824 @@ * **************************************************************************/ +#include "imports.h" + #include "i915_reg.h" #include "i915_context.h" -#include <stdio.h> - - -static const char *opcodes[0x20] = { - "NOP", - "ADD", - "MOV", - "MUL", - "MAD", - "DP2ADD", - "DP3", - "DP4", - "FRC", - "RCP", - "RSQ", - "EXP", - "LOG", - "CMP", - "MIN", - "MAX", - "FLR", - "MOD", - "TRC", - "SGE", - "SLT", - "TEXLD", - "TEXLDP", - "TEXLDB", - "TEXKILL", - "DCL", - "0x1a", - "0x1b", - "0x1c", - "0x1d", - "0x1e", - "0x1f", -}; - - -static const int args[0x20] = { - 0, /* 0 nop */ - 2, /* 1 add */ - 1, /* 2 mov */ - 2, /* 3 m ul */ - 3, /* 4 mad */ - 3, /* 5 dp2add */ - 2, /* 6 dp3 */ - 2, /* 7 dp4 */ - 1, /* 8 frc */ - 1, /* 9 rcp */ - 1, /* a rsq */ - 1, /* b exp */ - 1, /* c log */ - 3, /* d cmp */ - 2, /* e min */ - 2, /* f max */ - 1, /* 10 flr */ - 1, /* 11 mod */ - 1, /* 12 trc */ - 2, /* 13 sge */ - 2, /* 14 slt */ - 1, - 1, - 1, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -}; - - -static const char *regname[0x8] = { - "R", - "T", - "CONST", - "S", - "OC", - "OD", - "U", - "UNKNOWN", -}; - -static void print_reg_type_nr( GLuint type, GLuint nr ) +#include "i915_debug.h" + +#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ ) + +static GLboolean debug( struct debug_stream *stream, const char *name, GLuint len ) { - switch (type) { - case REG_TYPE_T: - switch (nr) { - case T_DIFFUSE: fprintf(stderr, "T_DIFFUSE"); return; - case T_SPECULAR: fprintf(stderr, "T_SPECULAR"); return; - case T_FOG_W: fprintf(stderr, "T_FOG_W"); return; - default: fprintf(stderr, "T_TEX%d", nr); return; - } - case REG_TYPE_OC: - if (nr == 0) { - fprintf(stderr, "oC"); - return; - } - break; - case REG_TYPE_OD: - if (nr == 0) { - fprintf(stderr, "oD"); - return; - } - break; - default: - break; + GLuint i; + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return GL_FALSE; } - fprintf(stderr, "%s[%d]", regname[type], nr); -} + if (stream->print_addresses) + PRINTF("%08x: ", stream->offset); + -#define REG_SWIZZLE_MASK 0x7777 -#define REG_NEGATE_MASK 0x8888 + PRINTF("%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF("\t0x%08x\n", ptr[i]); + PRINTF("\n"); -#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ - (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ - (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ - (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + stream->offset += len * sizeof(GLuint); + + return GL_TRUE; +} -static void print_reg_neg_swizzle( GLuint reg ) +static const char *get_prim_name( GLuint val ) { - int i; - - if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && - (reg & REG_NEGATE_MASK) == 0) - return; - - fprintf(stderr, "."); - - for (i = 3 ; i >= 0; i--) { - if (reg & (1<<((i*4)+3))) - fprintf(stderr, "-"); - - switch ((reg>>(i*4)) & 0x7) { - case 0: fprintf(stderr, "x"); break; - case 1: fprintf(stderr, "y"); break; - case 2: fprintf(stderr, "z"); break; - case 3: fprintf(stderr, "w"); break; - case 4: fprintf(stderr, "0"); break; - case 5: fprintf(stderr, "1"); break; - default: fprintf(stderr, "?"); break; - } + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; } } - -static void print_src_reg( GLuint dword ) +static GLboolean debug_prim( struct debug_stream *stream, const char *name, + GLboolean dump_floats, + GLuint len ) { - GLuint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; - GLuint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; - print_reg_type_nr( type, nr ); - print_reg_neg_swizzle( dword ); + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + GLuint i; + + + + PRINTF("%s %s (%d dwords):\n", name, prim, len); + PRINTF("\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF("\t0x%08x // %f\n", ptr[i], *(GLfloat *)&ptr[i]); + else + PRINTF("\t0x%08x\n", ptr[i]); + } + + + PRINTF("\n"); + + stream->offset += len * sizeof(GLuint); + + return GL_TRUE; } + + -void i915_print_ureg( const char *msg, GLuint ureg ) + +static GLboolean debug_program( struct debug_stream *stream, const char *name, GLuint len ) { - fprintf(stderr, "%s: ", msg); - print_src_reg( ureg >> 8 ); - fprintf(stderr, "\n"); + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return GL_FALSE; + } + + if (stream->print_addresses) + PRINTF("%08x: ", stream->offset); + + PRINTF("%s (%d dwords):\n", name, len); + i915_disassemble_program( ptr, len ); + + stream->offset += len * sizeof(GLuint); + return GL_TRUE; } -static void print_dest_reg( GLuint dword ) + +static GLboolean debug_chain( struct debug_stream *stream, const char *name, GLuint len ) { - GLuint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; - GLuint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; - print_reg_type_nr( type, nr ); - if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) - return; - fprintf(stderr, "."); - if (dword & A0_DEST_CHANNEL_X) fprintf(stderr, "x"); - if (dword & A0_DEST_CHANNEL_Y) fprintf(stderr, "y"); - if (dword & A0_DEST_CHANNEL_Z) fprintf(stderr, "z"); - if (dword & A0_DEST_CHANNEL_W) fprintf(stderr, "w"); + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + GLuint old_offset = stream->offset + len * sizeof(GLuint); + GLuint i; + + PRINTF("%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF("\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + PRINTF("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF("\n... skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + + return GL_TRUE; } -#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) -#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) -#define GET_SRC2_REG(r) (r) +static GLboolean debug_variable_length_prim( struct debug_stream *stream ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + GLuint i, len; + + GLushort *idx = (GLushort *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + PRINTF("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + PRINTF("\t0x%08x\n", ptr[i]); + PRINTF("\n"); + + stream->offset += len * sizeof(GLuint); + return GL_TRUE; +} -static void print_arith_op( GLuint opcode, const GLuint *program ) +#define BITS( dw, hi, lo, ... ) \ +do { \ + unsigned himask = ~0UL >> (31 - (hi)); \ + PRINTF("\t\t "); \ + PRINTF(__VA_ARGS__); \ + PRINTF(": 0x%x\n", ((dw) & himask) >> (lo)); \ +} while (0) + +#define MBZ( dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) + +#define FLAG( dw, bit, ... ) \ +do { \ + if (((dw) >> (bit)) & 1) { \ + PRINTF("\t\t "); \ + PRINTF(__VA_ARGS__); \ + PRINTF("\n"); \ + } \ +} while (0) + +static GLboolean debug_load_immediate( struct debug_stream *stream, + const char *name, + GLuint len ) { - if (opcode != A0_NOP) { - print_dest_reg(program[0]); - if (program[0] & A0_DEST_SATURATE) - fprintf(stderr, " = SATURATE "); - else - fprintf(stderr, " = "); + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + GLuint bits = (ptr[0] >> 4) & 0xff; + GLuint j = 0; + + PRINTF("%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF("\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF("\t LIS0: 0x%08x\n", ptr[j]); + PRINTF("\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF("\t LIS1: 0x%08x\n", ptr[j]); + BITS(ptr[j], 29, 24, "vb dword width"); + BITS(ptr[j], 21, 16, "vb dword pitch"); + BITS(ptr[j], 15, 0, "vb max index"); + j++; } + if (bits & (1<<2)) { + int i; + PRINTF("\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF("\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF("\t LIS4: 0x%08x\n", ptr[j]); + BITS(ptr[j], 31, 23, "point width"); + BITS(ptr[j], 22, 19, "line width"); + FLAG(ptr[j], 18, "alpha flatshade"); + FLAG(ptr[j], 17, "fog flatshade"); + FLAG(ptr[j], 16, "spec flatshade"); + FLAG(ptr[j], 15, "rgb flatshade"); + BITS(ptr[j], 14, 13, "cull mode"); + FLAG(ptr[j], 12, "vfmt: point width"); + FLAG(ptr[j], 11, "vfmt: specular/fog"); + FLAG(ptr[j], 10, "vfmt: rgba"); + FLAG(ptr[j], 9, "vfmt: depth offset"); + BITS(ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(ptr[j], 5, "force dflt diffuse"); + FLAG(ptr[j], 4, "force dflt specular"); + FLAG(ptr[j], 3, "local depth offset enable"); + FLAG(ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(ptr[j], 1, "sprite point"); + FLAG(ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF("\t LIS5: 0x%08x\n", ptr[j]); + BITS(ptr[j], 31, 28, "rgba write disables"); + FLAG(ptr[j], 27, "force dflt point width"); + FLAG(ptr[j], 26, "last pixel enable"); + FLAG(ptr[j], 25, "global z offset enable"); + FLAG(ptr[j], 24, "fog enable"); + BITS(ptr[j], 23, 16, "stencil ref"); + BITS(ptr[j], 15, 13, "stencil test"); + BITS(ptr[j], 12, 10, "stencil fail op"); + BITS(ptr[j], 9, 7, "stencil pass z fail op"); + BITS(ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(ptr[j], 3, "stencil write enable"); + FLAG(ptr[j], 2, "stencil test enable"); + FLAG(ptr[j], 1, "color dither enable"); + FLAG(ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF("\t LIS6: 0x%08x\n", ptr[j]); + FLAG(ptr[j], 31, "alpha test enable"); + BITS(ptr[j], 30, 28, "alpha func"); + BITS(ptr[j], 27, 20, "alpha ref"); + FLAG(ptr[j], 19, "depth test enable"); + BITS(ptr[j], 18, 16, "depth func"); + FLAG(ptr[j], 15, "blend enable"); + BITS(ptr[j], 14, 12, "blend func"); + BITS(ptr[j], 11, 8, "blend src factor"); + BITS(ptr[j], 7, 4, "blend dst factor"); + FLAG(ptr[j], 3, "depth write enable"); + FLAG(ptr[j], 2, "color write enable"); + BITS(ptr[j], 1, 0, "provoking vertex"); + j++; + } + - fprintf(stderr, "%s ", opcodes[opcode]); + PRINTF("\n"); - print_src_reg(GET_SRC0_REG(program[0], program[1])); - if (args[opcode] == 1) { - fprintf(stderr, "\n"); - return; + assert(j == len); + + stream->offset += len * sizeof(GLuint); + + return GL_TRUE; +} + + + +static GLboolean debug_load_indirect( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + GLuint bits = (ptr[0] >> 8) & 0x3f; + GLuint i, j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<<i)) { + switch (1<<(8+i)) { + case LI0_STATE_STATIC_INDIRECT: + PRINTF(" STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(" 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_DYNAMIC_INDIRECT: + PRINTF(" DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + break; + case LI0_STATE_SAMPLER: + PRINTF(" SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(" 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_MAP: + PRINTF(" MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(" 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_PROGRAM: + PRINTF(" PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(" 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_CONSTANTS: + PRINTF(" CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(" 0x%08x\n", ptr[j++]); + break; + default: + assert(0); + break; + } + } } - fprintf(stderr, ", "); - print_src_reg(GET_SRC1_REG(program[1], program[2])); - if (args[opcode] == 2) { - fprintf(stderr, "\n"); - return; + if (bits == 0) { + PRINTF("\t DUMMY: 0x%08x\n", ptr[j++]); } - fprintf(stderr, ", "); - print_src_reg(GET_SRC2_REG(program[2])); - fprintf(stderr, "\n"); - return; + PRINTF("\n"); + + + assert(j == len); + + stream->offset += len * sizeof(GLuint); + + return GL_TRUE; +} + +static void BR13( struct debug_stream *stream, + GLuint val ) +{ + PRINTF("\t0x%08x\n", val); + FLAG(val, 30, "clipping enable"); + BITS(val, 25, 24, "color depth (3==32bpp)"); + BITS(val, 23, 16, "raster op"); + BITS(val, 15, 0, "dest pitch"); } -static void print_tex_op( GLuint opcode, const GLuint *program ) +static void BR2223( struct debug_stream *stream, + GLuint val22, GLuint val23 ) { - print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); - fprintf(stderr, " = "); + union { GLuint val; short field[2]; } BR22, BR23; - fprintf(stderr, "%s ", opcodes[opcode]); + BR22.val = val22; + BR23.val = val23; - fprintf(stderr, "S[%d],", - program[0] & T0_SAMPLER_NR_MASK); + PRINTF("\t0x%08x\n", val22); + BITS(val22, 31, 16, "dest y1"); + BITS(val22, 15, 0, "dest x1"); - print_reg_type_nr( (program[1]>>T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK, - (program[1]>>T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK ); - fprintf(stderr, "\n"); + PRINTF("\t0x%08x\n", val23); + BITS(val23, 31, 16, "dest y2"); + BITS(val23, 15, 0, "dest x2"); + + /* The blit engine may produce unexpected results when these aren't met */ + assert(BR22.field[0] < BR23.field[0]); + assert(BR22.field[1] < BR23.field[1]); } -static void print_dcl_op( GLuint opcode, const GLuint *program ) +static void BR09( struct debug_stream *stream, + GLuint val ) { - fprintf(stderr, "%s ", opcodes[opcode]); - print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); - fprintf(stderr, "\n"); + PRINTF("\t0x%08x -- dest address\n", val); } +static void BR26( struct debug_stream *stream, + GLuint val ) +{ + PRINTF("\t0x%08x\n", val); + BITS(val, 31, 16, "src y1"); + BITS(val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + GLuint val ) +{ + PRINTF("\t0x%08x\n", val); + BITS(val, 15, 0, "src pitch"); +} -void i915_disassemble_program( const GLuint *program, GLuint sz ) +static void BR12( struct debug_stream *stream, + GLuint val ) { - GLuint size = program[0] & 0x1ff; - GLint i; + PRINTF("\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + GLuint val ) +{ + PRINTF("\t0x%08x -- color\n", val); +} + +static GLboolean debug_copy_blit( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} + +static GLboolean debug_color_blit( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} + +static GLboolean debug_modes4( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j]); + BITS(ptr[j], 21, 18, "logicop func"); + FLAG(ptr[j], 17, "stencil test mask modify-enable"); + FLAG(ptr[j], 16, "stencil write mask modify-enable"); + BITS(ptr[j], 15, 8, "stencil test mask"); + BITS(ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} + +static GLboolean debug_map_state( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + { + PRINTF("\t0x%08x\n", ptr[j]); + BITS(ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF("\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF("\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(ptr[j], 1, "vertical line stride"); + FLAG(ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF("\t TMn.1: 0x%08x\n", ptr[j]); + BITS(ptr[j], 31, 21, "height"); + BITS(ptr[j], 20, 10, "width"); + BITS(ptr[j], 9, 7, "surface format"); + BITS(ptr[j], 6, 3, "texel format"); + FLAG(ptr[j], 2, "use fence regs"); + FLAG(ptr[j], 1, "tiled surface"); + FLAG(ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF("\t TMn.2: 0x%08x\n", ptr[j]); + BITS(ptr[j], 31, 21, "dword pitch"); + BITS(ptr[j], 20, 15, "cube face enables"); + BITS(ptr[j], 14, 9, "max lod"); + FLAG(ptr[j], 8, "mip layout right"); + BITS(ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} + +static GLboolean debug_sampler_state( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + { + PRINTF("\t0x%08x\n", ptr[j]); + BITS(ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF("\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(ptr[j], 31, "reverse gamma"); + FLAG(ptr[j], 30, "planar to packed"); + FLAG(ptr[j], 29, "yuv->rgb"); + BITS(ptr[j], 28, 27, "chromakey index"); + BITS(ptr[j], 26, 22, "base mip level"); + BITS(ptr[j], 21, 20, "mip mode filter"); + BITS(ptr[j], 19, 17, "mag mode filter"); + BITS(ptr[j], 16, 14, "min mode filter"); + BITS(ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(ptr[j], 4, "shadow enable"); + FLAG(ptr[j], 3, "max-aniso-4"); + BITS(ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF("\t TSn.1: 0x%08x\n", ptr[j]); + BITS(ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(ptr[j], 17, "kill pixel enable"); + FLAG(ptr[j], 16, "keyed tex filter mode"); + FLAG(ptr[j], 15, "chromakey enable"); + BITS(ptr[j], 14, 12, "tcx wrap mode"); + BITS(ptr[j], 11, 9, "tcy wrap mode"); + BITS(ptr[j], 8, 6, "tcz wrap mode"); + FLAG(ptr[j], 5, "normalized coords"); + BITS(ptr[j], 4, 1, "map (surface) index"); + FLAG(ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF("\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} + +static GLboolean debug_dest_vars( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + { + PRINTF("\t0x%08x\n", ptr[j]); + FLAG(ptr[j], 31, "early classic ztest"); + FLAG(ptr[j], 30, "opengl tex default color"); + FLAG(ptr[j], 29, "bypass iz"); + FLAG(ptr[j], 28, "lod preclamp"); + BITS(ptr[j], 27, 26, "dither pattern"); + FLAG(ptr[j], 25, "linear gamma blend"); + FLAG(ptr[j], 24, "debug dither"); + BITS(ptr[j], 23, 20, "dstorg x"); + BITS(ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(ptr[j], 14, 12, "422 write select"); + BITS(ptr[j], 11, 8, "cbuf format"); + BITS(ptr[j], 3, 2, "zbuf format"); + FLAG(ptr[j], 1, "vert line stride"); + FLAG(ptr[j], 1, "vert line stride offset"); + j++; + } - fprintf(stderr, "BEGIN\n"); + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} - if (size+2 != sz) { - fprintf(stderr, "%s: program size mismatch %d/%d\n", __FUNCTION__, - size+2, sz); - exit(1); +static GLboolean debug_buf_info( struct debug_stream *stream, + const char *name, + GLuint len ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF("%s (%d dwords):\n", name, len); + PRINTF("\t0x%08x\n", ptr[j++]); + + { + PRINTF("\t0x%08x\n", ptr[j]); + BITS(ptr[j], 28, 28, "aux buffer id"); + BITS(ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(ptr[j], 23, "use fence regs"); + FLAG(ptr[j], 22, "tiled surface"); + FLAG(ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; } + + PRINTF("\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(GLuint); + assert(j == len); + return GL_TRUE; +} - program ++; - for (i = 1 ; i < sz ; i+=3, program+=3) { - GLuint opcode = program[0] & (0x1f<<24); - - if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT) - print_arith_op(opcode >> 24, program); - else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL) - print_tex_op(opcode >> 24, program); - else if (opcode == D0_DCL) - print_dcl_op(opcode >> 24, program); - else - fprintf(stderr, "Unknown opcode 0x%x\n", opcode); +static GLboolean i915_debug_packet( struct debug_stream *stream ) +{ + GLuint *ptr = (GLuint *)(stream->ptr + stream->offset); + GLuint cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return GL_FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + break; + } + break; + case 0x1: + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( GLuint *start, + GLuint *end ) +{ + struct debug_stream stream; + GLuint bytes = (end - start) * 4; + GLboolean done = GL_FALSE; + + PRINTF("\n\nBATCH: (%d)\n", bytes / 4); + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + + while (!done && + stream.offset < bytes && + stream.offset >= 0) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); } - fprintf(stderr, "END\n\n"); + PRINTF("END-BATCH\n\n\n"); } + + diff --git a/i965/intel_tex.h b/i915/i915_debug.h index e389d52..0643a8c 100644 --- a/i965/intel_tex.h +++ b/i915/i915_debug.h @@ -1,8 +1,8 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -25,18 +25,31 @@ * **************************************************************************/ -#ifndef INTELTEX_INC -#define INTELTEX_INC +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +struct i915_context; + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; +}; -#include "mtypes.h" -#include "intel_context.h" -void intelInitTextureFuncs( struct dd_function_table *functions ); +extern void i915_disassemble_program(const unsigned *program, unsigned sz); +extern void i915_print_ureg(const char *msg, unsigned ureg); -GLuint intel_finalize_mipmap_tree( struct intel_context *intel, - struct gl_texture_object *tObj ); +void +i915_dump_batchbuffer( unsigned *start, + unsigned *end ); #endif diff --git a/i915/i915_debug_fp.c b/i915/i915_debug_fp.c new file mode 100644 index 0000000..84347a0 --- /dev/null +++ b/i915/i915_debug_fp.c @@ -0,0 +1,333 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdio.h> + +#include "i915_reg.h" +#include "i915_debug.h" +#include "main/imports.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_print.h" + +#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ ) + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(GLuint type, GLuint nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF("T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF("T_SPECULAR"); + return; + case T_FOG_W: + PRINTF("T_FOG_W"); + return; + default: + PRINTF("T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF("oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF("oD"); + return; + } + break; + default: + break; + } + + PRINTF("%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(GLuint reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF("."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF("-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF("x"); + break; + case 1: + PRINTF("y"); + break; + case 2: + PRINTF("z"); + break; + case 3: + PRINTF("w"); + break; + case 4: + PRINTF("0"); + break; + case 5: + PRINTF("1"); + break; + default: + PRINTF("?"); + break; + } + } +} + + +static void +print_src_reg(GLuint dword) +{ + GLuint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + GLuint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + print_reg_neg_swizzle(dword); +} + + +static void +print_dest_reg(GLuint dword) +{ + GLuint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + GLuint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF("."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF("x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF("y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF("z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF("w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(GLuint opcode, const GLuint * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(" = SATURATE "); + else + PRINTF(" = "); + } + + PRINTF("%s ", opcodes[opcode]); + + print_src_reg(GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF("\n"); + return; + } + + PRINTF(", "); + print_src_reg(GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF("\n"); + return; + } + + PRINTF(", "); + print_src_reg(GET_SRC2_REG(program[2])); + PRINTF("\n"); + return; +} + + +static void +print_tex_op(GLuint opcode, const GLuint * program) +{ + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(" = "); + + PRINTF("%s ", opcodes[opcode]); + + PRINTF("S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF("\n"); +} + +static void +print_dcl_op(GLuint opcode, const GLuint * program) +{ + PRINTF("%s ", opcodes[opcode]); + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + PRINTF("\n"); +} + + +void +i915_disassemble_program(const GLuint * program, GLuint sz) +{ + GLuint size = program[0] & 0x1ff; + GLint i; + + PRINTF("\t\tBEGIN\n"); + + assert(size + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + GLuint opcode = program[0] & (0x1f << 24); + + PRINTF("\t\t"); + + if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL) + print_tex_op(opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(opcode >> 24, program); + else + PRINTF("Unknown opcode 0x%x\n", opcode); + } + + PRINTF("\t\tEND\n\n"); +} + + diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c index a28c8bb..1876218 100644 --- a/i915/i915_fragprog.c +++ b/i915/i915_fragprog.c @@ -29,40 +29,56 @@ #include "macros.h" #include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/program.h" +#include "shader/programopt.h" + #include "tnl/tnl.h" #include "tnl/t_context.h" + #include "intel_batchbuffer.h" #include "i915_reg.h" #include "i915_context.h" #include "i915_program.h" -#include "prog_instruction.h" -#include "prog_parameter.h" -#include "program.h" -#include "programopt.h" - - +static const GLfloat sin_quad_constants[2][4] = { + { + 2.0, + -1.0, + .5, + .75 + }, + { + 4.0, + -4.0, + 1.0 / (2.0 * M_PI), + .2225 + } +}; -/* 1, -1/3!, 1/5!, -1/7! */ -static const GLfloat sin_constants[4] = { 1.0, - -1.0/(3*2*1), - 1.0/(5*4*3*2*1), - -1.0/(7*6*5*4*3*2*1) }; +static const GLfloat sin_constants[4] = { 1.0, + -1.0 / (3 * 2 * 1), + 1.0 / (5 * 4 * 3 * 2 * 1), + -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; /* 1, -1/2!, 1/4!, -1/6! */ -static const GLfloat cos_constants[4] = { 1.0, - -1.0/(2*1), - 1.0/(4*3*2*1), - -1.0/(6*5*4*3*2*1) }; +static const GLfloat cos_constants[4] = { 1.0, + -1.0 / (2 * 1), + 1.0 / (4 * 3 * 2 * 1), + -1.0 / (6 * 5 * 4 * 3 * 2 * 1) +}; /** * Retrieve a ureg for the given source register. Will emit * constants, apply swizzling and negation as needed. */ -static GLuint src_vector( struct i915_fragment_program *p, - const struct prog_src_register *source, - const struct gl_fragment_program *program ) +static GLuint +src_vector(struct i915_fragment_program *p, + const struct prog_src_register *source, + const struct gl_fragment_program *program) { GLuint src; @@ -70,136 +86,152 @@ static GLuint src_vector( struct i915_fragment_program *p, /* Registers: */ - case PROGRAM_TEMPORARY: - if (source->Index >= I915_MAX_TEMPORARY) { - i915_program_error( p, "Exceeded max temporary reg" ); - return 0; - } - src = UREG( REG_TYPE_R, source->Index ); + case PROGRAM_TEMPORARY: + if (source->Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, source->Index); + break; + case PROGRAM_INPUT: + switch (source->Index) { + case FRAG_ATTRIB_WPOS: + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); break; - case PROGRAM_INPUT: - switch (source->Index) { - case FRAG_ATTRIB_WPOS: - src = i915_emit_decl( p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL ); - break; - case FRAG_ATTRIB_COL0: - src = i915_emit_decl( p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL ); - break; - case FRAG_ATTRIB_COL1: - src = i915_emit_decl( p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ ); - src = swizzle( src, X, Y, Z, ONE ); - break; - case FRAG_ATTRIB_FOGC: - src = i915_emit_decl( p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W ); - src = swizzle( src, W, W, W, W ); - break; - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - src = i915_emit_decl( p, REG_TYPE_T, - T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), - D0_CHANNEL_ALL ); - break; - - default: - i915_program_error( p, "Bad source->Index" ); - return 0; - } + case FRAG_ATTRIB_COL0: + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + break; + case FRAG_ATTRIB_COL1: + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + break; + case FRAG_ATTRIB_FOGC: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, ZERO, ZERO, ONE); + break; + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), + D0_CHANNEL_ALL); break; - - /* Various paramters and env values. All emitted to - * hardware as program constants. - */ - case PROGRAM_LOCAL_PARAM: - src = i915_emit_param4fv( - p, program->Base.LocalParams[source->Index]); - break; - - case PROGRAM_ENV_PARAM: - src = i915_emit_param4fv( - p, p->ctx->FragmentProgram.Parameters[source->Index]); - break; - - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - src = i915_emit_param4fv( - p, program->Base.Parameters->ParameterValues[source->Index] ); - break; default: - i915_program_error( p, "Bad source->File" ); - return 0; + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + /* Various paramters and env values. All emitted to + * hardware as program constants. + */ + case PROGRAM_LOCAL_PARAM: + src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]); + break; + + case PROGRAM_ENV_PARAM: + src = + i915_emit_param4fv(p, + p->ctx->FragmentProgram.Parameters[source-> + Index]); + break; + + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + src = + i915_emit_param4fv(p, + program->Base.Parameters->ParameterValues[source-> + Index]); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; } - src = swizzle(src, - GET_SWZ(source->Swizzle, 0), - GET_SWZ(source->Swizzle, 1), - GET_SWZ(source->Swizzle, 2), - GET_SWZ(source->Swizzle, 3)); + src = swizzle(src, + GET_SWZ(source->Swizzle, 0), + GET_SWZ(source->Swizzle, 1), + GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); if (source->NegateBase) - src = negate( src, - GET_BIT(source->NegateBase, 0), - GET_BIT(source->NegateBase, 1), - GET_BIT(source->NegateBase, 2), - GET_BIT(source->NegateBase, 3)); + src = negate(src, + GET_BIT(source->NegateBase, 0), + GET_BIT(source->NegateBase, 1), + GET_BIT(source->NegateBase, 2), + GET_BIT(source->NegateBase, 3)); return src; } -static GLuint get_result_vector( struct i915_fragment_program *p, - const struct prog_instruction *inst ) +static GLuint +get_result_vector(struct i915_fragment_program *p, + const struct prog_instruction *inst) { switch (inst->DstReg.File) { case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { - case FRAG_RESULT_COLR: - return UREG(REG_TYPE_OC, 0); - case FRAG_RESULT_DEPR: - p->depth_written = 1; - return UREG(REG_TYPE_OD, 0); - default: - i915_program_error( p, "Bad inst->DstReg.Index" ); - return 0; + case FRAG_RESULT_COLR: + return UREG(REG_TYPE_OC, 0); + case FRAG_RESULT_DEPR: + p->depth_written = 1; + return UREG(REG_TYPE_OD, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index"); + return 0; } case PROGRAM_TEMPORARY: return UREG(REG_TYPE_R, inst->DstReg.Index); default: - i915_program_error( p, "Bad inst->DstReg.File" ); + i915_program_error(p, "Bad inst->DstReg.File"); return 0; } } - -static GLuint get_result_flags( const struct prog_instruction *inst ) + +static GLuint +get_result_flags(const struct prog_instruction *inst) { GLuint flags = 0; - if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE; - if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X; - if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y; - if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z; - if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + flags |= A0_DEST_SATURATE; + if (inst->DstReg.WriteMask & WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (inst->DstReg.WriteMask & WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (inst->DstReg.WriteMask & WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (inst->DstReg.WriteMask & WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; return flags; } -static GLuint translate_tex_src_target( struct i915_fragment_program *p, - GLubyte bit ) +static GLuint +translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) { switch (bit) { - case TEXTURE_1D_INDEX: return D0_SAMPLE_TYPE_2D; - case TEXTURE_2D_INDEX: return D0_SAMPLE_TYPE_2D; - case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D; - case TEXTURE_3D_INDEX: return D0_SAMPLE_TYPE_VOLUME; - case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE; - default: i915_program_error(p, "TexSrcBit"); return 0; + case TEXTURE_1D_INDEX: + return D0_SAMPLE_TYPE_2D; + case TEXTURE_2D_INDEX: + return D0_SAMPLE_TYPE_2D; + case TEXTURE_RECT_INDEX: + return D0_SAMPLE_TYPE_2D; + case TEXTURE_3D_INDEX: + return D0_SAMPLE_TYPE_VOLUME; + case TEXTURE_CUBE_INDEX: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrcBit"); + return 0; } } @@ -211,7 +243,7 @@ do { \ GLuint coord = src_vector( p, &inst->SrcReg[0], program); \ /* Texel lookup */ \ \ - i915_emit_texld( p, \ + i915_emit_texld( p, get_live_regs(p, inst), \ get_result_vector( p, inst ), \ get_result_flags( inst ), \ sampler, \ @@ -234,6 +266,43 @@ do { \ #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 ) #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 ) +/* + * TODO: consider moving this into core + */ +static void calc_live_regs( struct i915_fragment_program *p ) +{ + const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; + GLuint regsUsed = 0xffff0000; + GLint i; + + for (i = program->Base.NumInstructions - 1; i >= 0; i--) { + struct prog_instruction *inst = &program->Base.Instructions[i]; + int opArgs = _mesa_num_inst_src_regs(inst->Opcode); + int a; + + /* Register is written to: unmark as live for this and preceeding ops */ + if (inst->DstReg.File == PROGRAM_TEMPORARY) + regsUsed &= ~(1 << inst->DstReg.Index); + + for (a = 0; a < opArgs; a++) { + /* Register is read from: mark as live for this and preceeding ops */ + if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) + regsUsed |= 1 << inst->SrcReg[a].Index; + } + + p->usedRegs[i] = regsUsed; + } +} + +static GLuint get_live_regs( struct i915_fragment_program *p, + const struct prog_instruction *inst ) +{ + const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; + GLuint nr = inst - program->Base.Instructions; + + return p->usedRegs[nr]; +} + /* Possible concerns: * @@ -246,9 +315,11 @@ do { \ * can lead to confusion -- hopefully we cope with it ok now. * */ -static void upload_program( struct i915_fragment_program *p ) +static void +upload_program(struct i915_fragment_program *p) { - const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; + const struct gl_fragment_program *program = + p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ @@ -258,510 +329,551 @@ static void upload_program( struct i915_fragment_program *p ) * this being uploaded to hardware. */ if (inst[0].Opcode == OPCODE_END) { - GLuint tmp = i915_get_utemp( p ); - i915_emit_arith( p, - A0_MOV, - UREG(REG_TYPE_OC, 0), - A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0); + GLuint tmp = i915_get_utemp(p); + i915_emit_arith(p, + A0_MOV, + UREG(REG_TYPE_OC, 0), + A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0); return; } + if (program->Base.NumInstructions > I915_MAX_INSN) { + i915_program_error( p, "Exceeded max instructions" ); + return; + } + + /* Not always needed: + */ + calc_live_regs(p); + while (1) { GLuint src0, src1, src2, flags; - GLuint tmp = 0; + GLuint tmp = 0, consts0 = 0, consts1 = 0; switch (inst->Opcode) { - case OPCODE_ABS: - src0 = src_vector( p, &inst->SrcReg[0], program); - i915_emit_arith( p, - A0_MAX, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - src0, negate(src0, 1,1,1,1), 0); - break; - - case OPCODE_ADD: - EMIT_2ARG_ARITH( A0_ADD ); - break; - - case OPCODE_CMP: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - src2 = src_vector( p, &inst->SrcReg[2], program); - i915_emit_arith( p, - A0_CMP, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - src0, src2, src1); /* NOTE: order of src2, src1 */ - break; + case OPCODE_ABS: + src0 = src_vector(p, &inst->SrcReg[0], program); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, inst), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; - case OPCODE_COS: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); + case OPCODE_ADD: + EMIT_2ARG_ARITH(A0_ADD); + break; - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, - i915_emit_const1f(p, 1.0/(M_PI * 2)), - 0); + case OPCODE_CMP: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + src2 = src_vector(p, &inst->SrcReg[2], program); + i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; - i915_emit_arith( p, - A0_MOD, + case OPCODE_COS: + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); + consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); + + /* Reduce range from repeating about [-pi,pi] to [-1,1] */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + src0, + swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ + swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ + + i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + i915_emit_arith(p, + A0_MAD, tmp, A0_DEST_CHANNEL_X, 0, - tmp, - 0, 0 ); + tmp, + swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ + swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ - /* By choosing different taylor constants, could get rid of this mul: + /* Compute COS with the same calculation used for SIN, but a + * different source range has been mapped to [-1,1] this time. */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - i915_emit_const1f(p, (M_PI * 2)), + + /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 - * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * result = DP4 t0, cos_constants - */ - i915_emit_arith( p, + /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ + i915_emit_arith(p, A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X,X,ONE,ONE), - swizzle(tmp, X,ONE,ONE,ONE), 0); + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + tmp, + 0); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X,Y,X,ONE), - swizzle(tmp, X,X,ONE,ONE), 0); + /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ + i915_emit_arith(p, + A0_DP3, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts1, X, Y, ZERO, ZERO), + 0); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X,X,Z,ONE), - swizzle(tmp, Z,ONE,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(tmp, ONE,Z,Y,X), - i915_emit_const4fv( p, cos_constants ), 0); - - break; - - case OPCODE_DP3: - EMIT_2ARG_ARITH( A0_DP3 ); - break; - - case OPCODE_DP4: - EMIT_2ARG_ARITH( A0_DP4 ); - break; - - case OPCODE_DPH: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0, X,Y,Z,ONE), src1, 0); - break; - - case OPCODE_DST: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - - /* result[0] = 1 * 1; - * result[1] = a[1] * b[1]; - * result[2] = a[2] * 1; - * result[3] = 1 * b[3]; + /* tmp.x now contains a first approximation (y). Now, weight it + * against tmp.y**2 to get closer. */ - i915_emit_arith( p, - A0_MUL, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0, ONE, Y, Z, ONE), - swizzle(src1, ONE, Y, ONE, W ), + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - break; - case OPCODE_EX2: - src0 = src_vector( p, &inst->SrcReg[0], program); + /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + swizzle(tmp, ZERO, Y, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); + + /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(consts1, W, W, W, W), + swizzle(tmp, Y, Y, Y, Y), + swizzle(tmp, X, X, X, X)); + break; - i915_emit_arith( p, - A0_EXP, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0,X,X,X,X), 0, 0); - break; + case OPCODE_DP3: + EMIT_2ARG_ARITH(A0_DP3); + break; - case OPCODE_FLR: - EMIT_1ARG_ARITH( A0_FLR ); - break; + case OPCODE_DP4: + EMIT_2ARG_ARITH(A0_DP4); + break; + + case OPCODE_DPH: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case OPCODE_DST: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; - case OPCODE_FRC: - EMIT_1ARG_ARITH( A0_FRC ); - break; + case OPCODE_EX2: + src0 = src_vector(p, &inst->SrcReg[0], program); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case OPCODE_FLR: + EMIT_1ARG_ARITH(A0_FLR); + break; + + case OPCODE_FRC: + EMIT_1ARG_ARITH(A0_FRC); + break; case OPCODE_KIL: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); - - i915_emit_texld( p, - tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ - 0, - src0, - T0_TEXKILL ); - break; - - case OPCODE_LG2: - src0 = src_vector( p, &inst->SrcReg[0], program); - - i915_emit_arith( p, - A0_LOG, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0,X,X,X,X), 0, 0); - break; - - case OPCODE_LIT: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); - - /* tmp = max( a.xyzw, a.00zw ) - * XXX: Clamp tmp.w to -128..128 - * tmp.y = log(tmp.y) - * tmp.y = tmp.w * tmp.y - * tmp.y = exp(tmp.y) - * result = cmp (a.11-x1, a.1x01, a.1xy1 ) - */ - i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, - src0, swizzle(src0, ZERO, ZERO, Z, W), 0 ); - - i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, Y, Y, Y, Y), 0, 0 ); - - i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, ZERO, Y, ZERO, ZERO), - swizzle(tmp, ZERO, W, ZERO, ZERO), 0 ); - - i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, - swizzle(tmp, Y, Y, Y, Y), 0, 0 ); - - i915_emit_arith( p, A0_CMP, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0), - swizzle(tmp, ONE, X, ZERO, ONE), - swizzle(tmp, ONE, X, Y, ONE)); - - break; - - case OPCODE_LRP: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - src2 = src_vector( p, &inst->SrcReg[2], program); - flags = get_result_flags( inst ); - tmp = i915_get_utemp( p ); - - /* b*a + c*(1-a) - * - * b*a + c - ca - * - * tmp = b*a + c, - * result = (-c)*a + tmp - */ - i915_emit_arith( p, A0_MAD, tmp, - flags & A0_DEST_CHANNEL_ALL, 0, - src1, src0, src2 ); + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); - i915_emit_arith( p, A0_MAD, - get_result_vector( p, inst ), - flags, 0, - negate(src2, 1,1,1,1), src0, tmp ); - break; + i915_emit_texld(p, get_live_regs(p, inst), + tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); + break; + + case OPCODE_LG2: + src0 = src_vector(p, &inst->SrcReg[0], program); + + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case OPCODE_LIT: + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (a.11-x1, a.1x01, a.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, inst), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case OPCODE_LRP: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + src2 = src_vector(p, &inst->SrcReg[2], program); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, inst), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; case OPCODE_MAD: - EMIT_3ARG_ARITH( A0_MAD ); - break; + EMIT_3ARG_ARITH(A0_MAD); + break; case OPCODE_MAX: - EMIT_2ARG_ARITH( A0_MAX ); - break; - - case OPCODE_MIN: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - tmp = i915_get_utemp( p ); - flags = get_result_flags( inst ); - - i915_emit_arith( p, - A0_MAX, - tmp, flags & A0_DEST_CHANNEL_ALL, 0, - negate(src0,1,1,1,1), - negate(src1,1,1,1,1), 0); - - i915_emit_arith( p, - A0_MOV, - get_result_vector( p, inst ), - flags, 0, - negate(tmp, 1,1,1,1), 0, 0); - break; - - case OPCODE_MOV: - EMIT_1ARG_ARITH( A0_MOV ); - break; - - case OPCODE_MUL: - EMIT_2ARG_ARITH( A0_MUL ); - break; - - case OPCODE_POW: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - tmp = i915_get_utemp( p ); - flags = get_result_flags( inst ); - - /* XXX: masking on intermediate values, here and elsewhere. - */ - i915_emit_arith( p, - A0_LOG, - tmp, A0_DEST_CHANNEL_X, 0, - swizzle(src0,X,X,X,X), 0, 0); + EMIT_2ARG_ARITH(A0_MAX); + break; - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, src1, 0); + case OPCODE_MIN: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, inst), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case OPCODE_MOV: + EMIT_1ARG_ARITH(A0_MOV); + break; + + case OPCODE_MUL: + EMIT_2ARG_ARITH(A0_MUL); + break; + + case OPCODE_POW: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); - i915_emit_arith( p, - A0_EXP, - get_result_vector( p, inst ), - flags, 0, - swizzle(tmp,X,X,X,X), 0, 0); - break; + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, inst), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); - case OPCODE_RCP: - src0 = src_vector( p, &inst->SrcReg[0], program); + break; + + case OPCODE_RCP: + src0 = src_vector(p, &inst->SrcReg[0], program); + + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; - i915_emit_arith( p, - A0_RCP, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0,X,X,X,X), 0, 0); - break; + case OPCODE_RSQ: - case OPCODE_RSQ: + src0 = src_vector(p, &inst->SrcReg[0], program); - src0 = src_vector( p, &inst->SrcReg[0], program); + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; - i915_emit_arith( p, - A0_RSQ, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0,X,X,X,X), 0, 0); - break; - case OPCODE_SCS: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); - - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * scs.x = DP4 t1, sin_constants - * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 - * scs.y = DP4 t1, cos_constants - */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(src0, X,X,ONE,ONE), - swizzle(src0, X,ONE,ONE,ONE), 0); + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.x = DP4 t1, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.y = DP4 t1, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + GLuint tmp1; + + if (inst->DstReg.WriteMask & WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, inst), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, inst), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,X,Y), - swizzle(tmp, X,X,ONE,ONE), 0); - - if (inst->DstReg.WriteMask & WRITEMASK_Y) { - GLuint tmp1; - - if (inst->DstReg.WriteMask & WRITEMASK_X) - tmp1 = i915_get_utemp( p ); - else - tmp1 = tmp; - - i915_emit_arith( p, - A0_MUL, - tmp1, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,Y,W), - swizzle(tmp, X,Z,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - A0_DEST_CHANNEL_Y, 0, - swizzle(tmp1, W,Z,Y,X), - i915_emit_const4fv( p, sin_constants ), 0); - } - - if (inst->DstReg.WriteMask & WRITEMASK_X) { - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XYZ, 0, - swizzle(tmp, X,X,Z,ONE), - swizzle(tmp, Z,ONE,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - A0_DEST_CHANNEL_X, 0, - swizzle(tmp, ONE,Z,Y,X), - i915_emit_const4fv( p, cos_constants ), 0); - } - break; - - case OPCODE_SGE: - EMIT_2ARG_ARITH( A0_SGE ); - break; + case OPCODE_SGE: + EMIT_2ARG_ARITH(A0_SGE); + break; case OPCODE_SIN: - src0 = src_vector( p, &inst->SrcReg[0], program); - tmp = i915_get_utemp( p ); + src0 = src_vector(p, &inst->SrcReg[0], program); + tmp = i915_get_utemp(p); + consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); + consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); + + /* Reduce range from repeating about [-pi,pi] to [-1,1] */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + src0, + swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ + swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ + + i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ + swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ + + /* Compute sin using a quadratic and quartic. It gives continuity + * that repeating the Taylor series lacks every 2*pi, and has + * reduced error. + * + * The idea was described at: + * http://www.devmaster.net/forums/showthread.php?t=5784 + */ + + /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), + 0); - i915_emit_arith( p, + /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ + i915_emit_arith(p, A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - src0, - i915_emit_const1f(p, 1.0/(M_PI * 2)), + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + tmp, 0); - i915_emit_arith( p, - A0_MOD, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - 0, 0 ); + /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ + i915_emit_arith(p, + A0_DP3, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, + swizzle(consts1, X, Y, ZERO, ZERO), + 0); - /* By choosing different taylor constants, could get rid of this mul: + /* tmp.x now contains a first approximation (y). Now, weight it + * against tmp.y**2 to get closer. */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_X, 0, - tmp, - i915_emit_const1f(p, (M_PI * 2)), + i915_emit_arith(p, + A0_MAX, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 0); - /* - * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 - * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x - * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x - * result = DP4 t1.wzyx, sin_constants - */ - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_XY, 0, - swizzle(tmp, X,X,ONE,ONE), - swizzle(tmp, X,ONE,ONE,ONE), 0); + /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ + i915_emit_arith(p, + A0_MAD, + tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, X, ZERO, ZERO), + swizzle(tmp, ZERO, Y, ZERO, ZERO), + negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,X,Y), - swizzle(tmp, X,X,ONE,ONE), 0); + /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(consts1, W, W, W, W), + swizzle(tmp, Y, Y, Y, Y), + swizzle(tmp, X, X, X, X)); - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(tmp, X,Y,Y,W), - swizzle(tmp, X,Z,ONE,ONE), 0); - - i915_emit_arith( p, - A0_DP4, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(tmp, W, Z, Y, X ), - i915_emit_const4fv( p, sin_constants ), 0); - break; - - case OPCODE_SLT: - EMIT_2ARG_ARITH( A0_SLT ); - break; - - case OPCODE_SUB: - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - - i915_emit_arith( p, - A0_ADD, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - src0, negate(src1, 1,1,1,1), 0); - break; - - case OPCODE_SWZ: - EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */ - break; - - case OPCODE_TEX: - EMIT_TEX( T0_TEXLD ); - break; + break; + + case OPCODE_SLT: + EMIT_2ARG_ARITH(A0_SLT); + break; + + case OPCODE_SUB: + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case OPCODE_SWZ: + EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */ + break; + + case OPCODE_TEX: + EMIT_TEX(T0_TEXLD); + break; case OPCODE_TXB: - EMIT_TEX( T0_TEXLDB ); - break; + EMIT_TEX(T0_TEXLDB); + break; case OPCODE_TXP: - EMIT_TEX( T0_TEXLDP ); - break; + EMIT_TEX(T0_TEXLDP); + break; case OPCODE_XPD: - /* Cross product: - * result.x = src0.y * src1.z - src0.z * src1.y; - * result.y = src0.z * src1.x - src0.x * src1.z; - * result.z = src0.x * src1.y - src0.y * src1.x; - * result.w = undef; - */ - src0 = src_vector( p, &inst->SrcReg[0], program); - src1 = src_vector( p, &inst->SrcReg[1], program); - tmp = i915_get_utemp( p ); - - i915_emit_arith( p, - A0_MUL, - tmp, A0_DEST_CHANNEL_ALL, 0, - swizzle(src0,Z,X,Y,ONE), - swizzle(src1,Y,Z,X,ONE), 0); - - i915_emit_arith( p, - A0_MAD, - get_result_vector( p, inst ), - get_result_flags( inst ), 0, - swizzle(src0,Y,Z,X,ONE), - swizzle(src1,Z,X,Y,ONE), - negate(tmp,1,1,1,0)); - break; + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + */ + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, inst), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; case OPCODE_END: - return; - + return; + default: - i915_program_error( p, "bad opcode" ); - return; + i915_program_error(p, "bad opcode"); + return; } inst++; - i915_release_utemps( p ); + i915_release_utemps(p); } } @@ -769,21 +881,22 @@ static void upload_program( struct i915_fragment_program *p ) * emit, just move the value into its correct position at the end of * the program: */ -static void fixup_depth_write( struct i915_fragment_program *p ) +static void +fixup_depth_write(struct i915_fragment_program *p) { if (p->depth_written) { GLuint depth = UREG(REG_TYPE_OD, 0); - i915_emit_arith( p, - A0_MOV, - depth, A0_DEST_CHANNEL_W, 0, - swizzle(depth,X,Y,Z,Z), - 0, 0); + i915_emit_arith(p, + A0_MOV, + depth, A0_DEST_CHANNEL_W, 0, + swizzle(depth, X, Y, Z, Z), 0, 0); } } -static void check_wpos( struct i915_fragment_program *p ) +static void +check_wpos(struct i915_fragment_program *p) { GLuint inputs = p->FragProg.Base.InputsRead; GLint i; @@ -791,12 +904,12 @@ static void check_wpos( struct i915_fragment_program *p ) p->wpos_tex = -1; for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & FRAG_BIT_TEX(i)) - continue; + if (inputs & FRAG_BIT_TEX(i)) + continue; else if (inputs & FRAG_BIT_WPOS) { - p->wpos_tex = i; - inputs &= ~FRAG_BIT_WPOS; - } + p->wpos_tex = i; + inputs &= ~FRAG_BIT_WPOS; + } } if (inputs & FRAG_BIT_WPOS) { @@ -805,139 +918,131 @@ static void check_wpos( struct i915_fragment_program *p ) } -static void translate_program( struct i915_fragment_program *p ) +static void +translate_program(struct i915_fragment_program *p) { - i915ContextPtr i915 = I915_CONTEXT(p->ctx); - - i915_init_program( i915, p ); - check_wpos( p ); - upload_program( p ); - fixup_depth_write( p ); - i915_fini_program( p ); - + struct i915_context *i915 = I915_CONTEXT(p->ctx); + + i915_init_program(i915, p); + check_wpos(p); + upload_program(p); + fixup_depth_write(p); + i915_fini_program(p); + p->translated = 1; } -static void track_params( struct i915_fragment_program *p ) +static void +track_params(struct i915_fragment_program *p) { GLint i; if (p->nr_params) - _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters); + _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters); for (i = 0; i < p->nr_params; i++) { GLint reg = p->param[i].reg; - COPY_4V( p->constant[reg], p->param[i].values ); + COPY_4V(p->constant[reg], p->param[i].values); } - + p->params_uptodate = 1; - p->on_hardware = 0; /* overkill */ + p->on_hardware = 0; /* overkill */ } -static void i915BindProgram( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) +static void +i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct i915_fragment_program *p = (struct i915_fragment_program *)prog; + struct i915_context *i915 = I915_CONTEXT(ctx); + struct i915_fragment_program *p = (struct i915_fragment_program *) prog; + + if (i915->current_program == p) + return; - if (i915->current_program == p) - return; - if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; + i915->current_program->on_hardware = 0; + i915->current_program->params_uptodate = 0; } - + i915->current_program = p; assert(p->on_hardware == 0); assert(p->params_uptodate == 0); - /* Hack: make sure fog is correctly enabled according to this - * fragment program's fog options. - */ - ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, - ctx->FragmentProgram.Enabled ); } } -static struct gl_program *i915NewProgram( GLcontext *ctx, - GLenum target, - GLuint id ) +static struct gl_program * +i915NewProgram(GLcontext * ctx, GLenum target, GLuint id) { switch (target) { case GL_VERTEX_PROGRAM_ARB: - return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program), - target, id ); - - case GL_FRAGMENT_PROGRAM_ARB: { - struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program); - if (prog) { - i915_init_program( I915_CONTEXT(ctx), prog ); - - return _mesa_init_fragment_program( ctx, &prog->FragProg, - target, id ); + return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), + target, id); + + case GL_FRAGMENT_PROGRAM_ARB:{ + struct i915_fragment_program *prog = + CALLOC_STRUCT(i915_fragment_program); + if (prog) { + i915_init_program(I915_CONTEXT(ctx), prog); + + return _mesa_init_fragment_program(ctx, &prog->FragProg, + target, id); + } + else + return NULL; } - else - return NULL; - } default: /* Just fallback: */ - return _mesa_new_program( ctx, target, id ); + return _mesa_new_program(ctx, target, id); } } -static void i915DeleteProgram( GLcontext *ctx, - struct gl_program *prog ) +static void +i915DeleteProgram(GLcontext * ctx, struct gl_program *prog) { if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct i915_fragment_program *p = (struct i915_fragment_program *)prog; - - if (i915->current_program == p) - i915->current_program = 0; + struct i915_context *i915 = I915_CONTEXT(ctx); + struct i915_fragment_program *p = (struct i915_fragment_program *) prog; + + if (i915->current_program == p) + i915->current_program = 0; } - _mesa_delete_program( ctx, prog ); + _mesa_delete_program(ctx, prog); } -static GLboolean i915IsProgramNative( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) +static GLboolean +i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct i915_fragment_program *p = (struct i915_fragment_program *)prog; + struct i915_fragment_program *p = (struct i915_fragment_program *) prog; if (!p->translated) - translate_program( p ); - + translate_program(p); + return !p->error; } else return GL_TRUE; } -static void i915ProgramStringNotify( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) +static void +i915ProgramStringNotify(GLcontext * ctx, + GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct i915_fragment_program *p = (struct i915_fragment_program *)prog; + struct i915_fragment_program *p = (struct i915_fragment_program *) prog; p->translated = 0; /* Hack: make sure fog is correctly enabled according to this * fragment program's fog options. */ - ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, - ctx->FragmentProgram.Enabled ); - if (p->FragProg.FogOption) { /* add extra instructions to do fog, then turn off FogOption field */ _mesa_append_fog_code(ctx, &p->FragProg); @@ -949,28 +1054,28 @@ static void i915ProgramStringNotify( GLcontext *ctx, } -void i915ValidateFragmentProgram( i915ContextPtr i915 ) +void +i915ValidateFragmentProgram(struct i915_context *i915) { GLcontext *ctx = &i915->intel.ctx; - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; - struct i915_fragment_program *p = - (struct i915_fragment_program *)ctx->FragmentProgram._Current; + struct i915_fragment_program *p = + (struct i915_fragment_program *) ctx->FragmentProgram._Current; const GLuint inputsRead = p->FragProg.Base.InputsRead; GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; GLuint s2 = S2_TEXCOORD_NONE; int i, offset = 0; - if (i915->current_program != p) - { + if (i915->current_program != p) { if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; + i915->current_program->on_hardware = 0; + i915->current_program->params_uptodate = 0; } - + i915->current_program = p; } @@ -979,8 +1084,8 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 ) */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; - if (!p->translated) - translate_program( p ); + if (!p->translated) + translate_program(p); intel->vertex_attr_count = 0; intel->wpos_offset = 0; @@ -989,31 +1094,31 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 ) intel->specoffset = 0; if (inputsRead & FRAG_BITS_TEX_ANY) { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 ); + EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16); } else { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 ); + EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); } if (inputsRead & FRAG_BIT_COL0) { intel->coloroffset = offset / 4; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 ); + EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); } - - if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) || + + if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) || i915->vertex_fog != I915_FOG_NONE) { if (inputsRead & FRAG_BIT_COL1) { - intel->specoffset = offset / 4; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 ); + intel->specoffset = offset / 4; + EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3); } else - EMIT_PAD(3); + EMIT_PAD(3); - if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 ); + if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) + EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1); else - EMIT_PAD( 1 ); + EMIT_PAD(1); } /* XXX this was disabled, but enabling this code helped fix the Glean @@ -1021,63 +1126,66 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 ) */ #if 1 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) { - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 ); + EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); } #endif for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { if (inputsRead & FRAG_BIT_TEX(i)) { - int sz = VB->TexCoordPtr[i]->size; - - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + int sz = VB->TexCoordPtr[i]->size; - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 ); + s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + + EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } else if (i == p->wpos_tex) { - - /* If WPOS is required, duplicate the XYZ position data in an - * unused texture coordinate: - */ - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3)); - intel->wpos_offset = offset; - intel->wpos_size = 3 * sizeof(GLuint); + /* If WPOS is required, duplicate the XYZ position data in an + * unused texture coordinate: + */ + s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3)); + + intel->wpos_offset = offset; + intel->wpos_size = 3 * sizeof(GLuint); - EMIT_PAD( intel->wpos_size ); - } + EMIT_PAD(intel->wpos_size); + } } if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { - - I915_STATECHANGE( i915, I915_UPLOAD_CTX ); + int k; + + I915_STATECHANGE(i915, I915_UPLOAD_CTX); /* Must do this *after* statechange, so as not to affect * buffered vertices reliant on the old state: */ - intel->vertex_size = _tnl_install_attrs( &intel->ctx, - intel->vertex_attrs, - intel->vertex_attr_count, - intel->ViewportMatrix.m, 0 ); + intel->vertex_size = _tnl_install_attrs(&intel->ctx, + intel->vertex_attrs, + intel->vertex_attr_count, + intel->ViewportMatrix.m, 0); intel->vertex_size >>= 2; i915->state.Ctx[I915_CTXREG_LIS2] = s2; i915->state.Ctx[I915_CTXREG_LIS4] = s4; - assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size )); + k = intel->vtbl.check_vertex_size(intel, intel->vertex_size); + assert(k); } - if (!p->params_uptodate) - track_params( p ); + if (!p->params_uptodate) + track_params(p); - if (!p->on_hardware) - i915_upload_program( i915, p ); + if (!p->on_hardware) + i915_upload_program(i915, p); } -void i915InitFragProgFuncs( struct dd_function_table *functions ) +void +i915InitFragProgFuncs(struct dd_function_table *functions) { functions->BindProgram = i915BindProgram; functions->NewProgram = i915NewProgram; diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c index 1be7ac4..73aa634 100644 --- a/i915/i915_metaops.c +++ b/i915/i915_metaops.c @@ -34,128 +34,169 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_ioctl.h" -#include "intel_rotate.h" +#include "intel_regions.h" #include "i915_context.h" #include "i915_reg.h" -/* A large amount of state doesn't need to be uploaded. +/* We touch almost everything: */ -#define ACTIVE (I915_UPLOAD_INVARIENT | \ - I915_UPLOAD_PROGRAM | \ - I915_UPLOAD_STIPPLE | \ +#define ACTIVE (I915_UPLOAD_INVARIENT | \ I915_UPLOAD_CTX | \ I915_UPLOAD_BUFFERS | \ - I915_UPLOAD_TEX(0)) + I915_UPLOAD_STIPPLE | \ + I915_UPLOAD_PROGRAM | \ + I915_UPLOAD_FOG | \ + I915_UPLOAD_TEX(0)) -#define SET_STATE( i915, STATE ) \ +#define SET_STATE( i915, STATE ) \ do { \ i915->current->emitted &= ~ACTIVE; \ - i915->current = &i915->STATE; \ + i915->current = &i915->STATE; \ i915->current->emitted &= ~ACTIVE; \ } while (0) -/* Operations where the 3D engine is decoupled temporarily from the - * current GL state and used for other purposes than simply rendering - * incoming triangles. - */ -static void set_initial_state( i915ContextPtr i915 ) -{ - memcpy(&i915->meta, &i915->initial, sizeof(i915->meta) ); - i915->meta.active = ACTIVE; - i915->meta.emitted = 0; -} - -static void set_no_depth_stencil_write( i915ContextPtr i915 ) +static void +meta_no_stencil_write(struct intel_context *intel) { + struct i915_context *i915 = i915_context(&intel->ctx); + /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ) */ - i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); + i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE); + + i915->meta.emitted &= ~I915_UPLOAD_CTX; +} + +static void +meta_no_depth_write(struct intel_context *intel) +{ + struct i915_context *i915 = i915_context(&intel->ctx); /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) */ i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE | - S6_DEPTH_WRITE_ENABLE); + S6_DEPTH_WRITE_ENABLE); i915->meta.emitted &= ~I915_UPLOAD_CTX; } +static void +meta_depth_replace(struct intel_context *intel) +{ + struct i915_context *i915 = i915_context(&intel->ctx); + + /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE ) + * ctx->Driver.DepthMask( ctx, GL_TRUE ) + */ + i915->meta.Ctx[I915_CTXREG_LIS6] |= (S6_DEPTH_TEST_ENABLE | + S6_DEPTH_WRITE_ENABLE); + + /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) + */ + i915->meta.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK; + i915->meta.Ctx[I915_CTXREG_LIS6] |= + COMPAREFUNC_ALWAYS << S6_DEPTH_TEST_FUNC_SHIFT; + + i915->meta.emitted &= ~I915_UPLOAD_CTX; +} + + /* Set stencil unit to replace always with the reference value. */ -static void set_stencil_replace( i915ContextPtr i915, - GLuint s_mask, - GLuint s_clear) +static void +meta_stencil_replace(struct intel_context *intel, + GLuint s_mask, GLuint s_clear) { + struct i915_context *i915 = i915_context(&intel->ctx); GLuint op = STENCILOP_REPLACE; GLuint func = COMPAREFUNC_ALWAYS; /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ) */ - i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); - - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) - */ - i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE | - S6_DEPTH_WRITE_ENABLE); - + i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE); /* ctx->Driver.StencilMask( ctx, s_mask ) */ i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(s_mask)); - + STENCIL_WRITE_MASK(s_mask)); /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE ) */ i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); + S5_STENCIL_PASS_Z_FAIL_MASK | + S5_STENCIL_PASS_Z_PASS_MASK); i915->meta.Ctx[I915_CTXREG_LIS5] |= ((op << S5_STENCIL_FAIL_SHIFT) | - (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (op << S5_STENCIL_PASS_Z_PASS_SHIFT)); + (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (op << S5_STENCIL_PASS_Z_PASS_SHIFT)); /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_ref, ~0 ) */ i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff)); + STENCIL_TEST_MASK(0xff)); i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK); - - i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) | - (func << S5_STENCIL_TEST_FUNC_SHIFT)); + S5_STENCIL_TEST_FUNC_MASK); + + i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) | + (func << S5_STENCIL_TEST_FUNC_SHIFT)); i915->meta.emitted &= ~I915_UPLOAD_CTX; } -static void set_color_mask( i915ContextPtr i915, GLboolean state ) +static void +meta_color_mask(struct intel_context *intel, GLboolean state) { + struct i915_context *i915 = i915_context(&intel->ctx); const GLuint mask = (S5_WRITEDISABLE_RED | - S5_WRITEDISABLE_GREEN | - S5_WRITEDISABLE_BLUE | - S5_WRITEDISABLE_ALPHA); + S5_WRITEDISABLE_GREEN | + S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA); /* Copy colormask state from "regular" hw context. */ if (state) { i915->meta.Ctx[I915_CTXREG_LIS5] &= ~mask; - i915->meta.Ctx[I915_CTXREG_LIS5] |= - (i915->state.Ctx[I915_CTXREG_LIS5] & mask); + i915->meta.Ctx[I915_CTXREG_LIS5] |= + (i915->state.Ctx[I915_CTXREG_LIS5] & mask); } - else + else i915->meta.Ctx[I915_CTXREG_LIS5] |= mask; - + + i915->meta.emitted &= ~I915_UPLOAD_CTX; +} + + + +static void +meta_import_pixel_state(struct intel_context *intel) +{ + struct i915_context *i915 = i915_context(&intel->ctx); + memcpy(i915->meta.Fog, i915->state.Fog, I915_FOG_SETUP_SIZE * 4); + + i915->meta.Ctx[I915_CTXREG_LIS5] = i915->state.Ctx[I915_CTXREG_LIS5]; + i915->meta.Ctx[I915_CTXREG_LIS6] = i915->state.Ctx[I915_CTXREG_LIS6]; + i915->meta.Ctx[I915_CTXREG_STATE4] = i915->state.Ctx[I915_CTXREG_STATE4]; + i915->meta.Ctx[I915_CTXREG_BLENDCOLOR1] = + i915->state.Ctx[I915_CTXREG_BLENDCOLOR1]; + i915->meta.Ctx[I915_CTXREG_IAB] = i915->state.Ctx[I915_CTXREG_IAB]; + + i915->meta.Buffer[I915_DESTREG_SENABLE] = + i915->state.Buffer[I915_DESTREG_SENABLE]; + i915->meta.Buffer[I915_DESTREG_SR1] = i915->state.Buffer[I915_DESTREG_SR1]; + i915->meta.Buffer[I915_DESTREG_SR2] = i915->state.Buffer[I915_DESTREG_SR2]; + + i915->meta.emitted &= ~I915_UPLOAD_FOG; + i915->meta.emitted &= ~I915_UPLOAD_BUFFERS; i915->meta.emitted &= ~I915_UPLOAD_CTX; } @@ -212,69 +253,64 @@ static void set_color_mask( i915ContextPtr i915, GLboolean state ) -static void set_no_texture( i915ContextPtr i915 ) +static void +meta_no_texture(struct intel_context *intel) { + struct i915_context *i915 = i915_context(&intel->ctx); + static const GLuint prog[] = { _3DSTATE_PIXEL_SHADER_PROGRAM, /* Declare incoming diffuse color: */ - (D0_DCL | - D0_DECL_REG( REG_T_DIFFUSE ) | - D0_CHANNEL_ALL), + (D0_DCL | D0_DECL_REG(REG_T_DIFFUSE) | D0_CHANNEL_ALL), D1_MBZ, D2_MBZ, /* output-color = mov(t_diffuse) */ (A0_MOV | - A0_DEST_REG( REG_OC ) | - A0_DEST_CHANNEL_ALL | - A0_SRC0_REG( REG_T_DIFFUSE )), + A0_DEST_REG(REG_OC) | + A0_DEST_CHANNEL_ALL | A0_SRC0_REG(REG_T_DIFFUSE)), (A1_SRC0_XYZW), 0, }; - - memcpy( i915->meta.Program, prog, sizeof(prog) ); + + memcpy(i915->meta.Program, prog, sizeof(prog)); i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog); i915->meta.Program[0] |= i915->meta.ProgramSize - 2; i915->meta.emitted &= ~I915_UPLOAD_PROGRAM; } - -static void enable_texture_blend_replace( i915ContextPtr i915 ) +static void +meta_texture_blend_replace(struct intel_context *intel) { + struct i915_context *i915 = i915_context(&intel->ctx); + static const GLuint prog[] = { _3DSTATE_PIXEL_SHADER_PROGRAM, /* Declare the sampler: */ - (D0_DCL | - D0_DECL_REG( REG_S(0) ) | - D0_SAMPLE_TYPE_2D | - D0_CHANNEL_NONE), + (D0_DCL | D0_DECL_REG(REG_S(0)) | D0_SAMPLE_TYPE_2D | D0_CHANNEL_NONE), D1_MBZ, D2_MBZ, /* Declare the interpolated texture coordinate: */ - (D0_DCL | - D0_DECL_REG( REG_T_TEX(0) ) | - D0_CHANNEL_ALL), + (D0_DCL | D0_DECL_REG(REG_T_TEX(0)) | D0_CHANNEL_ALL), D1_MBZ, D2_MBZ, /* output-color = texld(sample0, texcoord0) */ - (T0_TEXLD | - T0_DEST_REG( REG_OC ) | - T0_SAMPLER( 0 )), + (T0_TEXLD | T0_DEST_REG(REG_OC) | T0_SAMPLER(0)), T1_ADDRESS_REG(REG_TYPE_T, 0), T2_MBZ }; - memcpy( i915->meta.Program, prog, sizeof(prog) ); + memcpy(i915->meta.Program, prog, sizeof(prog)); i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog); i915->meta.Program[0] |= i915->meta.ProgramSize - 2; i915->meta.emitted &= ~I915_UPLOAD_PROGRAM; @@ -287,425 +323,186 @@ static void enable_texture_blend_replace( i915ContextPtr i915 ) /* Set up an arbitary piece of memory as a rectangular texture * (including the front or back buffer). */ -static void set_tex_rect_source( i915ContextPtr i915, - GLuint offset, - GLuint width, - GLuint height, - GLuint pitch, /* in bytes! */ - GLuint textureFormat ) +static GLboolean +meta_tex_rect_source(struct intel_context *intel, + dri_bo *buffer, + GLuint offset, + GLuint pitch, GLuint height, GLenum format, GLenum type) { + struct i915_context *i915 = i915_context(&intel->ctx); GLuint unit = 0; GLint numLevels = 1; GLuint *state = i915->meta.Tex[0]; + GLuint textureFormat; + GLuint cpp; -#if 0 - printf("TexRect source offset 0x%x pitch %d\n", offset, pitch); -#endif + /* A full implementation of this would do the upload through + * glTexImage2d, and get all the conversion operations at that + * point. We are restricted, but still at least have access to the + * fragment program swizzle. + */ + switch (format) { + case GL_BGRA: + switch (type) { + case GL_UNSIGNED_INT_8_8_8_8_REV: + case GL_UNSIGNED_BYTE: + textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888); + cpp = 4; + break; + default: + return GL_FALSE; + } + break; + case GL_RGBA: + switch (type) { + case GL_UNSIGNED_INT_8_8_8_8_REV: + case GL_UNSIGNED_BYTE: + textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888); + cpp = 4; + break; + default: + return GL_FALSE; + } + break; + case GL_BGR: + switch (type) { + case GL_UNSIGNED_SHORT_5_6_5_REV: + textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); + cpp = 2; + break; + default: + return GL_FALSE; + } + break; + case GL_RGB: + switch (type) { + case GL_UNSIGNED_SHORT_5_6_5: + textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565); + cpp = 2; + break; + default: + return GL_FALSE; + } + break; -/* fprintf(stderr, "%s: offset: %x w: %d h: %d pitch %d format %x\n", */ -/* __FUNCTION__, offset, width, height, pitch, textureFormat ); */ + default: + return GL_FALSE; + } + + + if ((pitch * cpp) & 3) { + _mesa_printf("%s: texture is not dword pitch\n", __FUNCTION__); + return GL_FALSE; + } + +/* intel_region_release(&i915->meta.tex_region[0]); */ +/* intel_region_reference(&i915->meta.tex_region[0], region); */ + i915->meta.tex_buffer[0] = buffer; + i915->meta.tex_offset[0] = offset; - state[I915_TEXREG_MS2] = offset; state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) | - ((width - 1) << MS3_WIDTH_SHIFT) | - textureFormat | - MS3_USE_FENCE_REGS); + ((pitch - 1) << MS3_WIDTH_SHIFT) | + textureFormat | MS3_USE_FENCE_REGS); - state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | - ((((numLevels-1) * 4)) << MS4_MAX_LOD_SHIFT)); + state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) | + MS4_CUBE_FACE_ENA_MASK | + ((((numLevels - 1) * 4)) << MS4_MAX_LOD_SHIFT)); state[I915_TEXREG_SS2] = ((FILTER_NEAREST << SS2_MIN_FILTER_SHIFT) | - (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | - (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT)); + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT)); + state[I915_TEXREG_SS3] = ((TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) | - (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) | - (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) | - (unit<<SS3_TEXTUREMAP_INDEX_SHIFT)); + (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) | + (unit << SS3_TEXTUREMAP_INDEX_SHIFT)); state[I915_TEXREG_SS4] = 0; i915->meta.emitted &= ~I915_UPLOAD_TEX(0); + return GL_TRUE; } -/* Select between front and back draw buffers. +/** + * Set the color and depth drawing region for meta ops. */ -static void set_draw_region( i915ContextPtr i915, const intelRegion *region ) +static void +meta_draw_region(struct intel_context *intel, + struct intel_region *color_region, + struct intel_region *depth_region) { -#if 0 - printf("Rotate into region: offset 0x%x pitch %d\n", - region->offset, region->pitch); -#endif - i915->meta.Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i915->meta.Buffer[I915_DESTREG_CBUFADDR2] = region->offset; - i915->meta.emitted &= ~I915_UPLOAD_BUFFERS; + struct i915_context *i915 = i915_context(&intel->ctx); + i915_state_draw_region(intel, &i915->meta, color_region, depth_region); } -#if 0 -/* Setup an arbitary draw format, useful for targeting texture or agp - * memory. - */ -static void set_draw_format( i915ContextPtr i915, - GLuint format, - GLuint depth_format) +static void +set_vertex_format(struct intel_context *intel) { - i915->meta.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - format | - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - depth_format); - - i915->meta.emitted &= ~I915_UPLOAD_BUFFERS; -/* fprintf(stderr, "%s: DV1: %x\n", */ -/* __FUNCTION__, i915->meta.Buffer[I915_DESTREG_DV1]); */ -} -#endif + struct i915_context *i915 = i915_context(&intel->ctx); -static void set_vertex_format( i915ContextPtr i915 ) -{ - i915->meta.Ctx[I915_CTXREG_LIS2] = + i915->meta.Ctx[I915_CTXREG_LIS2] = (S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | - S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | - S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); i915->meta.Ctx[I915_CTXREG_LIS4] &= ~S4_VFMT_MASK; - i915->meta.Ctx[I915_CTXREG_LIS4] |= - (S4_VFMT_COLOR | - S4_VFMT_SPEC_FOG | - S4_VFMT_XYZW); + i915->meta.Ctx[I915_CTXREG_LIS4] |= (S4_VFMT_COLOR | S4_VFMT_XYZ); i915->meta.emitted &= ~I915_UPLOAD_CTX; - } -static void draw_quad(i915ContextPtr i915, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1 ) -{ - GLuint vertex_size = 8; - GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel, - PRIM3D_TRIFAN, - 4 * vertex_size, - vertex_size ); - intelVertex tmp; - int i; - - if (0) - fprintf(stderr, "%s: %f,%f-%f,%f 0x%x%x%x%x %f,%f-%f,%f\n", - __FUNCTION__, - x0,y0,x1,y1,red,green,blue,alpha,s0,t0,s1,t1); - - - /* initial vertex, left bottom */ - tmp.v.x = x0; - tmp.v.y = y0; - tmp.v.z = 1.0; - tmp.v.w = 1.0; - tmp.v.color.red = red; - tmp.v.color.green = green; - tmp.v.color.blue = blue; - tmp.v.color.alpha = alpha; - tmp.v.specular.red = 0; - tmp.v.specular.green = 0; - tmp.v.specular.blue = 0; - tmp.v.specular.alpha = 0; - tmp.v.u0 = s0; - tmp.v.v0 = t0; - - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; - - /* right bottom */ - vb += vertex_size; - tmp.v.x = x1; - tmp.v.u0 = s1; - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; - - /* right top */ - vb += vertex_size; - tmp.v.y = y1; - tmp.v.v0 = t1; - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; - - /* left top */ - vb += vertex_size; - tmp.v.x = x0; - tmp.v.u0 = s0; - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; -} - -static void draw_poly(i915ContextPtr i915, - GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha, - GLuint numVerts, - /*const*/ GLfloat verts[][2], - /*const*/ GLfloat texcoords[][2]) +/* Operations where the 3D engine is decoupled temporarily from the + * current GL state and used for other purposes than simply rendering + * incoming triangles. + */ +static void +install_meta_state(struct intel_context *intel) { - GLuint vertex_size = 8; - GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel, - PRIM3D_TRIFAN, - numVerts * vertex_size, - vertex_size ); - intelVertex tmp; - int i, k; - - /* initial constant vertex fields */ - tmp.v.z = 1.0; - tmp.v.w = 1.0; - tmp.v.color.red = red; - tmp.v.color.green = green; - tmp.v.color.blue = blue; - tmp.v.color.alpha = alpha; - tmp.v.specular.red = 0; - tmp.v.specular.green = 0; - tmp.v.specular.blue = 0; - tmp.v.specular.alpha = 0; - - for (k = 0; k < numVerts; k++) { - tmp.v.x = verts[k][0]; - tmp.v.y = verts[k][1]; - tmp.v.u0 = texcoords[k][0]; - tmp.v.v0 = texcoords[k][1]; - - for (i = 0 ; i < vertex_size ; i++) - vb[i] = tmp.ui[i]; - - vb += vertex_size; - } -} + struct i915_context *i915 = i915_context(&intel->ctx); + memcpy(&i915->meta, &i915->initial, sizeof(i915->meta)); + i915->meta.active = ACTIVE; + i915->meta.emitted = 0; + SET_STATE(i915, meta); + set_vertex_format(intel); + meta_no_texture(intel); +} -void -i915ClearWithTris(intelContextPtr intel, GLbitfield buffers, - GLboolean allFoo, - GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo) +static void +leave_meta_state(struct intel_context *intel) { - i915ContextPtr i915 = I915_CONTEXT( intel ); - __DRIdrawablePrivate *dPriv = intel->driDrawable; - intelScreenPrivate *screen = intel->intelScreen; - int x0, y0, x1, y1; - GLint cx, cy, cw, ch; - GLboolean all; - - SET_STATE( i915, meta ); - set_initial_state( i915 ); - set_no_texture( i915 ); - set_vertex_format( i915 ); - - LOCK_HARDWARE(intel); - - /* get clear bounds after locking */ - cx = intel->ctx.DrawBuffer->_Xmin; - cy = intel->ctx.DrawBuffer->_Ymin; - cw = intel->ctx.DrawBuffer->_Xmax - cx; - ch = intel->ctx.DrawBuffer->_Ymax - cy; - all = (cw == intel->ctx.DrawBuffer->Width && - ch == intel->ctx.DrawBuffer->Height); - - if (!all) { - x0 = cx; - y0 = cy; - x1 = x0 + cw; - y1 = y0 + ch; - } else { - x0 = 0; - y0 = 0; - x1 = x0 + dPriv->w; - y1 = y0 + dPriv->h; - } - - /* Don't do any clipping to screen - these are window coordinates. - * The active cliprects will be applied as for any other geometry. - */ - - if (buffers & BUFFER_BIT_FRONT_LEFT) { - set_no_depth_stencil_write( i915 ); - set_color_mask( i915, GL_TRUE ); - set_draw_region( i915, &screen->front ); - - draw_quad(i915, x0, x1, y0, y1, - intel->clear_red, intel->clear_green, - intel->clear_blue, intel->clear_alpha, - 0, 0, 0, 0); - } - - if (buffers & BUFFER_BIT_BACK_LEFT) { - set_no_depth_stencil_write( i915 ); - set_color_mask( i915, GL_TRUE ); - set_draw_region( i915, &screen->back ); - - draw_quad(i915, x0, x1, y0, y1, - intel->clear_red, intel->clear_green, - intel->clear_blue, intel->clear_alpha, - 0, 0, 0, 0); - } - - if (buffers & BUFFER_BIT_STENCIL) { - set_stencil_replace( i915, - intel->ctx.Stencil.WriteMask[0], - intel->ctx.Stencil.Clear); - - set_color_mask( i915, GL_FALSE ); - set_draw_region( i915, &screen->front ); /* could be either? */ - - draw_quad( i915, x0, x1, y0, y1, 0, 0, 0, 0, 0, 0, 0, 0 ); - } - - UNLOCK_HARDWARE(intel); - - SET_STATE( i915, state ); + struct i915_context *i915 = i915_context(&intel->ctx); + intel_region_release(&i915->meta.draw_region); + intel_region_release(&i915->meta.depth_region); +/* intel_region_release(&i915->meta.tex_region[0]); */ + SET_STATE(i915, state); } -/** - * Copy the window contents named by dPriv to the rotated (or reflected) - * color buffer. - * srcBuf is BUFFER_BIT_FRONT_LEFT or BUFFER_BIT_BACK_LEFT to indicate the source. - */ + void -i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv, - GLuint srcBuf) +i915InitMetaFuncs(struct i915_context *i915) { - i915ContextPtr i915 = I915_CONTEXT( intel ); - intelScreenPrivate *screen = intel->intelScreen; - const GLuint cpp = screen->cpp; - drm_clip_rect_t fullRect; - GLuint textureFormat, srcOffset, srcPitch; - const drm_clip_rect_t *clipRects; - int numClipRects; - int i; - - int xOrig, yOrig; - int origNumClipRects; - drm_clip_rect_t *origRects; - - /* - * set up hardware state - */ - intelFlush( &intel->ctx ); - - SET_STATE( i915, meta ); - set_initial_state( i915 ); - set_no_texture( i915 ); - set_vertex_format( i915 ); - set_no_depth_stencil_write( i915 ); - set_color_mask( i915, GL_TRUE ); - - LOCK_HARDWARE(intel); - - /* save current drawing origin and cliprects (restored at end) */ - xOrig = intel->drawX; - yOrig = intel->drawY; - origNumClipRects = intel->numClipRects; - origRects = intel->pClipRects; - - if (!intel->numClipRects) - goto done; - - /* - * set drawing origin, cliprects for full-screen access to rotated screen - */ - fullRect.x1 = 0; - fullRect.y1 = 0; - fullRect.x2 = screen->rotatedWidth; - fullRect.y2 = screen->rotatedHeight; - intel->drawX = 0; - intel->drawY = 0; - intel->numClipRects = 1; - intel->pClipRects = &fullRect; - - set_draw_region( i915, &screen->rotated ); - - if (cpp == 4) - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - else - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - - if (srcBuf == BUFFER_BIT_FRONT_LEFT) { - srcPitch = screen->front.pitch; /* in bytes */ - srcOffset = screen->front.offset; /* bytes */ - clipRects = dPriv->pClipRects; - numClipRects = dPriv->numClipRects; - } - else { - srcPitch = screen->back.pitch; /* in bytes */ - srcOffset = screen->back.offset; /* bytes */ - clipRects = dPriv->pBackClipRects; - numClipRects = dPriv->numBackClipRects; - } - - /* set the whole screen up as a texture to avoid alignment issues */ - set_tex_rect_source(i915, - srcOffset, - screen->width, - screen->height, - srcPitch, - textureFormat); - - enable_texture_blend_replace(i915); - - /* - * loop over the source window's cliprects - */ - for (i = 0; i < numClipRects; i++) { - int srcX0 = clipRects[i].x1; - int srcY0 = clipRects[i].y1; - int srcX1 = clipRects[i].x2; - int srcY1 = clipRects[i].y2; - GLfloat verts[4][2], tex[4][2]; - int j; - - /* build vertices for four corners of clip rect */ - verts[0][0] = srcX0; verts[0][1] = srcY0; - verts[1][0] = srcX1; verts[1][1] = srcY0; - verts[2][0] = srcX1; verts[2][1] = srcY1; - verts[3][0] = srcX0; verts[3][1] = srcY1; - - /* .. and texcoords */ - tex[0][0] = srcX0; tex[0][1] = srcY0; - tex[1][0] = srcX1; tex[1][1] = srcY0; - tex[2][0] = srcX1; tex[2][1] = srcY1; - tex[3][0] = srcX0; tex[3][1] = srcY1; - - /* transform coords to rotated screen coords */ - for (j = 0; j < 4; j++) { - matrix23TransformCoordf(&screen->rotMatrix, - &verts[j][0], &verts[j][1]); - } - - /* draw polygon to map source image to dest region */ - draw_poly(i915, 255, 255, 255, 255, 4, verts, tex); - - } /* cliprect loop */ - - intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE ); - - done: - /* restore original drawing origin and cliprects */ - intel->drawX = xOrig; - intel->drawY = yOrig; - intel->numClipRects = origNumClipRects; - intel->pClipRects = origRects; - - UNLOCK_HARDWARE(intel); - - SET_STATE( i915, state ); + i915->intel.vtbl.install_meta_state = install_meta_state; + i915->intel.vtbl.leave_meta_state = leave_meta_state; + i915->intel.vtbl.meta_no_depth_write = meta_no_depth_write; + i915->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write; + i915->intel.vtbl.meta_stencil_replace = meta_stencil_replace; + i915->intel.vtbl.meta_depth_replace = meta_depth_replace; + i915->intel.vtbl.meta_color_mask = meta_color_mask; + i915->intel.vtbl.meta_no_texture = meta_no_texture; + i915->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; + i915->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; + i915->intel.vtbl.meta_draw_region = meta_draw_region; + i915->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; } - diff --git a/i915/i915_program.c b/i915/i915_program.c index 6849112..f79d00d 100644 --- a/i915/i915_program.c +++ b/i915/i915_program.c @@ -72,58 +72,62 @@ #define I915_CONSTFLAG_PARAM 0x1f -GLuint i915_get_temp( struct i915_fragment_program *p ) +GLuint +i915_get_temp(struct i915_fragment_program *p) { - int bit = ffs( ~p->temp_flag ); + int bit = ffs(~p->temp_flag); if (!bit) { fprintf(stderr, "%s: out of temporaries\n", __FILE__); exit(1); } - p->temp_flag |= 1<<(bit-1); - return UREG(REG_TYPE_R, (bit-1)); + p->temp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_R, (bit - 1)); } -GLuint i915_get_utemp( struct i915_fragment_program *p ) +GLuint +i915_get_utemp(struct i915_fragment_program * p) { - int bit = ffs( ~p->utemp_flag ); + int bit = ffs(~p->utemp_flag); if (!bit) { fprintf(stderr, "%s: out of temporaries\n", __FILE__); exit(1); } - p->utemp_flag |= 1<<(bit-1); - return UREG(REG_TYPE_U, (bit-1)); + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); } -void i915_release_utemps( struct i915_fragment_program *p ) +void +i915_release_utemps(struct i915_fragment_program *p) { p->utemp_flag = ~0x7; } -GLuint i915_emit_decl( struct i915_fragment_program *p, - GLuint type, GLuint nr, GLuint d0_flags ) +GLuint +i915_emit_decl(struct i915_fragment_program *p, + GLuint type, GLuint nr, GLuint d0_flags) { GLuint reg = UREG(type, nr); if (type == REG_TYPE_T) { - if (p->decl_t & (1<<nr)) - return reg; + if (p->decl_t & (1 << nr)) + return reg; - p->decl_t |= (1<<nr); + p->decl_t |= (1 << nr); } else if (type == REG_TYPE_S) { - if (p->decl_s & (1<<nr)) - return reg; + if (p->decl_s & (1 << nr)) + return reg; - p->decl_s |= (1<<nr); + p->decl_s |= (1 << nr); } - else + else return reg; - *(p->decl++) = (D0_DCL | D0_DEST( reg ) | d0_flags); + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); *(p->decl++) = D1_MBZ; *(p->decl++) = D2_MBZ; @@ -131,24 +135,26 @@ GLuint i915_emit_decl( struct i915_fragment_program *p, return reg; } -GLuint i915_emit_arith( struct i915_fragment_program *p, - GLuint op, - GLuint dest, - GLuint mask, - GLuint saturate, - GLuint src0, - GLuint src1, - GLuint src2 ) +GLuint +i915_emit_arith(struct i915_fragment_program * p, + GLuint op, + GLuint dest, + GLuint mask, + GLuint saturate, GLuint src0, GLuint src1, GLuint src2) { GLuint c[3]; GLuint nr_const = 0; assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); - assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); - if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) c[nr_const++] = 0; - if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) c[nr_const++] = 1; - if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) c[nr_const++] = 2; + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; /* Recursively call this function to MOV additional const values * into temporary registers. Use utemp registers for this - @@ -164,57 +170,67 @@ GLuint i915_emit_arith( struct i915_fragment_program *p, old_utemp_flag = p->utemp_flag; first = GET_UREG_NR(s[c[0]]); - for (i = 1 ; i < nr_const ; i++) { - if (GET_UREG_NR(s[c[i]]) != first) { - GLuint tmp = i915_get_utemp(p); - - i915_emit_arith( p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, - s[c[i]], 0, 0 ); - s[c[i]] = tmp; - } + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + GLuint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } } src0 = s[0]; src1 = s[1]; src2 = s[2]; - p->utemp_flag = old_utemp_flag; /* restore */ + p->utemp_flag = old_utemp_flag; /* restore */ } - *(p->csr++) = (op | - A0_DEST( dest ) | - mask | - saturate | - A0_SRC0( src0 )); - *(p->csr++) = (A1_SRC0( src0 ) | - A1_SRC1( src1 )); - *(p->csr++) = (A2_SRC1( src1 ) | - A2_SRC2( src2 )); + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); p->nr_alu_insn++; return dest; } +static GLuint get_free_rreg (struct i915_fragment_program *p, + GLuint live_regs) +{ + int bit = ffs(~live_regs); + if (!bit) { + i915_program_error(p, "Can't find free R reg"); + return UREG_BAD; + } + return UREG(REG_TYPE_R, bit - 1); +} + GLuint i915_emit_texld( struct i915_fragment_program *p, + GLuint live_regs, GLuint dest, GLuint destmask, GLuint sampler, GLuint coord, GLuint op ) { - if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) { - /* No real way to work around this in the general case - need to - * allocate and declare a new temporary register (a utemp won't - * do). Will fallback for now. - */ - i915_program_error(p, "Can't (yet) swizzle TEX arguments"); - return 0; - } - - /* Don't worry about saturate as we only support + if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) { + /* With the help of the "needed registers" table created earlier, pick + * a register we can MOV the swizzled TC to (since TEX doesn't support + * swizzled sources) */ + GLuint swizCoord = get_free_rreg(p, live_regs); + if (swizCoord == UREG_BAD) + return 0; + + i915_emit_arith( p, A0_MOV, swizCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0 ); + coord = swizCoord; + } + + /* Don't worry about saturate as we only support texture formats + * that are always in the 0..1 range. */ if (destmask != A0_DEST_CHANNEL_ALL) { GLuint tmp = i915_get_utemp(p); - i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); + i915_emit_texld( p, 0, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op ); i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); return dest; } @@ -239,24 +255,28 @@ GLuint i915_emit_texld( struct i915_fragment_program *p, } -GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 ) +GLuint +i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0) { GLint reg, idx; - if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); - if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE ); + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM) - continue; + continue; for (idx = 0; idx < 4; idx++) { - if (!(p->constant_flags[reg] & (1<<idx)) || - p->constant[reg][idx] == c0) { - p->constant[reg][idx] = c0; - p->constant_flags[reg] |= 1<<idx; - if (reg+1 > p->nr_constants) p->nr_constants = reg+1; - return swizzle(UREG(REG_TYPE_CONST, reg),idx,ZERO,ZERO,ONE); - } + if (!(p->constant_flags[reg] & (1 << idx)) || + p->constant[reg][idx] == c0) { + p->constant[reg][idx] = c0; + p->constant_flags[reg] |= 1 << idx; + if (reg + 1 > p->nr_constants) + p->nr_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } } } @@ -265,29 +285,35 @@ GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 ) return 0; } -GLuint i915_emit_const2f( struct i915_fragment_program *p, - GLfloat c0, GLfloat c1 ) +GLuint +i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1) { GLint reg, idx; - if (c0 == 0.0) return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); - if (c0 == 1.0) return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); - if (c1 == 0.0) return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); - if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (p->constant_flags[reg] == 0xf || - p->constant_flags[reg] == I915_CONSTFLAG_PARAM) - continue; + p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; for (idx = 0; idx < 3; idx++) { - if (!(p->constant_flags[reg] & (3<<idx))) { - p->constant[reg][idx] = c0; - p->constant[reg][idx+1] = c1; - p->constant_flags[reg] |= 3<<idx; - if (reg+1 > p->nr_constants) p->nr_constants = reg+1; - return swizzle(UREG(REG_TYPE_CONST, reg),idx,idx+1,ZERO,ONE); - } + if (!(p->constant_flags[reg] & (3 << idx))) { + p->constant[reg][idx] = c0; + p->constant[reg][idx + 1] = c1; + p->constant_flags[reg] |= 3 << idx; + if (reg + 1 > p->nr_constants) + p->nr_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, + ONE); + } } } @@ -298,27 +324,28 @@ GLuint i915_emit_const2f( struct i915_fragment_program *p, -GLuint i915_emit_const4f( struct i915_fragment_program *p, - GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3 ) +GLuint +i915_emit_const4f(struct i915_fragment_program * p, + GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3) { GLint reg; for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (p->constant_flags[reg] == 0xf && - p->constant[reg][0] == c0 && - p->constant[reg][1] == c1 && - p->constant[reg][2] == c2 && - p->constant[reg][3] == c3) { - return UREG(REG_TYPE_CONST, reg); + p->constant[reg][0] == c0 && + p->constant[reg][1] == c1 && + p->constant[reg][2] == c2 && p->constant[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); } else if (p->constant_flags[reg] == 0) { - p->constant[reg][0] = c0; - p->constant[reg][1] = c1; - p->constant[reg][2] = c2; - p->constant[reg][3] = c3; - p->constant_flags[reg] = 0xf; - if (reg+1 > p->nr_constants) p->nr_constants = reg+1; - return UREG(REG_TYPE_CONST, reg); + p->constant[reg][0] = c0; + p->constant[reg][1] = c1; + p->constant[reg][2] = c2; + p->constant[reg][3] = c3; + p->constant_flags[reg] = 0xf; + if (reg + 1 > p->nr_constants) + p->nr_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); } } @@ -328,34 +355,36 @@ GLuint i915_emit_const4f( struct i915_fragment_program *p, } -GLuint i915_emit_const4fv( struct i915_fragment_program *p, const GLfloat *c ) +GLuint +i915_emit_const4fv(struct i915_fragment_program * p, const GLfloat * c) { - return i915_emit_const4f( p, c[0], c[1], c[2], c[3] ); + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); } -GLuint i915_emit_param4fv( struct i915_fragment_program *p, - const GLfloat *values ) +GLuint +i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) { GLint reg, i; for (i = 0; i < p->nr_params; i++) { if (p->param[i].values == values) - return UREG(REG_TYPE_CONST, p->param[i].reg); + return UREG(REG_TYPE_CONST, p->param[i].reg); } for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { if (p->constant_flags[reg] == 0) { - p->constant_flags[reg] = I915_CONSTFLAG_PARAM; - i = p->nr_params++; + p->constant_flags[reg] = I915_CONSTFLAG_PARAM; + i = p->nr_params++; - p->param[i].values = values; - p->param[i].reg = reg; - p->params_uptodate = 0; + p->param[i].values = values; + p->param[i].reg = reg; + p->params_uptodate = 0; - if (reg+1 > p->nr_constants) p->nr_constants = reg+1; - return UREG(REG_TYPE_CONST, reg); + if (reg + 1 > p->nr_constants) + p->nr_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); } } @@ -366,30 +395,31 @@ GLuint i915_emit_param4fv( struct i915_fragment_program *p, - -void i915_program_error( struct i915_fragment_program *p, const char *msg ) +void +i915_program_error(struct i915_fragment_program *p, const char *msg) { _mesa_problem(NULL, "i915_program_error: %s", msg); p->error = 1; } -void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p ) + +void +i915_init_program(struct i915_context *i915, struct i915_fragment_program *p) { GLcontext *ctx = &i915->intel.ctx; - TNLcontext *tnl = TNL_CONTEXT( ctx ); - + p->translated = 0; p->params_uptodate = 0; p->on_hardware = 0; p->error = 0; - p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_indirect = 1; /* correct? */ p->nr_tex_insn = 0; p->nr_alu_insn = 0; p->nr_decl_insn = 0; - p->ctx = ctx; - memset( p->constant_flags, 0, sizeof(p->constant_flags) ); + p->ctx = ctx; + memset(p->constant_flags, 0, sizeof(p->constant_flags)); p->nr_constants = 0; p->csr = p->program; @@ -402,21 +432,17 @@ void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p ) p->depth_written = 0; p->nr_params = 0; - p->src_texture = UREG_BAD; - p->src_previous = UREG(REG_TYPE_T, T_DIFFUSE); - p->last_tex_stage = 0; - p->VB = &tnl->vb; - *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; } -void i915_fini_program( struct i915_fragment_program *p ) +void +i915_fini_program(struct i915_fragment_program *p) { GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - - if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) i915_program_error(p, "Exceeded max nr indirect texture lookups"); if (p->nr_tex_insn > I915_MAX_TEX_INSN) @@ -446,22 +472,24 @@ void i915_fini_program( struct i915_fragment_program *p ) p->declarations[0] |= program_size + decl_size - 2; } -void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p ) +void +i915_upload_program(struct i915_context *i915, + struct i915_fragment_program *p) { GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - FALLBACK( &i915->intel, I915_FALLBACK_PROGRAM, p->error ); + FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error); /* Could just go straight to the batchbuffer from here: */ if (i915->state.ProgramSize != (program_size + decl_size) || - memcmp(i915->state.Program + decl_size, p->program, - program_size*sizeof(int)) != 0) { - I915_STATECHANGE( i915, I915_UPLOAD_PROGRAM ); - memcpy(i915->state.Program, p->declarations, decl_size*sizeof(int)); + memcmp(i915->state.Program + decl_size, p->program, + program_size * sizeof(int)) != 0) { + I915_STATECHANGE(i915, I915_UPLOAD_PROGRAM); + memcpy(i915->state.Program, p->declarations, decl_size * sizeof(int)); memcpy(i915->state.Program + decl_size, p->program, - program_size*sizeof(int)); + program_size * sizeof(int)); i915->state.ProgramSize = decl_size + program_size; } @@ -470,30 +498,28 @@ void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p ) */ if (p->nr_constants) { GLuint nr = p->nr_constants; - - I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 1 ); - I915_STATECHANGE( i915, I915_UPLOAD_CONSTANTS ); + + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); + I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4); - i915->state.Constant[1] = (1<<(nr-1)) | ((1<<(nr-1))-1); - - memcpy(&i915->state.Constant[2], p->constant, 4*sizeof(int)*(nr)); + i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1); + + memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr)); i915->state.ConstantSize = 2 + (nr) * 4; if (0) { - GLuint i; - for (i = 0; i < nr; i++) { - fprintf(stderr, "const[%d]: %f %f %f %f\n", i, - p->constant[i][0], - p->constant[i][1], - p->constant[i][2], - p->constant[i][3]); - } + GLuint i; + for (i = 0; i < nr; i++) { + fprintf(stderr, "const[%d]: %f %f %f %f\n", i, + p->constant[i][0], + p->constant[i][1], p->constant[i][2], p->constant[i][3]); + } } } else { - I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 0 ); - } + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); + } p->on_hardware = 1; } diff --git a/i915/i915_program.h b/i915/i915_program.h index 8891a17..14a3f08 100644 --- a/i915/i915_program.h +++ b/i915/i915_program.h @@ -48,11 +48,11 @@ #define UREG_CHANNEL_W_NEGATE_SHIFT 11 #define UREG_CHANNEL_W_SHIFT 8 #define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 -#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ZERO_SHIFT 4 #define UREG_CHANNEL_ONE_NEGATE_MBZ 1 -#define UREG_CHANNEL_ONE_SHIFT 0 +#define UREG_CHANNEL_ONE_SHIFT 0 -#define UREG_BAD 0xffffffff /* not a valid ureg */ +#define UREG_BAD 0xffffffff /* not a valid ureg */ #define X SRC_X #define Y SRC_Y @@ -84,78 +84,76 @@ /* One neat thing about the UREG representation: */ -static __inline int swizzle( int reg, int x, int y, int z, int w ) +static INLINE int +swizzle(int reg, int x, int y, int z, int w) { return ((reg & ~UREG_XYZW_CHANNEL_MASK) | - CHANNEL_SRC( GET_CHANNEL_SRC( reg, x ), 0 ) | - CHANNEL_SRC( GET_CHANNEL_SRC( reg, y ), 1 ) | - CHANNEL_SRC( GET_CHANNEL_SRC( reg, z ), 2 ) | - CHANNEL_SRC( GET_CHANNEL_SRC( reg, w ), 3 )); + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); } /* Another neat thing about the UREG representation: */ -static __inline int negate( int reg, int x, int y, int z, int w ) +static INLINE int +negate(int reg, int x, int y, int z, int w) { - return reg ^ (((x&1)<<UREG_CHANNEL_X_NEGATE_SHIFT)| - ((y&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT)| - ((z&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT)| - ((w&1)<<UREG_CHANNEL_W_NEGATE_SHIFT)); + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); } -extern GLuint i915_get_temp( struct i915_fragment_program *p ); -extern GLuint i915_get_utemp( struct i915_fragment_program *p ); -extern void i915_release_utemps( struct i915_fragment_program *p ); +extern GLuint i915_get_temp(struct i915_fragment_program *p); +extern GLuint i915_get_utemp(struct i915_fragment_program *p); +extern void i915_release_utemps(struct i915_fragment_program *p); -extern GLuint i915_emit_texld( struct i915_fragment_program *p, - GLuint dest, - GLuint destmask, - GLuint sampler, - GLuint coord, - GLuint op ); +extern GLuint i915_emit_texld(struct i915_fragment_program *p, + GLuint live_regs, + GLuint dest, + GLuint destmask, + GLuint sampler, GLuint coord, GLuint op); -extern GLuint i915_emit_arith( struct i915_fragment_program *p, - GLuint op, - GLuint dest, - GLuint mask, - GLuint saturate, - GLuint src0, - GLuint src1, - GLuint src2 ); +extern GLuint i915_emit_arith(struct i915_fragment_program *p, + GLuint op, + GLuint dest, + GLuint mask, + GLuint saturate, + GLuint src0, GLuint src1, GLuint src2); -extern GLuint i915_emit_decl( struct i915_fragment_program *p, - GLuint type, GLuint nr, GLuint d0_flags ); +extern GLuint i915_emit_decl(struct i915_fragment_program *p, + GLuint type, GLuint nr, GLuint d0_flags); -extern GLuint i915_emit_const1f( struct i915_fragment_program *p, - GLfloat c0 ); +extern GLuint i915_emit_const1f(struct i915_fragment_program *p, GLfloat c0); -extern GLuint i915_emit_const2f( struct i915_fragment_program *p, - GLfloat c0, GLfloat c1 ); +extern GLuint i915_emit_const2f(struct i915_fragment_program *p, + GLfloat c0, GLfloat c1); -extern GLuint i915_emit_const4fv( struct i915_fragment_program *p, - const GLfloat *c ); +extern GLuint i915_emit_const4fv(struct i915_fragment_program *p, + const GLfloat * c); -extern GLuint i915_emit_const4f( struct i915_fragment_program *p, - GLfloat c0, GLfloat c1, - GLfloat c2, GLfloat c3 ); +extern GLuint i915_emit_const4f(struct i915_fragment_program *p, + GLfloat c0, GLfloat c1, + GLfloat c2, GLfloat c3); -extern GLuint i915_emit_param4fv( struct i915_fragment_program *p, - const GLfloat *values ); +extern GLuint i915_emit_param4fv(struct i915_fragment_program *p, + const GLfloat * values); -extern void i915_program_error( struct i915_fragment_program *p, - const char *msg ); +extern void i915_program_error(struct i915_fragment_program *p, + const char *msg); -extern void i915_init_program( i915ContextPtr i915, - struct i915_fragment_program *p ); +extern void i915_init_program(struct i915_context *i915, + struct i915_fragment_program *p); -extern void i915_upload_program( i915ContextPtr i915, - struct i915_fragment_program *p ); +extern void i915_upload_program(struct i915_context *i915, + struct i915_fragment_program *p); -extern void i915_fini_program( struct i915_fragment_program *p ); +extern void i915_fini_program(struct i915_fragment_program *p); diff --git a/i915/i915_reg.h b/i915/i915_reg.h index 694cd4c..b5585e7 100644 --- a/i915/i915_reg.h +++ b/i915/i915_reg.h @@ -34,8 +34,6 @@ #define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) -#define CMD_3D (0x3<<29) - #define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) #define PRIM3D_TRILIST (0x0<<18) #define PRIM3D_TRISTRIP (0x1<<18) @@ -112,6 +110,20 @@ /* 3DSTATE_CHROMA_KEY */ /* 3DSTATE_CLEAR_PARAMETERS, p150 */ +/* + * Sets the color, depth and stencil clear values used by the + * CLEAR_RECT and ZONE_INIT primitive types, respectively. These + * primitives set override most 3d state and only take a minimal x/y + * vertex. The color/z/stencil information is supplied here and + * therefore cannot vary per vertex. + */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) /* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ #define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) @@ -424,9 +436,22 @@ #define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* Helper macros for blend factors + */ +#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT) +#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT) +#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT) + + + + /* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ -/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ +#define _3DSTATE_MAP_PALETTE_LOAD_32 (CMD_3D|(0x1d<<24)|(0x8f<<16)) +/* subsequent dwords up to length (max 16) are ARGB8888 color values */ /* _3DSTATE_MODES_4, p218 */ #define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) @@ -435,7 +460,7 @@ #define LOGICOP_MASK (0xf<<18) #define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) #define ENABLE_STENCIL_TEST_MASK (1<<17) -#define STENCIL_TEST_MASK(x) ((x)<<8) +#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8) #define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) #define ENABLE_STENCIL_WRITE_MASK (1<<16) #define STENCIL_WRITE_MASK(x) ((x)&0xff) @@ -458,7 +483,7 @@ #define I915_MAX_TEX_INDIRECT 4 -#define I915_MAX_TEX_INSN 32 +#define I915_MAX_TEX_INSN 32 #define I915_MAX_ALU_INSN 64 #define I915_MAX_DECL_INSN 27 #define I915_MAX_TEMPORARY 16 @@ -470,33 +495,33 @@ */ #define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) -#define REG_TYPE_R 0 /* temporary regs, no need to - * dcl, must be written before - * read -- Preserved between - * phases. - */ -#define REG_TYPE_T 1 /* Interpolated values, must be - * dcl'ed before use. - * - * 0..7: texture coord, - * 8: diffuse spec, - * 9: specular color, - * 10: fog parameter in w. - */ -#define REG_TYPE_CONST 2 /* Restriction: only one const - * can be referenced per - * instruction, though it may be - * selected for multiple inputs. - * Constants not initialized - * default to zero. - */ -#define REG_TYPE_S 3 /* sampler */ -#define REG_TYPE_OC 4 /* output color (rgba) */ -#define REG_TYPE_OD 5 /* output depth (w), xyz are - * temporaries. If not written, - * interpolated depth is used? - */ -#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ #define REG_TYPE_MASK 0x7 #define REG_NR_MASK 0xf @@ -513,34 +538,34 @@ #define T_TEX7 7 #define T_DIFFUSE 8 #define T_SPECULAR 9 -#define T_FOG_W 10 /* interpolated fog is in W coord */ +#define T_FOG_W 10 /* interpolated fog is in W coord */ /* Arithmetic instructions */ /* .replicate_swizzle == selection and replication of a particular * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww */ -#define A0_NOP (0x0<<24) /* no operation */ -#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ -#define A0_MOV (0x2<<24) /* dst = src0 */ -#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ -#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ -#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ -#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ -#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ -#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ -#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ -#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ -#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ -#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ -#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ -#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ -#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ -#define A0_FLR (0x10<<24) /* dst = floor(src0) */ -#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ -#define A0_TRC (0x12<<24) /* dst = int(src0) */ -#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ -#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ #define A0_DEST_SATURATE (1<<22) #define A0_DEST_TYPE_SHIFT 19 /* Allow: R, OC, OD, U */ @@ -599,23 +624,23 @@ /* Texture instructions */ -#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared - * sampler and address, and output - * filtered texel data to destination - * register */ -#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a - * perspective divide of the texture - * coordinate .xyz values by .w before - * sampling. */ -#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the - * computed LOD by w. Only S4.6 two's - * comp is used. This implies that a - * float to fixed conversion is - * done. */ -#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling - * operation. Simply kills the pixel - * if any channel of the address - * register is < 0.0. */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ #define T0_DEST_TYPE_SHIFT 19 /* Allow: R, OC, OD, U */ /* Note: U (unpreserved) regs do not retain their values between @@ -627,18 +652,18 @@ */ #define T0_DEST_NR_SHIFT 14 /* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ -#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ #define T0_SAMPLER_NR_MASK (0xf<<0) -#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ /* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ #define T1_ADDRESS_REG_NR_SHIFT 17 #define T2_MBZ 0 /* Declaration instructions */ -#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) - * register or an s (sampler) - * register. */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ #define D0_SAMPLE_TYPE_SHIFT 22 #define D0_SAMPLE_TYPE_2D (0x0<<22) #define D0_SAMPLE_TYPE_CUBE (0x1<<22) @@ -695,12 +720,12 @@ #define MAPSURF_4BIT_INDEXED (7<<7) #define MS3_MT_FORMAT_MASK (0x7 << 3) #define MS3_MT_FORMAT_SHIFT 3 -#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ -#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ #define MT_8BIT_L8 (1<<3) #define MT_8BIT_A8 (4<<3) #define MT_8BIT_MONO8 (5<<3) -#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ #define MT_16BIT_ARGB1555 (1<<3) #define MT_16BIT_ARGB4444 (2<<3) #define MT_16BIT_AY88 (3<<3) @@ -709,7 +734,7 @@ #define MT_16BIT_I16 (7<<3) #define MT_16BIT_L16 (8<<3) #define MT_16BIT_A16 (9<<3) -#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ #define MT_32BIT_ABGR8888 (1<<3) #define MT_32BIT_XRGB8888 (2<<3) #define MT_32BIT_XBGR8888 (3<<3) @@ -725,11 +750,11 @@ #define MT_32BIT_xI824 (0xD<<3) #define MT_32BIT_xA824 (0xE<<3) #define MT_32BIT_xL824 (0xF<<3) -#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ #define MT_422_YCRCB_NORMAL (1<<3) #define MT_422_YCRCB_SWAPUV (2<<3) #define MT_422_YCRCB_SWAPUVY (3<<3) -#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ #define MT_COMPRESS_DXT2_3 (1<<3) #define MT_COMPRESS_DXT4_5 (2<<3) #define MT_COMPRESS_FXT1 (3<<3) @@ -751,7 +776,7 @@ #define MS4_MIP_LAYOUT_LEGACY (0<<8) #define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) #define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) -#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_SHIFT 0 #define MS4_VOLUME_DEPTH_MASK (0xff<<0) /* p244 */ @@ -779,7 +804,7 @@ #define FILTER_4X4_1 3 #define FILTER_4X4_2 4 #define FILTER_4X4_FLAT 5 -#define FILTER_6X5_MONO 6 /* XXX - check */ +#define FILTER_6X5_MONO 6 /* XXX - check */ #define SS2_MIN_FILTER_SHIFT 14 #define SS2_MIN_FILTER_MASK (0x7<<14) #define SS2_LOD_BIAS_SHIFT 5 @@ -826,10 +851,14 @@ #define ST1_ENABLE (1<<16) #define ST1_MASK (0xffff) +#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + -#define MI_FLUSH ((0<<29)|(4<<23)) -#define FLUSH_MAP_CACHE (1<<0) -#define FLUSH_RENDER_CACHE (1<<1) +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) #endif diff --git a/i915/i915_state.c b/i915/i915_state.c index 0d5ca32..c814f8d 100644 --- a/i915/i915_state.c +++ b/i915/i915_state.c @@ -36,97 +36,101 @@ #include "texmem.h" +#include "drivers/common/driverfuncs.h" + +#include "intel_fbo.h" #include "intel_screen.h" #include "intel_batchbuffer.h" #include "i915_context.h" #include "i915_reg.h" - +#define FILE_DEBUG_FLAG DEBUG_STATE static void -i915StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref, +i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, GLuint mask) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - int test = intel_translate_compare_func( func ); + struct i915_context *i915 = I915_CONTEXT(ctx); + int test = intel_translate_compare_func(func); mask = mask & 0xff; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(func), ref, mask); + DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(func), ref, mask); I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK; i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(mask)); + STENCIL_TEST_MASK(mask)); i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK); - - i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) | - (test << S5_STENCIL_TEST_FUNC_SHIFT)); + S5_STENCIL_TEST_FUNC_MASK); + + i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) | + (test << + S5_STENCIL_TEST_FUNC_SHIFT)); } static void -i915StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask) +i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask); + struct i915_context *i915 = I915_CONTEXT(ctx); + DBG("%s : mask 0x%x\n", __FUNCTION__, mask); + mask = mask & 0xff; I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK; i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(mask)); + STENCIL_WRITE_MASK(mask)); } static void -i915StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail, +i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail, GLenum zpass) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - int fop = intel_translate_stencil_op(fail); - int dfop = intel_translate_stencil_op(zfail); + struct i915_context *i915 = I915_CONTEXT(ctx); + int fop = intel_translate_stencil_op(fail); + int dfop = intel_translate_stencil_op(zfail); int dpop = intel_translate_stencil_op(zpass); - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(fail), - _mesa_lookup_enum_by_nr(zfail), - _mesa_lookup_enum_by_nr(zpass)); + DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(fail), + _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass)); I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); + S5_STENCIL_PASS_Z_FAIL_MASK | + S5_STENCIL_PASS_Z_PASS_MASK); i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) | - (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + (dfop << + S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << + S5_STENCIL_PASS_Z_PASS_SHIFT)); } -static void i915AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref) +static void +i915AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - int test = intel_translate_compare_func( func ); + struct i915_context *i915 = I915_CONTEXT(ctx); + int test = intel_translate_compare_func(func); GLubyte refByte; UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref); I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_ALPHA_TEST_FUNC_MASK | - S6_ALPHA_REF_MASK); + S6_ALPHA_REF_MASK); i915->state.Ctx[I915_CTXREG_LIS6] |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) | - (((GLuint)refByte) << S6_ALPHA_REF_SHIFT)); + (((GLuint) refByte) << + S6_ALPHA_REF_SHIFT)); } /* This function makes sure that the proper enables are @@ -135,41 +139,45 @@ static void i915AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref) * could change the LogicOp or Independant Alpha Blend without subsequent * calls to glEnable. */ -static void i915EvalLogicOpBlendState(GLcontext *ctx) +static void +i915EvalLogicOpBlendState(GLcontext * ctx) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (RGBA_LOGICOP_ENABLED(ctx)) { i915->state.Ctx[I915_CTXREG_LIS5] |= S5_LOGICOP_ENABLE; i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE; - } else { + } + else { i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_LOGICOP_ENABLE; if (ctx->Color.BlendEnabled) { - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE; - } else { - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE; + } + else { + i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE; } } } -static void i915BlendColor(GLcontext *ctx, const GLfloat color[4]) +static void +i915BlendColor(GLcontext * ctx, const GLfloat color[4]) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); GLubyte r, g, b, a; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]); I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = (a<<24) | (r<<16) | (g<<8) | b; + i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = + (a << 24) | (r << 16) | (g << 8) | b; } @@ -180,31 +188,37 @@ static void i915BlendColor(GLcontext *ctx, const GLfloat color[4]) -static GLuint translate_blend_equation( GLenum mode ) +static GLuint +translate_blend_equation(GLenum mode) { switch (mode) { - case GL_FUNC_ADD: return BLENDFUNC_ADD; - case GL_MIN: return BLENDFUNC_MIN; - case GL_MAX: return BLENDFUNC_MAX; - case GL_FUNC_SUBTRACT: return BLENDFUNC_SUBTRACT; - case GL_FUNC_REVERSE_SUBTRACT: return BLENDFUNC_REVERSE_SUBTRACT; - default: return 0; + case GL_FUNC_ADD: + return BLENDFUNC_ADD; + case GL_MIN: + return BLENDFUNC_MIN; + case GL_MAX: + return BLENDFUNC_MAX; + case GL_FUNC_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; } } -static void i915UpdateBlendState( GLcontext *ctx ) +static void +i915UpdateBlendState(GLcontext * ctx) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] & - ~(IAB_SRC_FACTOR_MASK | - IAB_DST_FACTOR_MASK | - (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | - IAB_ENABLE)); - - GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] & - ~(S6_CBUF_SRC_BLEND_FACT_MASK | - S6_CBUF_DST_BLEND_FACT_MASK | - S6_CBUF_BLEND_FUNC_MASK)); + struct i915_context *i915 = I915_CONTEXT(ctx); + GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] & + ~(IAB_SRC_FACTOR_MASK | + IAB_DST_FACTOR_MASK | + (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE)); + + GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] & + ~(S6_CBUF_SRC_BLEND_FACT_MASK | + S6_CBUF_DST_BLEND_FACT_MASK | S6_CBUF_BLEND_FUNC_MASK)); GLuint eqRGB = ctx->Color.BlendEquationRGB; GLuint eqA = ctx->Color.BlendEquationA; @@ -221,15 +235,15 @@ static void i915UpdateBlendState( GLcontext *ctx ) srcA = dstA = GL_ONE; } - lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB)); - lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB)); - lis6 |= translate_blend_equation( eqRGB ) << S6_CBUF_BLEND_FUNC_SHIFT; + lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB)); + lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB)); + lis6 |= translate_blend_equation(eqRGB) << S6_CBUF_BLEND_FUNC_SHIFT; - iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA)); - iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA)); - iab |= translate_blend_equation( eqA ) << IAB_FUNC_SHIFT; + iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA)); + iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA)); + iab |= translate_blend_equation(eqA) << IAB_FUNC_SHIFT; - if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) iab |= IAB_ENABLE; if (iab != i915->state.Ctx[I915_CTXREG_IAB] || @@ -244,41 +258,41 @@ static void i915UpdateBlendState( GLcontext *ctx ) } -static void i915BlendFuncSeparate(GLcontext *ctx, GLenum srcRGB, - GLenum dstRGB, GLenum srcA, - GLenum dstA ) -{ - i915UpdateBlendState( ctx ); +static void +i915BlendFuncSeparate(GLcontext * ctx, GLenum srcRGB, + GLenum dstRGB, GLenum srcA, GLenum dstA) +{ + i915UpdateBlendState(ctx); } -static void i915BlendEquationSeparate(GLcontext *ctx, GLenum eqRGB, - GLenum eqA) +static void +i915BlendEquationSeparate(GLcontext * ctx, GLenum eqRGB, GLenum eqA) { - i915UpdateBlendState( ctx ); + i915UpdateBlendState(ctx); } -static void i915DepthFunc(GLcontext *ctx, GLenum func) +static void +i915DepthFunc(GLcontext * ctx, GLenum func) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - int test = intel_translate_compare_func( func ); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + struct i915_context *i915 = I915_CONTEXT(ctx); + int test = intel_translate_compare_func(func); + DBG("%s\n", __FUNCTION__); + I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK; i915->state.Ctx[I915_CTXREG_LIS6] |= test << S6_DEPTH_TEST_FUNC_SHIFT; } -static void i915DepthMask(GLcontext *ctx, GLboolean flag) +static void +i915DepthMask(GLcontext * ctx, GLboolean flag) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag); + struct i915_context *i915 = I915_CONTEXT(ctx); + DBG("%s flag (%d)\n", __FUNCTION__, flag); + I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (flag && ctx->Depth.Test) @@ -293,14 +307,15 @@ static void i915DepthMask(GLcontext *ctx, GLboolean flag) * The i915 supports a 4x4 stipple natively, GL wants 32x32. * Fortunately stipple is usually a repeating pattern. */ -static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask ) +static void +i915PolygonStipple(GLcontext * ctx, const GLubyte * mask) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - const GLubyte *m = mask; + struct i915_context *i915 = I915_CONTEXT(ctx); + const GLubyte *m; GLubyte p[4]; - int i,j,k; + int i, j, k; int active = (ctx->Polygon.StippleFlag && - i915->intel.reduced_primitive == GL_TRIANGLES); + i915->intel.reduced_primitive == GL_TRIANGLES); GLuint newMask; if (active) { @@ -308,23 +323,32 @@ static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask ) i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE; } - p[0] = mask[12] & 0xf; p[0] |= p[0] << 4; - p[1] = mask[8] & 0xf; p[1] |= p[1] << 4; - p[2] = mask[4] & 0xf; p[2] |= p[2] << 4; - p[3] = mask[0] & 0xf; p[3] |= p[3] << 4; - - for (k = 0 ; k < 8 ; k++) - for (j = 3 ; j >= 0; j--) - for (i = 0 ; i < 4 ; i++, m++) - if (*m != p[j]) { - i915->intel.hw_stipple = 0; - return; - } + /* Use the already unpacked stipple data from the context rather than the + * uninterpreted mask passed in. + */ + mask = (const GLubyte *)ctx->PolygonStipple; + m = mask; + + p[0] = mask[12] & 0xf; + p[0] |= p[0] << 4; + p[1] = mask[8] & 0xf; + p[1] |= p[1] << 4; + p[2] = mask[4] & 0xf; + p[2] |= p[2] << 4; + p[3] = mask[0] & 0xf; + p[3] |= p[3] << 4; + + for (k = 0; k < 8; k++) + for (j = 3; j >= 0; j--) + for (i = 0; i < 4; i++, m++) + if (*m != p[j]) { + i915->intel.hw_stipple = 0; + return; + } newMask = (((p[0] & 0xf) << 0) | - ((p[1] & 0xf) << 4) | - ((p[2] & 0xf) << 8) | - ((p[3] & 0xf) << 12)); + ((p[1] & 0xf) << 4) | + ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12)); if (newMask == 0xffff || newMask == 0x0) { @@ -345,49 +369,54 @@ static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* ============================================================= * Hardware clipping */ -static void i915Scissor(GLcontext *ctx, GLint x, GLint y, - GLsizei w, GLsizei h) +static void +i915Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h) { - i915ContextPtr i915 = I915_CONTEXT(ctx); - intelScreenPrivate *screen = i915->intel.intelScreen; + struct i915_context *i915 = I915_CONTEXT(ctx); int x1, y1, x2, y2; - if (!i915->intel.driDrawable) + if (!ctx->DrawBuffer) return; - x1 = x; - y1 = i915->intel.driDrawable->h - (y + h); - x2 = x + w - 1; - y2 = y1 + h - 1; - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__, - x, y, w, h); - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 < 0) x2 = 0; - if (y2 < 0) y2 = 0; - - if (x2 >= screen->width) x2 = screen->width-1; - if (y2 >= screen->height) y2 = screen->height-1; - if (x1 >= screen->width) x1 = screen->width-1; - if (y1 >= screen->height) y1 = screen->height-1; + DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h); + if (ctx->DrawBuffer->Name == 0) { + x1 = x; + y1 = ctx->DrawBuffer->Height - (y + h); + x2 = x + w - 1; + y2 = y1 + h - 1; + DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2); + } + else { + /* FBO - not inverted + */ + x1 = x; + y1 = y; + x2 = x + w - 1; + y2 = y + h - 1; + DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2); + } + + x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); + y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1); + x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); + y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); + + DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2); I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); i915->state.Buffer[I915_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff); i915->state.Buffer[I915_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff); } -static void i915LogicOp(GLcontext *ctx, GLenum opcode) +static void +i915LogicOp(GLcontext * ctx, GLenum opcode) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); int tmp = intel_translate_logic_op(opcode); - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK; i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp); @@ -395,13 +424,14 @@ static void i915LogicOp(GLcontext *ctx, GLenum opcode) -static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused) +static void +i915CullFaceFrontFace(GLcontext * ctx, GLenum unused) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); GLuint mode; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + DBG("%s %d\n", __FUNCTION__, + ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0); if (!ctx->Polygon.CullFlag) { mode = S4_CULLMODE_NONE; @@ -409,10 +439,12 @@ static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused) else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) { mode = S4_CULLMODE_CW; + if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0) + mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW); if (ctx->Polygon.CullFaceMode == GL_FRONT) - mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW); + mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW); if (ctx->Polygon.FrontFace != GL_CCW) - mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW); + mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW); } else { mode = S4_CULLMODE_BOTH; @@ -423,16 +455,16 @@ static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused) i915->state.Ctx[I915_CTXREG_LIS4] |= mode; } -static void i915LineWidth( GLcontext *ctx, GLfloat widthf ) +static void +i915LineWidth(GLcontext * ctx, GLfloat widthf) { - i915ContextPtr i915 = I915_CONTEXT( ctx ); + struct i915_context *i915 = I915_CONTEXT(ctx); int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK; int width; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - - width = (int)(widthf * 2); + DBG("%s\n", __FUNCTION__); + + width = (int) (widthf * 2); CLAMP_SELF(width, 1, 0xf); lis4 |= width << S4_LINE_WIDTH_SHIFT; @@ -442,15 +474,15 @@ static void i915LineWidth( GLcontext *ctx, GLfloat widthf ) } } -static void i915PointSize(GLcontext *ctx, GLfloat size) +static void +i915PointSize(GLcontext * ctx, GLfloat size) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK; - GLint point_size = (int)size; - - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); + GLint point_size = (int) size; + DBG("%s\n", __FUNCTION__); + CLAMP_SELF(point_size, 1, 255); lis4 |= point_size << S4_POINT_WIDTH_SHIFT; @@ -465,20 +497,24 @@ static void i915PointSize(GLcontext *ctx, GLfloat size) * Color masks */ -static void i915ColorMask(GLcontext *ctx, - GLboolean r, GLboolean g, - GLboolean b, GLboolean a) +static void +i915ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) { - i915ContextPtr i915 = I915_CONTEXT( ctx ); + struct i915_context *i915 = I915_CONTEXT(ctx); GLuint tmp = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK; - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a); + DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, + a); - if (!r) tmp |= S5_WRITEDISABLE_RED; - if (!g) tmp |= S5_WRITEDISABLE_GREEN; - if (!b) tmp |= S5_WRITEDISABLE_BLUE; - if (!a) tmp |= S5_WRITEDISABLE_ALPHA; + if (!r) + tmp |= S5_WRITEDISABLE_RED; + if (!g) + tmp |= S5_WRITEDISABLE_GREEN; + if (!b) + tmp |= S5_WRITEDISABLE_BLUE; + if (!a) + tmp |= S5_WRITEDISABLE_ALPHA; if (tmp != i915->state.Ctx[I915_CTXREG_LIS5]) { I915_STATECHANGE(i915, I915_UPLOAD_CTX); @@ -486,54 +522,55 @@ static void i915ColorMask(GLcontext *ctx, } } -static void update_specular( GLcontext *ctx ) +static void +update_specular(GLcontext * ctx) { /* A hack to trigger the rebuild of the fragment program. */ - INTEL_CONTEXT(ctx)->NewGLState |= _NEW_TEXTURE; - I915_CONTEXT(ctx)->tex_program.translated = 0; + intel_context(ctx)->NewGLState |= _NEW_TEXTURE; } -static void i915LightModelfv(GLcontext *ctx, GLenum pname, - const GLfloat *param) +static void +i915LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param) { - if (INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - + DBG("%s\n", __FUNCTION__); + if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) { - update_specular( ctx ); + update_specular(ctx); } } -static void i915ShadeModel(GLcontext *ctx, GLenum mode) +static void +i915ShadeModel(GLcontext * ctx, GLenum mode) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (mode == GL_SMOOTH) { - i915->state.Ctx[I915_CTXREG_LIS4] &= ~(S4_FLATSHADE_ALPHA | - S4_FLATSHADE_COLOR | - S4_FLATSHADE_SPECULAR); - } else { - i915->state.Ctx[I915_CTXREG_LIS4] |= (S4_FLATSHADE_ALPHA | - S4_FLATSHADE_COLOR | - S4_FLATSHADE_SPECULAR); + i915->state.Ctx[I915_CTXREG_LIS4] &= ~(S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + else { + i915->state.Ctx[I915_CTXREG_LIS4] |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); } } /* ============================================================= * Fog */ -void i915_update_fog( GLcontext *ctx ) +void +i915_update_fog(GLcontext * ctx) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); GLenum mode; GLboolean enabled; GLboolean try_pixel_fog; - + if (ctx->FragmentProgram._Active) { /* Pull in static fog state from program */ - mode = ctx->FragmentProgram._Current->FogOption; enabled = (mode != GL_NONE); try_pixel_fog = 0; @@ -544,7 +581,7 @@ void i915_update_fog( GLcontext *ctx ) #if 0 /* XXX - DISABLED -- Need ortho fallback */ try_pixel_fog = (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT - &&ctx->Hint.Fog == GL_NICEST); + && ctx->Hint.Fog == GL_NICEST); #else try_pixel_fog = 0; #endif @@ -557,48 +594,49 @@ void i915_update_fog( GLcontext *ctx ) I915_STATECHANGE(i915, I915_UPLOAD_FOG); i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK; i915->vertex_fog = I915_FOG_PIXEL; - + switch (mode) { case GL_LINEAR: - if (ctx->Fog.End <= ctx->Fog.Start) { - /* XXX - this won't work with fragment programs. Need to - * either fallback or append fog instructions to end of - * program in the case of linear fog. - */ - i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX; - i915->vertex_fog = I915_FOG_VERTEX; - } - else { + if (ctx->Fog.End <= ctx->Fog.Start) { + /* XXX - this won't work with fragment programs. Need to + * either fallback or append fog instructions to end of + * program in the case of linear fog. + */ + printf("vertex fog!\n"); + i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX; + i915->vertex_fog = I915_FOG_VERTEX; + } + else { GLfloat c2 = 1.0 / (ctx->Fog.End - ctx->Fog.Start); GLfloat c1 = ctx->Fog.End * c2; - i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK; - i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR; - i915->state.Fog[I915_FOGREG_MODE1] |= - ((GLuint)(c1 * FMC1_C1_ONE)) & FMC1_C1_MASK; - - if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) { - i915->state.Fog[I915_FOGREG_MODE2] - = (GLuint)(c2 * FMC2_C2_ONE); - } - else { - fi_type fi; - fi.f = c2; - i915->state.Fog[I915_FOGREG_MODE2] = fi.i; - } - } - break; + i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK; + i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR; + i915->state.Fog[I915_FOGREG_MODE1] |= + ((GLuint) (c1 * FMC1_C1_ONE)) & FMC1_C1_MASK; + + if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) { + i915->state.Fog[I915_FOGREG_MODE2] + = (GLuint) (c2 * FMC2_C2_ONE); + } + else { + fi_type fi; + fi.f = c2; + i915->state.Fog[I915_FOGREG_MODE2] = fi.i; + } + } + break; case GL_EXP: - i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP; - break; + i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP; + break; case GL_EXP2: - i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2; - break; + i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2; + break; default: - break; + break; } } - else /* if (i915->vertex_fog != I915_FOG_VERTEX) */ { + else { /* if (i915->vertex_fog != I915_FOG_VERTEX) */ I915_STATECHANGE(i915, I915_UPLOAD_FOG); i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK; i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX; @@ -620,38 +658,38 @@ void i915_update_fog( GLcontext *ctx ) } static void -i915Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param) +i915Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); switch (pname) { - case GL_FOG_COORDINATE_SOURCE_EXT: + case GL_FOG_COORDINATE_SOURCE_EXT: case GL_FOG_MODE: case GL_FOG_START: - case GL_FOG_END: + case GL_FOG_END: break; case GL_FOG_DENSITY: I915_STATECHANGE(i915, I915_UPLOAD_FOG); if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) { - i915->state.Fog[I915_FOGREG_MODE3] - = (GLuint)(ctx->Fog.Density * FMC3_D_ONE); + i915->state.Fog[I915_FOGREG_MODE3] = + (GLuint) (ctx->Fog.Density * FMC3_D_ONE); } else { - union { float f; int i; } fi; - fi.f = ctx->Fog.Density; - i915->state.Fog[I915_FOGREG_MODE3] = fi.i; + fi_type fi; + fi.f = ctx->Fog.Density; + i915->state.Fog[I915_FOGREG_MODE3] = fi.i; } break; - case GL_FOG_COLOR: + case GL_FOG_COLOR: I915_STATECHANGE(i915, I915_UPLOAD_FOG); - i915->state.Fog[I915_FOGREG_COLOR] = - (_3DSTATE_FOG_COLOR_CMD | - ((GLubyte)(ctx->Fog.Color[0]*255.0F) << 16) | - ((GLubyte)(ctx->Fog.Color[1]*255.0F) << 8) | - ((GLubyte)(ctx->Fog.Color[2]*255.0F) << 0)); + i915->state.Fog[I915_FOGREG_COLOR] = + (_3DSTATE_FOG_COLOR_CMD | + ((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) | + ((GLubyte) (ctx->Fog.Color[1] * 255.0F) << 8) | + ((GLubyte) (ctx->Fog.Color[2] * 255.0F) << 0)); break; default: @@ -659,7 +697,8 @@ i915Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param) } } -static void i915Hint(GLcontext *ctx, GLenum target, GLenum state) +static void +i915Hint(GLcontext * ctx, GLenum target, GLenum state) { switch (target) { case GL_FOG_HINT: @@ -672,25 +711,26 @@ static void i915Hint(GLcontext *ctx, GLenum target, GLenum state) /* ============================================================= */ -static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) +static void +i915Enable(GLcontext * ctx, GLenum cap, GLboolean state) { - i915ContextPtr i915 = I915_CONTEXT(ctx); + struct i915_context *i915 = I915_CONTEXT(ctx); - switch(cap) { + switch (cap) { case GL_TEXTURE_2D: break; case GL_LIGHTING: case GL_COLOR_SUM: - update_specular( ctx ); + update_specular(ctx); break; case GL_ALPHA_TEST: I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (state) - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE; break; case GL_BLEND: @@ -702,8 +742,8 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) /* Logicop doesn't seem to work at 16bpp: */ - if (i915->intel.intelScreen->cpp == 2) - FALLBACK( &i915->intel, I915_FALLBACK_LOGICOP, state ); + if (ctx->Visual.rgbBits == 16) + FALLBACK(&i915->intel, I915_FALLBACK_LOGICOP, state); break; case GL_FRAGMENT_PROGRAM_ARB: @@ -712,37 +752,37 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) case GL_DITHER: I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (state) - i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE; break; case GL_DEPTH_TEST: I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (state) - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE; - i915DepthMask( ctx, ctx->Depth.Mask ); + i915DepthMask(ctx, ctx->Depth.Mask); break; case GL_SCISSOR_TEST: I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); if (state) - i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD | - ENABLE_SCISSOR_RECT); + i915->state.Buffer[I915_DESTREG_SENABLE] = + (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT); else - i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); + i915->state.Buffer[I915_DESTREG_SENABLE] = + (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); break; case GL_LINE_SMOOTH: I915_STATECHANGE(i915, I915_UPLOAD_CTX); if (state) - i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE; + i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE; break; case GL_FOG: @@ -753,16 +793,25 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) break; case GL_STENCIL_TEST: - if (i915->intel.hw_stencil) { - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - if (state) - i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); - else - i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); - } else { - FALLBACK( &i915->intel, I915_FALLBACK_STENCIL, state ); + { + GLboolean hw_stencil = GL_FALSE; + if (ctx->DrawBuffer) { + struct intel_renderbuffer *irbStencil + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (irbStencil && irbStencil->region); + } + if (hw_stencil) { + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + if (state) + i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE); + else + i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE); + } + else { + FALLBACK(&i915->intel, I915_FALLBACK_STENCIL, state); + } } break; @@ -771,23 +820,20 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) * I'll do more testing later to find out exactly which hardware * supports it. Disabled for now. */ - if (i915->intel.hw_stipple && - i915->intel.reduced_primitive == GL_TRIANGLES) - { - I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE); - if (state) - i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE; - else - i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE; + if (i915->intel.hw_stipple && + i915->intel.reduced_primitive == GL_TRIANGLES) { + I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE); + if (state) + i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE; + else + i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE; } break; case GL_POLYGON_SMOOTH: - FALLBACK( &i915->intel, I915_FALLBACK_POLYGON_SMOOTH, state ); break; case GL_POINT_SMOOTH: - FALLBACK( &i915->intel, I915_FALLBACK_POINT_SMOOTH, state ); break; default: @@ -796,10 +842,9 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state) } -static void i915_init_packets( i915ContextPtr i915 ) +static void +i915_init_packets(struct i915_context *i915) { - intelScreenPrivate *screen = i915->intel.intelScreen; - /* Zero all state */ memset(&i915->state, 0, sizeof(i915->state)); @@ -809,39 +854,35 @@ static void i915_init_packets( i915ContextPtr i915 ) /* Probably don't want to upload all this stuff every time one * piece changes. */ - i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (3)); + i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | I1_LOAD_S(6) | (3)); i915->state.Ctx[I915_CTXREG_LIS2] = 0; i915->state.Ctx[I915_CTXREG_LIS4] = 0; i915->state.Ctx[I915_CTXREG_LIS5] = 0; - if (screen->cpp == 2) - i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE; + if (i915->intel.ctx.Visual.rgbBits == 16) + i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE; i915->state.Ctx[I915_CTXREG_LIS6] = (S6_COLOR_WRITE_ENABLE | - (2 << S6_TRISTRIP_PV_SHIFT)); + (2 << S6_TRISTRIP_PV_SHIFT)); i915->state.Ctx[I915_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD | - ENABLE_LOGIC_OP_FUNC | - LOGIC_OP_FUNC(LOGICOP_COPY) | - ENABLE_STENCIL_TEST_MASK | - STENCIL_TEST_MASK(0xff) | - ENABLE_STENCIL_WRITE_MASK | - STENCIL_WRITE_MASK(0xff)); - - - i915->state.Ctx[I915_CTXREG_IAB] = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | - IAB_MODIFY_ENABLE | - IAB_MODIFY_FUNC | - IAB_MODIFY_SRC_FACTOR | - IAB_MODIFY_DST_FACTOR); - - i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(0xff)); + + i915->state.Ctx[I915_CTXREG_IAB] = + (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | IAB_MODIFY_SRC_FACTOR | IAB_MODIFY_DST_FACTOR); + + i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] = + _3DSTATE_CONST_BLEND_COLOR_CMD; i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0; } @@ -856,79 +897,50 @@ static void i915_init_packets( i915ContextPtr i915 ) I915_STATECHANGE(i915, I915_UPLOAD_FOG); i915->state.Fog[I915_FOGREG_MODE0] = _3DSTATE_FOG_MODE_CMD; i915->state.Fog[I915_FOGREG_MODE1] = (FMC1_FOGFUNC_MODIFY_ENABLE | - FMC1_FOGFUNC_VERTEX | - FMC1_FOGINDEX_MODIFY_ENABLE | - FMC1_FOGINDEX_W | - FMC1_C1_C2_MODIFY_ENABLE | - FMC1_DENSITY_MODIFY_ENABLE); + FMC1_FOGFUNC_VERTEX | + FMC1_FOGINDEX_MODIFY_ENABLE | + FMC1_FOGINDEX_W | + FMC1_C1_C2_MODIFY_ENABLE | + FMC1_DENSITY_MODIFY_ENABLE); i915->state.Fog[I915_FOGREG_COLOR] = _3DSTATE_FOG_COLOR_CMD; } - { - I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); - /* color buffer offset/stride */ - i915->state.Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - i915->state.Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(screen->front.pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); - /*i915->state.Buffer[I915_DESTREG_CBUFADDR2] is the offset */ - - - /* depth/Z buffer offset/stride */ - i915->state.Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - i915->state.Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(screen->depth.pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); - i915->state.Buffer[I915_DESTREG_DBUFADDR2] = screen->depth.offset; - - i915->state.Buffer[I915_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD; - /* color/depth pixel format */ - switch (screen->fbFormat) { - case DV_PF_555: - case DV_PF_565: - i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - DITHER_FULL_ALWAYS | - screen->fbFormat | - DEPTH_FRMT_16_FIXED); - break; - case DV_PF_8888: - i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - screen->fbFormat | - DEPTH_FRMT_24_FIXED_8_OTHER); - break; - } - /* scissor */ - i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); + i915->state.Buffer[I915_DESTREG_SENABLE] = + (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); i915->state.Buffer[I915_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD; i915->state.Buffer[I915_DESTREG_SR1] = 0; i915->state.Buffer[I915_DESTREG_SR2] = 0; } +#if 0 + { + I915_STATECHANGE(i915, I915_UPLOAD_DEFAULTS); + i915->state.Default[I915_DEFREG_C0] = _3DSTATE_DEFAULT_DIFFUSE; + i915->state.Default[I915_DEFREG_C1] = 0; + i915->state.Default[I915_DEFREG_S0] = _3DSTATE_DEFAULT_SPECULAR; + i915->state.Default[I915_DEFREG_S1] = 0; + i915->state.Default[I915_DEFREG_Z0] = _3DSTATE_DEFAULT_Z; + i915->state.Default[I915_DEFREG_Z1] = 0; + } +#endif + + /* These will be emitted every at the head of every buffer, unless * we get hardware contexts working. */ - i915->state.active = (I915_UPLOAD_PROGRAM | - I915_UPLOAD_STIPPLE | - I915_UPLOAD_CTX | - I915_UPLOAD_BUFFERS | - I915_UPLOAD_INVARIENT); + i915->state.active = (I915_UPLOAD_PROGRAM | + I915_UPLOAD_STIPPLE | + I915_UPLOAD_CTX | + I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT); } -void i915InitStateFunctions( struct dd_function_table *functions ) +void +i915InitStateFunctions(struct dd_function_table *functions) { functions->AlphaFunc = i915AlphaFunc; functions->BlendColor = i915BlendColor; @@ -955,21 +967,15 @@ void i915InitStateFunctions( struct dd_function_table *functions ) } -void i915InitState( i915ContextPtr i915 ) +void +i915InitState(struct i915_context *i915) { GLcontext *ctx = &i915->intel.ctx; - i915_init_packets( i915 ); + i915_init_packets(i915); - intelInitState( ctx ); + _mesa_init_driver_state(ctx); - memcpy( &i915->initial, &i915->state, sizeof(i915->state) ); + memcpy(&i915->initial, &i915->state, sizeof(i915->state)); i915->current = &i915->state; } - - - - - - - diff --git a/i915/i915_tex.c b/i915/i915_tex.c index d9609d3..386617a 100644 --- a/i915/i915_tex.c +++ b/i915/i915_tex.c @@ -45,109 +45,25 @@ - - - -/** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. - */ - -intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj ) +static void +i915TexEnv(GLcontext * ctx, GLenum target, + GLenum pname, const GLfloat * param) { - i915TextureObjectPtr t = CALLOC_STRUCT( i915_texture_object ); - if ( !t ) - return NULL; + struct i915_context *i915 = I915_CONTEXT(ctx); - texObj->DriverData = t; - t->intel.base.tObj = texObj; - t->intel.dirty = I915_UPLOAD_TEX_ALL; - make_empty_list( &t->intel.base ); - return &t->intel; -} - - -static void i915TexParameter( GLcontext *ctx, GLenum target, - struct gl_texture_object *tObj, - GLenum pname, const GLfloat *params ) -{ - i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData; - switch (pname) { - case GL_TEXTURE_MIN_FILTER: - case GL_TEXTURE_MAG_FILTER: - case GL_TEXTURE_MAX_ANISOTROPY_EXT: - case GL_TEXTURE_WRAP_S: - case GL_TEXTURE_WRAP_T: - case GL_TEXTURE_WRAP_R: - case GL_TEXTURE_BORDER_COLOR: - t->intel.dirty = I915_UPLOAD_TEX_ALL; - break; - - case GL_TEXTURE_COMPARE_MODE: - t->intel.dirty = I915_UPLOAD_TEX_ALL; - break; - case GL_TEXTURE_COMPARE_FUNC: - t->intel.dirty = I915_UPLOAD_TEX_ALL; - break; - - case GL_TEXTURE_BASE_LEVEL: - case GL_TEXTURE_MAX_LEVEL: - case GL_TEXTURE_MIN_LOD: - case GL_TEXTURE_MAX_LOD: - /* The i915 and its successors can do a lot of this without - * reloading the textures. A project for someone? - */ - intelFlush( ctx ); - driSwapOutTextureObject( (driTextureObject *) t ); - t->intel.dirty = I915_UPLOAD_TEX_ALL; - break; - - default: - return; - } -} - - -static void i915TexEnv( GLcontext *ctx, GLenum target, - GLenum pname, const GLfloat *param ) -{ - i915ContextPtr i915 = I915_CONTEXT( ctx ); - GLuint unit = ctx->Texture.CurrentUnit; - - switch (pname) { - case GL_TEXTURE_ENV_COLOR: /* Should be a tracked param */ - case GL_TEXTURE_ENV_MODE: - case GL_COMBINE_RGB: - case GL_COMBINE_ALPHA: - case GL_SOURCE0_RGB: - case GL_SOURCE1_RGB: - case GL_SOURCE2_RGB: - case GL_SOURCE0_ALPHA: - case GL_SOURCE1_ALPHA: - case GL_SOURCE2_ALPHA: - case GL_OPERAND0_RGB: - case GL_OPERAND1_RGB: - case GL_OPERAND2_RGB: - case GL_OPERAND0_ALPHA: - case GL_OPERAND1_ALPHA: - case GL_OPERAND2_ALPHA: - case GL_RGB_SCALE: - case GL_ALPHA_SCALE: - i915->tex_program.translated = 0; - break; - - case GL_TEXTURE_LOD_BIAS: { - int b = (int) ((*param) * 16.0); - if (b > 255) b = 255; - if (b < -256) b = -256; - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->state.Tex[unit][I915_TEXREG_SS2] &= ~SS2_LOD_BIAS_MASK; - i915->state.Tex[unit][I915_TEXREG_SS2] |= - ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); - break; - } + case GL_TEXTURE_LOD_BIAS:{ + GLuint unit = ctx->Texture.CurrentUnit; + GLint b = (int) ((*param) * 16.0); + if (b > 255) + b = 255; + if (b < -256) + b = -256; + I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); + i915->lodbias_ss2[unit] = + ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + break; + } default: break; @@ -155,33 +71,10 @@ static void i915TexEnv( GLcontext *ctx, GLenum target, } -static void i915BindTexture( GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj ) -{ - i915TextureObjectPtr tex; - - if (!texObj->DriverData) - i915AllocTexObj( texObj ); - - tex = (i915TextureObjectPtr)texObj->DriverData; - - if (tex->lastTarget != texObj->Target) { - tex->intel.dirty = I915_UPLOAD_TEX_ALL; - tex->lastTarget = texObj->Target; - } - - /* Need this if image format changes between bound textures. - * Could try and shortcircuit by checking for differences in - * state between incoming and outgoing textures: - */ - I915_CONTEXT(ctx)->tex_program.translated = 0; -} - - - -void i915InitTextureFuncs( struct dd_function_table *functions ) +void +i915InitTextureFuncs(struct dd_function_table *functions) { - functions->BindTexture = i915BindTexture; +/* functions->TexEnv = i915TexEnv; - functions->TexParameter = i915TexParameter; +*/ } diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c new file mode 100644 index 0000000..b5085f4 --- /dev/null +++ b/i915/i915_tex_layout.c @@ -0,0 +1,477 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** @file i915_tex_layout.c + * Code to layout images in a mipmap tree for i830M-GM915 and G945 and beyond. + */ + +#include "intel_mipmap_tree.h" +#include "intel_tex_layout.h" +#include "macros.h" +#include "intel_context.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +static GLint initial_offsets[6][2] = { + [FACE_POS_X] = {0, 0}, + [FACE_POS_Y] = {1, 0}, + [FACE_POS_Z] = {1, 1}, + [FACE_NEG_X] = {0, 2}, + [FACE_NEG_Y] = {1, 2}, + [FACE_NEG_Z] = {1, 3}, +}; + + +static GLint step_offsets[6][2] = { + [FACE_POS_X] = {0, 2}, + [FACE_POS_Y] = {-1, 2}, + [FACE_POS_Z] = {-1, 1}, + [FACE_NEG_X] = {0, 2}, + [FACE_NEG_Y] = {-1, 2}, + [FACE_NEG_Z] = {-1, 1}, +}; + +/** + * Cube texture map layout for i830M-GM915. + * + * Hardware layout looks like: + * + * +-------+-------+ + * | | | + * | | | + * | | | + * | +x | +y | + * | | | + * | | | + * | | | + * | | | + * +---+---+-------+ + * | | | | + * | +x| +y| | + * | | | | + * | | | | + * +-+-+---+ +z | + * | | | | | + * +-+-+ +z| | + * | | | | + * +-+-+---+-------+ + * | | | + * | | | + * | | | + * | -x | -y | + * | | | + * | | | + * | | | + * | | | + * +---+---+-------+ + * | | | | + * | -x| -y| | + * | | | | + * | | | | + * +-+-+---+ -z | + * | | | | | + * +-+-+ -z| | + * | | | | + * +-+---+-------+ + * + */ +static void +i915_miptree_layout_cube(struct intel_context *intel, + struct intel_mipmap_tree * mt) +{ + const GLuint dim = mt->width0; + GLuint face; + GLuint lvlWidth = mt->width0, lvlHeight = mt->height0; + GLint level; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* double pitch for cube layouts */ + mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->total_height = dim * 4; + + for (level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_level_info(mt, level, 6, + 0, 0, + /*OLD: mt->pitch, mt->total_height,*/ + lvlWidth, lvlHeight, + 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + for (face = 0; face < 6; face++) { + GLuint x = initial_offsets[face][0] * dim; + GLuint y = initial_offsets[face][1] * dim; + GLuint d = dim; + + for (level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_image_offset(mt, level, face, x, y); + + if (d == 0) + _mesa_printf("cube mipmap %d/%d (%d..%d) is 0x0\n", + face, level, mt->first_level, mt->last_level); + + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } +} + +static void +i915_miptree_layout_3d(struct intel_context *intel, + struct intel_mipmap_tree * mt) +{ + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint depth = mt->depth0; + GLuint stack_height = 0; + GLint level; + + /* Calculate the size of a single slice. */ + mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + + /* XXX: hardware expects/requires 9 levels at minimum. */ + for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) { + intel_miptree_set_level_info(mt, level, depth, 0, mt->total_height, + width, height, depth); + + stack_height += MAX2(2, height); + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + /* Fixup depth image_offsets: */ + depth = mt->depth0; + for (level = mt->first_level; level <= mt->last_level; level++) { + GLuint i; + for (i = 0; i < depth; i++) { + intel_miptree_set_image_offset(mt, level, i, + 0, i * stack_height); + } + + depth = minify(depth); + } + + /* Multiply slice size by texture depth for total size. It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. + */ + mt->total_height = stack_height * mt->depth0; +} + +static void +i915_miptree_layout_2d(struct intel_context *intel, + struct intel_mipmap_tree * mt) +{ + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint img_height; + GLint level; + + mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->total_height = 0; + + for (level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_level_info(mt, level, 1, + 0, mt->total_height, + width, height, 1); + + if (mt->compressed) + img_height = MAX2(1, height / 4); + else + img_height = (MAX2(2, height) + 1) & ~1; + + mt->total_height += img_height; + + width = minify(width); + height = minify(height); + } +} + +GLboolean +i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +{ + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + i915_miptree_layout_cube(intel, mt); + break; + case GL_TEXTURE_3D: + i915_miptree_layout_3d(intel, mt); + break; + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_ARB: + i915_miptree_layout_2d(intel, mt); + break; + default: + _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()"); + break; + } + + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, + mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp); + + return GL_TRUE; +} + + +/** + * Cube texture map layout for GM945 and later. + * + * The hardware layout looks like the 830-915 layout, except for the small + * sizes. A zoomed in view of the layout for 945 is: + * + * +-------+-------+ + * | 8x8 | 8x8 | + * | | | + * | | | + * | +x | +y | + * | | | + * | | | + * | | | + * | | | + * +---+---+-------+ + * |4x4| | 8x8 | + * | +x| | | + * | | | | + * | | | | + * +---+ | +z | + * |4x4| | | + * | +y| | | + * | | | | + * +---+ +-------+ + * + * ... + * + * +-------+-------+ + * | 8x8 | 8x8 | + * | | | + * | | | + * | -x | -y | + * | | | + * | | | + * | | | + * | | | + * +---+---+-------+ + * |4x4| | 8x8 | + * | -x| | | + * | | | | + * | | | | + * +---+ | -z | + * |4x4| | | + * | -y| | | + * | | | | + * +---+ +---+---+---+---+---+---+---+---+---+ + * |4x4| |4x4| |2x2| |2x2| |2x2| |2x2| + * | +z| | -z| | +x| | +y| | +z| | -x| ... + * | | | | | | | | | | | | + * +---+ +---+ +---+ +---+ +---+ +---+ + * + * The bottom row continues with the remaining 2x2 then the 1x1 mip contents + * in order, with each of them aligned to a 4x4 block boundary. Thus, for + * 32x32 cube maps and smaller, the bottom row layout is going to dictate the + * pitch of the tree. For a tree with 4x4 images, the pitch is at least + * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1 + * it is 6 * 8 texels. + */ + +static void +i945_miptree_layout_cube(struct intel_context *intel, + struct intel_mipmap_tree * mt) +{ + const GLuint dim = mt->width0; + GLuint face; + GLuint lvlWidth = mt->width0, lvlHeight = mt->height0; + GLint level; + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (dim > 32) + mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + else + mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8); + + if (dim >= 4) + mt->total_height = dim * 4 + 4; + else + mt->total_height = 4; + + /* Set all the levels to effectively occupy the whole rectangular region. */ + for (level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_level_info(mt, level, 6, + 0, 0, + lvlWidth, lvlHeight, 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + + for (face = 0; face < 6; face++) { + GLuint x = initial_offsets[face][0] * dim; + GLuint y = initial_offsets[face][1] * dim; + GLuint d = dim; + + if (dim == 4 && face >= 4) { + y = mt->total_height - 4; + x = (face - 4) * 8; + } else if (dim < 4 && (face > 0 || mt->first_level > 0)) { + y = mt->total_height - 4; + x = face * 8; + } + + for (level = mt->first_level; level <= mt->last_level; level++) { + intel_miptree_set_image_offset(mt, level, face, x, y); + + d >>= 1; + + switch (d) { + case 4: + switch (face) { + case FACE_POS_X: + case FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case FACE_POS_Y: + case FACE_NEG_Y: + y += 12; + x -= 8; + break; + case FACE_POS_Z: + case FACE_NEG_Z: + y = mt->total_height - 4; + x = (face - 4) * 8; + break; + } + + case 2: + y = mt->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + + default: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + } + } + } +} + +static void +i945_miptree_layout_3d(struct intel_context *intel, + struct intel_mipmap_tree * mt) +{ + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint depth = mt->depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + + mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->total_height = 0; + + pack_y_pitch = MAX2(mt->height0, 2); + pack_x_pitch = mt->pitch; + pack_x_nr = 1; + + for (level = mt->first_level; level <= mt->last_level; level++) { + GLint x = 0; + GLint y = 0; + GLint q, j; + + intel_miptree_set_level_info(mt, level, depth, + 0, mt->total_height, + width, height, depth); + + for (q = 0; q < depth;) { + for (j = 0; j < pack_x_nr && q < depth; j++, q++) { + intel_miptree_set_image_offset(mt, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + mt->total_height += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + +GLboolean +i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +{ + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + i945_miptree_layout_cube(intel, mt); + break; + case GL_TEXTURE_3D: + i945_miptree_layout_3d(intel, mt); + break; + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_ARB: + i945_miptree_layout_2d(intel, mt); + break; + default: + _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()"); + break; + } + + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, + mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp); + + return GL_TRUE; +} diff --git a/i915/i915_texprog.c b/i915/i915_texprog.c deleted file mode 100644 index f6a8b02..0000000 --- a/i915/i915_texprog.c +++ /dev/null @@ -1,676 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include <strings.h> - -#include "glheader.h" -#include "macros.h" -#include "enums.h" - -#include "tnl/t_context.h" -#include "intel_batchbuffer.h" - -#include "i915_reg.h" -#include "i915_context.h" -#include "i915_program.h" - -static GLuint translate_tex_src_bit( struct i915_fragment_program *p, - GLubyte bit ) -{ - switch (bit) { - case TEXTURE_1D_BIT: return D0_SAMPLE_TYPE_2D; - case TEXTURE_2D_BIT: return D0_SAMPLE_TYPE_2D; - case TEXTURE_RECT_BIT: return D0_SAMPLE_TYPE_2D; - case TEXTURE_3D_BIT: return D0_SAMPLE_TYPE_VOLUME; - case TEXTURE_CUBE_BIT: return D0_SAMPLE_TYPE_CUBE; - default: i915_program_error(p, "TexSrcBit"); return 0; - } -} - -static GLuint get_source( struct i915_fragment_program *p, - GLenum src, GLuint unit ) -{ - switch (src) { - case GL_TEXTURE: - if (p->src_texture == UREG_BAD) { - - /* TODO: Use D0_CHANNEL_XY where possible. - */ - GLuint dim = translate_tex_src_bit( p, p->ctx->Texture.Unit[unit]._ReallyEnabled); - GLuint sampler = i915_emit_decl(p, REG_TYPE_S, unit, dim); - GLuint texcoord = i915_emit_decl(p, REG_TYPE_T, unit, D0_CHANNEL_ALL); - GLuint tmp = i915_get_temp( p ); - GLuint op = T0_TEXLD; - - if (p->VB->TexCoordPtr[unit]->size == 4) - op = T0_TEXLDP; - - p->src_texture = i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, - sampler, texcoord, op ); - } - - return p->src_texture; - - /* Crossbar: */ - case GL_TEXTURE0: - case GL_TEXTURE1: - case GL_TEXTURE2: - case GL_TEXTURE3: - case GL_TEXTURE4: - case GL_TEXTURE5: - case GL_TEXTURE6: - case GL_TEXTURE7: { - return UREG_BAD; - } - - case GL_CONSTANT: - return i915_emit_const4fv( p, p->ctx->Texture.Unit[unit].EnvColor ); - case GL_PRIMARY_COLOR: - return i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); - case GL_PREVIOUS: - default: - i915_emit_decl(p, - GET_UREG_TYPE(p->src_previous), - GET_UREG_NR(p->src_previous), D0_CHANNEL_ALL); - return p->src_previous; - } -} - - -static GLuint emit_combine_source( struct i915_fragment_program *p, - GLuint mask, - GLuint unit, - GLenum source, - GLenum operand ) -{ - GLuint arg, src; - - src = get_source(p, source, unit); - - switch (operand) { - case GL_ONE_MINUS_SRC_COLOR: - /* Get unused tmp, - * Emit tmp = 1.0 + arg.-x-y-z-w - */ - arg = i915_get_temp( p ); - return i915_emit_arith( p, A0_ADD, arg, mask, 0, - swizzle(src, ONE, ONE, ONE, ONE ), - negate(src, 1,1,1,1), 0); - - case GL_SRC_ALPHA: - if (mask == A0_DEST_CHANNEL_W) - return src; - else - return swizzle( src, W, W, W, W ); - case GL_ONE_MINUS_SRC_ALPHA: - /* Get unused tmp, - * Emit tmp = 1.0 + arg.-w-w-w-w - */ - arg = i915_get_temp( p ); - return i915_emit_arith( p, A0_ADD, arg, mask, 0, - swizzle(src, ONE, ONE, ONE, ONE ), - negate( swizzle(src,W,W,W,W), 1,1,1,1), 0); - case GL_SRC_COLOR: - default: - return src; - } -} - - - -static int nr_args( GLenum mode ) -{ - switch (mode) { - case GL_REPLACE: return 1; - case GL_MODULATE: return 2; - case GL_ADD: return 2; - case GL_ADD_SIGNED: return 2; - case GL_INTERPOLATE: return 3; - case GL_SUBTRACT: return 2; - case GL_DOT3_RGB_EXT: return 2; - case GL_DOT3_RGBA_EXT: return 2; - case GL_DOT3_RGB: return 2; - case GL_DOT3_RGBA: return 2; - default: return 0; - } -} - - -static GLboolean args_match( struct gl_texture_unit *texUnit ) -{ - int i, nr = nr_args(texUnit->Combine.ModeRGB); - - for (i = 0 ; i < nr ; i++) { - if (texUnit->Combine.SourceA[i] != texUnit->Combine.SourceRGB[i]) - return GL_FALSE; - - switch(texUnit->Combine.OperandA[i]) { - case GL_SRC_ALPHA: - switch(texUnit->Combine.OperandRGB[i]) { - case GL_SRC_COLOR: - case GL_SRC_ALPHA: - break; - default: - return GL_FALSE; - } - break; - case GL_ONE_MINUS_SRC_ALPHA: - switch(texUnit->Combine.OperandRGB[i]) { - case GL_ONE_MINUS_SRC_COLOR: - case GL_ONE_MINUS_SRC_ALPHA: - break; - default: - return GL_FALSE; - } - break; - default: - return GL_FALSE; /* impossible */ - } - } - - return GL_TRUE; -} - - -static GLuint emit_combine( struct i915_fragment_program *p, - GLuint dest, - GLuint mask, - GLuint saturate, - GLuint unit, - GLenum mode, - const GLenum *source, - const GLenum *operand) -{ - int tmp, src[3], nr = nr_args(mode); - int i; - - for (i = 0; i < nr; i++) - src[i] = emit_combine_source( p, mask, unit, source[i], operand[i] ); - - switch (mode) { - case GL_REPLACE: - if (mask == A0_DEST_CHANNEL_ALL && !saturate) - return src[0]; - else - return i915_emit_arith( p, A0_MOV, dest, mask, saturate, src[0], 0, 0 ); - case GL_MODULATE: - return i915_emit_arith( p, A0_MUL, dest, mask, saturate, - src[0], src[1], 0 ); - case GL_ADD: - return i915_emit_arith( p, A0_ADD, dest, mask, saturate, - src[0], src[1], 0 ); - case GL_ADD_SIGNED: - /* tmp = arg0 + arg1 - * result = tmp + -.5 - */ - tmp = i915_emit_const1f(p, .5); - tmp = negate(swizzle(tmp,X,X,X,X),1,1,1,1); - i915_emit_arith( p, A0_ADD, dest, mask, 0, src[0], src[1], 0 ); - i915_emit_arith( p, A0_ADD, dest, mask, saturate, dest, tmp, 0 ); - return dest; - case GL_INTERPOLATE: /* TWO INSTRUCTIONS */ - /* Arg0 * (Arg2) + Arg1 * (1-Arg2) - * - * Arg0*Arg2 + Arg1 - Arg1Arg2 - * - * tmp = Arg0*Arg2 + Arg1, - * result = (-Arg1)Arg2 + tmp - */ - tmp = i915_get_temp( p ); - i915_emit_arith( p, A0_MAD, tmp, mask, 0, src[0], src[2], src[1] ); - i915_emit_arith( p, A0_MAD, dest, mask, saturate, - negate(src[1], 1,1,1,1), src[2], tmp ); - return dest; - case GL_SUBTRACT: - /* negate src[1] */ - return i915_emit_arith( p, A0_ADD, dest, mask, saturate, src[0], - negate(src[1],1,1,1,1), 0 ); - - case GL_DOT3_RGBA: - case GL_DOT3_RGBA_EXT: - case GL_DOT3_RGB_EXT: - case GL_DOT3_RGB: { - GLuint tmp0 = i915_get_temp( p ); - GLuint tmp1 = i915_get_temp( p ); - GLuint neg1 = negate(swizzle(i915_emit_const1f(p, 1),X,X,X,X), 1,1,1,1); - GLuint two = swizzle(i915_emit_const1f(p, 2),X,X,X,X); - i915_emit_arith( p, A0_MAD, tmp0, A0_DEST_CHANNEL_ALL, 0, - two, src[0], neg1); - if (src[0] == src[1]) - tmp1 = tmp0; - else - i915_emit_arith( p, A0_MAD, tmp1, A0_DEST_CHANNEL_ALL, 0, - two, src[1], neg1); - i915_emit_arith( p, A0_DP3, dest, mask, saturate, tmp0, tmp1, 0); - return dest; - } - - default: - return src[0]; - } -} - -static GLuint get_dest( struct i915_fragment_program *p, int unit ) -{ - if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) - return i915_get_temp( p ); - else if (unit != p->last_tex_stage) - return i915_get_temp( p ); - else - return UREG(REG_TYPE_OC, 0); -} - - - -static GLuint emit_texenv( struct i915_fragment_program *p, int unit ) -{ - struct gl_texture_unit *texUnit = &p->ctx->Texture.Unit[unit]; - GLenum envMode = texUnit->EnvMode; - struct gl_texture_object *tObj = texUnit->_Current; - GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; - GLuint saturate = unit < p->last_tex_stage ? A0_DEST_SATURATE : 0; - - switch(envMode) { - case GL_BLEND: { - const int cf = get_source(p, GL_PREVIOUS, unit); - const int cc = get_source(p, GL_CONSTANT, unit); - const int cs = get_source(p, GL_TEXTURE, unit); - const int out = get_dest(p, unit); - - if (format == GL_INTENSITY) { - /* cv = cf(1 - cs) + cc.cs - * cv = cf - cf.cs + cc.cs - */ - /* u[2] = MAD( -cf * cs + cf ) - * cv = MAD( cc * cs + u[2] ) - */ - - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, - negate(cf,1,1,1,1), cs, cf ); - - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, - cc, cs, out ); - - return out; - } else { - /* cv = cf(1 - cs) + cc.cs - * cv = cf - cf.cs + cc.cs - * av = af.as - */ - /* u[2] = MAD( cf.-x-y-zw * cs.xyzw + cf.xyz0 ) - * oC = MAD( cc.xyz0 * cs.xyz0 + u[2].xyzw ) - */ - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, - negate(cf,1,1,1,0), - cs, - swizzle(cf,X,Y,Z,ZERO) ); - - - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, - swizzle(cc,X,Y,Z,ZERO), - swizzle(cs,X,Y,Z,ZERO), - out ); - - return out; - } - } - - case GL_DECAL: { - if (format == GL_RGB || - format == GL_RGBA) { - int cf = get_source( p, GL_PREVIOUS, unit ); - int cs = get_source( p, GL_TEXTURE, unit ); - int out = get_dest(p, unit); - - /* cv = cf(1-as) + cs.as - * cv = cf.(-as) + cf + cs.as - * av = af - */ - - /* u[2] = mad( cf.xyzw * cs.-w-w-w1 + cf.xyz0 ) - * oc = mad( cs.xyz0 * cs.www0 + u[2].xyzw ) - */ - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, - cf, - negate(swizzle(cs,W,W,W,ONE),1,1,1,0), - swizzle(cf,X,Y,Z,ZERO) ); - - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, - swizzle(cs,X,Y,Z,ZERO), - swizzle(cs,W,W,W,ZERO), - out ); - return out; - } - else { - return get_source( p, GL_PREVIOUS, unit ); - } - } - - case GL_REPLACE: { - const int cs = get_source( p, GL_TEXTURE, unit ); /* saturated */ - switch (format) { - case GL_ALPHA: { - const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */ - i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_XYZ, 0, cf, 0, 0 ); - return cs; - } - case GL_RGB: - case GL_LUMINANCE: { - const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */ - i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_W, 0, cf, 0, 0 ); - return cs; - } - default: - return cs; - } - } - - case GL_MODULATE: { - const int cf = get_source( p, GL_PREVIOUS, unit ); - const int cs = get_source( p, GL_TEXTURE, unit ); - const int out = get_dest(p, unit); - switch (format) { - case GL_ALPHA: - i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate, - swizzle(cs, ONE, ONE, ONE, W), cf, 0 ); - break; - default: - i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate, - cs, cf, 0 ); - break; - } - return out; - } - case GL_ADD: { - int cf = get_source( p, GL_PREVIOUS, unit ); - int cs = get_source( p, GL_TEXTURE, unit ); - const int out = get_dest( p, unit ); - - if (format == GL_INTENSITY) { - /* output-color.rgba = add( incoming, u[1] ) - */ - i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, saturate, - cs, cf, 0 ); - return out; - } - else { - /* cv.xyz = cf.xyz + cs.xyz - * cv.w = cf.w * cs.w - * - * cv.xyzw = MAD( cf.111w * cs.xyzw + cf.xyz0 ) - */ - i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, - swizzle(cf,ONE,ONE,ONE,W), - cs, - swizzle(cf,X,Y,Z,ZERO) ); - return out; - } - break; - } - case GL_COMBINE: { - GLuint rgb_shift, alpha_shift, out, shift; - GLuint dest = get_dest(p, unit); - - /* The EXT version of the DOT3 extension does not support the - * scale factor, but the ARB version (and the version in OpenGL - * 1.3) does. - */ - switch (texUnit->Combine.ModeRGB) { - case GL_DOT3_RGB_EXT: - alpha_shift = texUnit->Combine.ScaleShiftA; - rgb_shift = 0; - break; - - case GL_DOT3_RGBA_EXT: - alpha_shift = 0; - rgb_shift = 0; - break; - - default: - rgb_shift = texUnit->Combine.ScaleShiftRGB; - alpha_shift = texUnit->Combine.ScaleShiftA; - break; - } - - - /* Emit the RGB and A combine ops - */ - if (texUnit->Combine.ModeRGB == texUnit->Combine.ModeA && - args_match( texUnit )) { - out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate, - unit, - texUnit->Combine.ModeRGB, - texUnit->Combine.SourceRGB, - texUnit->Combine.OperandRGB ); - } - else if (texUnit->Combine.ModeRGB == GL_DOT3_RGBA_EXT || - texUnit->Combine.ModeRGB == GL_DOT3_RGBA) { - - out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate, - unit, - texUnit->Combine.ModeRGB, - texUnit->Combine.SourceRGB, - texUnit->Combine.OperandRGB ); - } - else { - /* Need to do something to stop from re-emitting identical - * argument calculations here: - */ - out = emit_combine( p, dest, A0_DEST_CHANNEL_XYZ, saturate, - unit, - texUnit->Combine.ModeRGB, - texUnit->Combine.SourceRGB, - texUnit->Combine.OperandRGB ); - out = emit_combine( p, dest, A0_DEST_CHANNEL_W, saturate, - unit, - texUnit->Combine.ModeA, - texUnit->Combine.SourceA, - texUnit->Combine.OperandA ); - } - - /* Deal with the final shift: - */ - if (alpha_shift || rgb_shift) { - if (rgb_shift == alpha_shift) { - shift = i915_emit_const1f(p, 1<<rgb_shift); - shift = swizzle(shift,X,X,X,X); - } - else { - shift = i915_emit_const2f(p, 1<<rgb_shift, 1<<alpha_shift); - shift = swizzle(shift,X,X,X,Y); - } - return i915_emit_arith( p, A0_MUL, dest, A0_DEST_CHANNEL_ALL, - saturate, out, shift, 0 ); - } - - return out; - } - - default: - return get_source(p, GL_PREVIOUS, 0); - } -} - -static void emit_program_fini( struct i915_fragment_program *p ) -{ - int cf = get_source( p, GL_PREVIOUS, 0 ); - int out = UREG( REG_TYPE_OC, 0 ); - - if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { - /* Emit specular add. - */ - GLuint s = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_ALL); - i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, 0, cf, - swizzle(s, X,Y,Z,ZERO), 0 ); - } - else if (cf != out) { - /* Will wind up in here if no texture enabled or a couple of - * other scenarios (GL_REPLACE for instance). - */ - i915_emit_arith( p, A0_MOV, out, A0_DEST_CHANNEL_ALL, 0, cf, 0, 0 ); - } -} - - -static void i915EmitTextureProgram( i915ContextPtr i915 ) -{ - GLcontext *ctx = &i915->intel.ctx; - struct i915_fragment_program *p = &i915->tex_program; - GLuint unit; - - if (0) fprintf(stderr, "%s\n", __FUNCTION__); - - i915_init_program( i915, p ); - - if (ctx->Texture._EnabledUnits) { - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - p->last_tex_stage = unit; - } - - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - p->src_previous = emit_texenv( p, unit ); - p->src_texture = UREG_BAD; - p->temp_flag = 0xffff000; - p->temp_flag |= 1 << GET_UREG_NR(p->src_previous); - } - } - - emit_program_fini( p ); - - i915_fini_program( p ); - i915_upload_program( i915, p ); - - p->translated = 1; -} - - -void i915ValidateTextureProgram( i915ContextPtr i915 ) -{ - intelContextPtr intel = &i915->intel; - GLcontext *ctx = &intel->ctx; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - DECLARE_RENDERINPUTS(index_bitset); - int i, offset; - GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; - GLuint s2 = S2_TEXCOORD_NONE; - - RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); - - /* Important: - */ - VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; - intel->vertex_attr_count = 0; - intel->coloroffset = 0; - intel->specoffset = 0; - offset = 0; - - if (i915->current_program) { - i915->current_program->on_hardware = 0; - i915->current_program->params_uptodate = 0; - } - - if (i915->vertex_fog == I915_FOG_PIXEL) { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 ); - RENDERINPUTS_CLEAR( index_bitset, _TNL_ATTRIB_FOG ); - } - else if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 ); - } - else { - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 ); - } - - /* How undefined is undefined? */ - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { - EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4 ); - } - - intel->coloroffset = offset / 4; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 ); - - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || - RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { - intel->specoffset = offset / 4; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 ); - } else - EMIT_PAD( 3 ); - - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 ); - else - EMIT_PAD( 1 ); - } - - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - for (i = 0; i < 8; i++) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - int sz = VB->TexCoordPtr[i]->size; - - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); - - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 ); - } - } - } - - /* Only need to change the vertex emit code if there has been a - * statechange to a new hardware vertex format: - */ - if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || - s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { - - I915_STATECHANGE( i915, I915_UPLOAD_CTX ); - - i915->tex_program.translated = 0; - - /* Must do this *after* statechange, so as not to affect - * buffered vertices reliant on the old state: - */ - intel->vertex_size = _tnl_install_attrs( ctx, - intel->vertex_attrs, - intel->vertex_attr_count, - intel->ViewportMatrix.m, 0 ); - - intel->vertex_size >>= 2; - - i915->state.Ctx[I915_CTXREG_LIS2] = s2; - i915->state.Ctx[I915_CTXREG_LIS4] = s4; - - assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size )); - } - - if (!i915->tex_program.translated || - i915->last_ReallyEnabled != ctx->Texture._EnabledUnits) { - i915EmitTextureProgram( i915 ); - i915->last_ReallyEnabled = ctx->Texture._EnabledUnits; - } -} diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c index a19d4b6..69c1e07 100644 --- a/i915/i915_texstate.c +++ b/i915/i915_texstate.c @@ -25,902 +25,351 @@ * **************************************************************************/ -#include "glheader.h" -#include "macros.h" #include "mtypes.h" -#include "simple_list.h" #include "enums.h" #include "texformat.h" -#include "texstore.h" +#include "dri_bufmgr.h" -#include "mm.h" - -#include "intel_screen.h" -#include "intel_ioctl.h" +#include "intel_mipmap_tree.h" #include "intel_tex.h" #include "i915_context.h" #include "i915_reg.h" -static GLint initial_offsets[6][2] = { {0,0}, - {0,2}, - {1,0}, - {1,2}, - {1,1}, - {1,3} }; - - -static GLint step_offsets[6][2] = { {0,2}, - {0,2}, - {-1,2}, - {-1,2}, - {-1,1}, - {-1,1} }; - - -#define I915_TEX_UNIT_ENABLED(unit) (1<<unit) - -static void i915LayoutTextureImages( i915ContextPtr i915, - struct gl_texture_object *tObj ) -{ - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData; - GLint firstLevel, lastLevel, numLevels; - GLint i, total_height, pitch; - - /* Compute which mipmap levels we really want to send to the hardware. - */ - driCalculateTextureFirstLastLevel( (driTextureObject *) t ); - - /* Figure out the amount of memory required to hold all the mipmap - * levels. Choose the smallest pitch to accomodate the largest - * mipmap: - */ - firstLevel = t->intel.base.firstLevel; - lastLevel = t->intel.base.lastLevel; - numLevels = lastLevel - firstLevel + 1; - - - - /* All images must be loaded at this pitch. Count the number of - * lines required: - */ - switch (tObj->Target) { - case GL_TEXTURE_CUBE_MAP: { - const GLuint dim = tObj->Image[0][firstLevel]->Width; - GLuint face; - - pitch = dim * t->intel.texelBytes; - pitch *= 2; /* double pitch for cube layouts */ - pitch = (pitch + 3) & ~3; - - total_height = dim * 4; - - for ( face = 0 ; face < 6 ; face++) { - GLuint x = initial_offsets[face][0] * dim; - GLuint y = initial_offsets[face][1] * dim; - GLuint d = dim; - - t->intel.base.dirty_images[face] = ~0; - - assert(tObj->Image[face][firstLevel]->Width == dim); - assert(tObj->Image[face][firstLevel]->Height == dim); - - for (i = 0; i < numLevels; i++) { - t->intel.image[face][i].image = tObj->Image[face][firstLevel + i]; - if (!t->intel.image[face][i].image) { - fprintf(stderr, "no image %d %d\n", face, i); - break; /* can't happen */ - } - - t->intel.image[face][i].offset = - y * pitch + x * t->intel.texelBytes; - t->intel.image[face][i].internalFormat = baseImage->_BaseFormat; - - d >>= 1; - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - } - } - break; - } - case GL_TEXTURE_3D: { - GLuint virtual_height; - GLuint tmp_numLevels = numLevels; - pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes; - pitch = (pitch + 3) & ~3; - t->intel.base.dirty_images[0] = ~0; - - /* Calculate the size of a single slice. Hardware demands a - * minimum of 8 mipmaps, some of which might ultimately not be - * used: - */ - if (tmp_numLevels < 9) - tmp_numLevels = 9; - - virtual_height = tObj->Image[0][firstLevel]->Height; - - for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) { - t->intel.image[0][i].image = tObj->Image[0][firstLevel + i]; - if (t->intel.image[0][i].image) { - t->intel.image[0][i].offset = total_height * pitch; - t->intel.image[0][i].internalFormat = baseImage->_BaseFormat; - } - - total_height += MAX2(2, virtual_height); - virtual_height >>= 1; - } - - t->intel.depth_pitch = total_height * pitch; - - /* Multiply slice size by texture depth for total size. It's - * remarkable how wasteful of memory all the i8x0 texture - * layouts are. - */ - total_height *= t->intel.image[0][0].image->Depth; - break; - } - default: - pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes; - pitch = (pitch + 3) & ~3; - t->intel.base.dirty_images[0] = ~0; - - for ( total_height = i = 0 ; i < numLevels ; i++ ) { - t->intel.image[0][i].image = tObj->Image[0][firstLevel + i]; - if (!t->intel.image[0][i].image) - break; - - t->intel.image[0][i].offset = total_height * pitch; - t->intel.image[0][i].internalFormat = baseImage->_BaseFormat; - if (t->intel.image[0][i].image->IsCompressed) { - total_height += (t->intel.image[0][i].image->Height + 3) / 4; - } - else - total_height += MAX2(2, t->intel.image[0][i].image->Height); - } - break; - } - - t->intel.Pitch = pitch; - t->intel.base.totalSize = total_height*pitch; - t->intel.max_level = numLevels-1; -} - -static void i945LayoutTextureImages( i915ContextPtr i915, - struct gl_texture_object *tObj ) +static GLuint +translate_texture_format(GLuint mesa_format, GLenum DepthMode) { - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData; - GLint firstLevel, lastLevel, numLevels; - GLint i, total_height, pitch, sz, max_offset = 0, offset; - - - /* Compute which mipmap levels we really want to send to the hardware. - */ - driCalculateTextureFirstLastLevel( (driTextureObject *) t ); - - /* Figure out the amount of memory required to hold all the mipmap - * levels. Choose the smallest pitch to accomodate the largest - * mipmap: - */ - firstLevel = t->intel.base.firstLevel; - lastLevel = t->intel.base.lastLevel; - numLevels = lastLevel - firstLevel + 1; - - - - /* All images must be loaded at this pitch. Count the number of - * lines required: - */ - switch (tObj->Target) { - case GL_TEXTURE_CUBE_MAP: { - const GLuint dim = tObj->Image[0][firstLevel]->Width; - GLuint face; - - /* Depending on the size of the largest images, pitch can be - * determined either by the old-style packing of cubemap faces, - * or the final row of 4x4, 2x2 and 1x1 faces below this. - */ - if (dim > 32) { - pitch = dim * t->intel.texelBytes; - pitch *= 2; /* double pitch for cube layouts */ - pitch = (pitch + 3) & ~3; - } - else { - pitch = 14 * 8 * t->intel.texelBytes; /* determined by row of - * little maps at - * bottom */ - } - - total_height = dim * 4 + 4; - - for ( face = 0 ; face < 6 ; face++) { - GLuint x = initial_offsets[face][0] * dim; - GLuint y = initial_offsets[face][1] * dim; - GLuint d = dim; - - if (dim == 4 && face >= 4) { - y = total_height - 4; - x = (face - 4) * 8; - } - else if (dim < 4) { - y = total_height - 4; - x = face * 8; - } - - t->intel.base.dirty_images[face] = ~0; - - assert(tObj->Image[face][firstLevel]->Width == dim); - assert(tObj->Image[face][firstLevel]->Height == dim); - - for (i = 0; i < numLevels; i++) { - - - t->intel.image[face][i].image = tObj->Image[face][firstLevel + i]; - assert(t->intel.image[face][i].image); - - t->intel.image[face][i].offset = - y * pitch + x * t->intel.texelBytes; - t->intel.image[face][i].internalFormat = baseImage->_BaseFormat; - - d >>= 1; - - switch (d) { - case 4: - switch (face) { - case FACE_POS_X: - case FACE_NEG_X: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - case FACE_POS_Y: - case FACE_NEG_Y: - y += 12; - x -= 8; - break; - case FACE_POS_Z: - case FACE_NEG_Z: - y = total_height - 4; - x = (face - 4) * 8; - break; - } - - case 2: - y = total_height - 4; - x = 16 + face * 8; - break; - - case 1: - x += 48; - break; - - default: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - } - } - } - max_offset = total_height * pitch; - break; - } - case GL_TEXTURE_3D: { - GLuint depth_packing = 0, depth_pack_pitch; - GLuint tmp_numLevels = numLevels; - pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes; - pitch = (pitch + 3) & ~3; - depth_pack_pitch = pitch; - - t->intel.base.dirty_images[0] = ~0; - - - for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) { - t->intel.image[0][i].image = tObj->Image[0][firstLevel + i]; - if (!t->intel.image[0][i].image) - break; - - - t->intel.image[0][i].offset = total_height * pitch; - t->intel.image[0][i].internalFormat = baseImage->_BaseFormat; - - - - total_height += MAX2(2, t->intel.image[0][i].image->Height) * - MAX2((t->intel.image[0][i].image->Depth >> depth_packing), 1); - - /* When alignment dominates, can't increase depth packing? - * Or does pitch grow??? What are the alignment constraints, - * anyway? - */ - if (depth_pack_pitch > 4) { - depth_packing++; - depth_pack_pitch <<= 2; - } - } - - max_offset = total_height * pitch; - break; - } - default: - pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes; - pitch = (pitch + 3) & ~3; - t->intel.base.dirty_images[0] = ~0; - max_offset = 0; - - for ( offset = i = 0 ; i < numLevels ; i++ ) { - t->intel.image[0][i].image = tObj->Image[0][firstLevel + i]; - if (!t->intel.image[0][i].image) - break; - - t->intel.image[0][i].offset = offset; - t->intel.image[0][i].internalFormat = baseImage->_BaseFormat; - - if (t->intel.image[0][i].image->IsCompressed) - sz = MAX2(1, t->intel.image[0][i].image->Height/4) * pitch; - else - sz = MAX2(2, t->intel.image[0][i].image->Height) * pitch; - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - max_offset = MAX2(max_offset, offset + sz); - - /* LPT change: step right after second mipmap. - */ - if (i == 1) - offset += pitch / 2; - else - offset += sz; - - } - break; - } - - t->intel.Pitch = pitch; - t->intel.base.totalSize = max_offset; - t->intel.max_level = numLevels-1; -} - - - - -static void i915SetTexImages( i915ContextPtr i915, - struct gl_texture_object *tObj ) -{ - GLuint textureFormat; - i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData; - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint ss2 = 0; - - switch( baseImage->TexFormat->MesaFormat ) { + switch (mesa_format) { case MESA_FORMAT_L8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_L8; - break; - + return MAPSURF_8BIT | MT_8BIT_L8; case MESA_FORMAT_I8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_I8; - break; - + return MAPSURF_8BIT | MT_8BIT_I8; case MESA_FORMAT_A8: - t->intel.texelBytes = 1; - textureFormat = MAPSURF_8BIT | MT_8BIT_A8; - break; - + return MAPSURF_8BIT | MT_8BIT_A8; case MESA_FORMAT_AL88: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_AY88; - break; - + return MAPSURF_16BIT | MT_16BIT_AY88; case MESA_FORMAT_RGB565: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565; - break; - + return MAPSURF_16BIT | MT_16BIT_RGB565; case MESA_FORMAT_ARGB1555: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555; - break; - + return MAPSURF_16BIT | MT_16BIT_ARGB1555; case MESA_FORMAT_ARGB4444: - t->intel.texelBytes = 2; - textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444; - break; - + return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - t->intel.texelBytes = 4; - textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888; - break; - + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL); - ss2 |= SS2_COLORSPACE_CONVERSION; - break; - + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY); - ss2 |= SS2_COLORSPACE_CONVERSION; - break; - + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); - break; - + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); case MESA_FORMAT_Z16: - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_16BIT | MT_16BIT_L16); - break; - + if (DepthMode == GL_ALPHA) + return (MAPSURF_16BIT | MT_16BIT_A16); + else if (DepthMode == GL_INTENSITY) + return (MAPSURF_16BIT | MT_16BIT_I16); + else + return (MAPSURF_16BIT | MT_16BIT_L16); case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: - /* - * DXTn pitches are Width/4 * blocksize in bytes - * for DXT1: blocksize=8 so Width/4*8 = Width * 2 - * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4 - */ - t->intel.texelBytes = 2; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); - break; - + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); case MESA_FORMAT_RGBA_DXT3: - t->intel.texelBytes = 4; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); - break; - + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); case MESA_FORMAT_RGBA_DXT5: - t->intel.texelBytes = 4; - textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); - break; - -#if 0 + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_Z24_S8: - t->intel.texelBytes = 4; - textureFormat = (MAPSURF_32BIT | MT_32BIT_xL824); - break; -#endif - + return (MAPSURF_32BIT | MT_32BIT_xI824); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, - baseImage->TexFormat->MesaFormat); + fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); abort(); + return 0; } +} - switch (i915->intel.intelScreen->deviceID) { - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q33_G: - case PCI_CHIP_Q35_G: - i945LayoutTextureImages( i915, tObj ); - break; - default: - i915LayoutTextureImages( i915, tObj ); - break; - } - - t->Setup[I915_TEXREG_MS3] = - (((tObj->Image[0][t->intel.base.firstLevel]->Height - 1) << MS3_HEIGHT_SHIFT) | - ((tObj->Image[0][t->intel.base.firstLevel]->Width - 1) << MS3_WIDTH_SHIFT) | - textureFormat | - MS3_USE_FENCE_REGS); - - t->Setup[I915_TEXREG_MS4] = - ((((t->intel.Pitch / 4) - 1) << MS4_PITCH_SHIFT) | - MS4_CUBE_FACE_ENA_MASK | - (((t->intel.max_level * 4)) << MS4_MAX_LOD_SHIFT) | - ((tObj->Image[0][t->intel.base.firstLevel]->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); - - t->Setup[I915_TEXREG_SS2] &= ~(SS2_COLORSPACE_CONVERSION); - t->Setup[I915_TEXREG_SS2] |= ss2; - - t->intel.dirty = I915_UPLOAD_TEX_ALL; -} /* The i915 (and related graphics cores) do not support GL_CLAMP. The * Intel drivers for "other operating systems" implement GL_CLAMP as * GL_CLAMP_TO_EDGE, so the same is done here. */ -static GLuint translate_wrap_mode( GLenum wrap ) +static GLuint +translate_wrap_mode(GLenum wrap) { - switch( wrap ) { - case GL_REPEAT: return TEXCOORDMODE_WRAP; - case GL_CLAMP: return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ - case GL_CLAMP_TO_EDGE: return TEXCOORDMODE_CLAMP_EDGE; - case GL_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; - case GL_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR; - default: return TEXCOORDMODE_WRAP; - } -} - - -/** - */ -static void i915ImportTexObjState( struct gl_texture_object *texObj ) -{ - i915TextureObjectPtr t = (i915TextureObjectPtr)texObj->DriverData; - int minFilt = 0, mipFilt = 0, magFilt = 0, shadow = 0; - - if(INTEL_DEBUG&DEBUG_DRI) - fprintf(stderr, "%s\n", __FUNCTION__); - - switch (texObj->MinFilter) { - case GL_NEAREST: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_NONE; - break; - case GL_LINEAR: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_NONE; - break; - case GL_NEAREST_MIPMAP_NEAREST: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_NEAREST; - break; - case GL_LINEAR_MIPMAP_NEAREST: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - minFilt = FILTER_NEAREST; - mipFilt = MIPFILTER_LINEAR; - break; - case GL_LINEAR_MIPMAP_LINEAR: - minFilt = FILTER_LINEAR; - mipFilt = MIPFILTER_LINEAR; - break; + switch (wrap) { + case GL_REPEAT: + return TEXCOORDMODE_WRAP; + case GL_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case GL_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case GL_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: + return TEXCOORDMODE_MIRROR; default: - break; - } - - if ( texObj->MaxAnisotropy > 1.0 ) { - minFilt = FILTER_ANISOTROPIC; - magFilt = FILTER_ANISOTROPIC; - } - else { - switch (texObj->MagFilter) { - case GL_NEAREST: - magFilt = FILTER_NEAREST; - break; - case GL_LINEAR: - magFilt = FILTER_LINEAR; - break; - default: - break; - } - } - - if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && - texObj->Target != GL_TEXTURE_3D) { - - shadow = SS2_SHADOW_ENABLE; - shadow |= intel_translate_compare_func( texObj->CompareFunc ); - - minFilt = FILTER_4X4_FLAT; - magFilt = FILTER_4X4_FLAT; - } - - - t->Setup[I915_TEXREG_SS2] &= ~(SS2_MIN_FILTER_MASK | - SS2_MIP_FILTER_MASK | - SS2_MAG_FILTER_MASK | - SS2_SHADOW_ENABLE | - SS2_SHADOW_FUNC_MASK); - t->Setup[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | - (mipFilt << SS2_MIP_FILTER_SHIFT) | - (magFilt << SS2_MAG_FILTER_SHIFT) | - shadow); - - { - GLuint ss3 = t->Setup[I915_TEXREG_SS3] & ~(SS3_TCX_ADDR_MODE_MASK | - SS3_TCY_ADDR_MODE_MASK | - SS3_TCZ_ADDR_MODE_MASK); - GLenum ws = texObj->WrapS; - GLenum wt = texObj->WrapT; - GLenum wr = texObj->WrapR; - - t->refs_border_color = 0; - - if (texObj->Target == GL_TEXTURE_3D && - (texObj->MinFilter != GL_NEAREST || - texObj->MagFilter != GL_NEAREST)) { - - /* Try to mimic GL_CLAMP functionality a little better - - * switch to CLAMP_TO_BORDER whenever a non-NEAREST filter is - * in use. Only do this for 3D textures at the moment -- - * doing it universally would fix the conform texbc.c - * failure, though. - */ - if (ws == GL_CLAMP) ws = GL_CLAMP_TO_BORDER; - if (wt == GL_CLAMP) wt = GL_CLAMP_TO_BORDER; - if (wr == GL_CLAMP) wr = GL_CLAMP_TO_BORDER; - - /* 3D textures don't seem to respect the border color. - * Fallback if there's ever a danger that they might refer to - * it. - */ - if (ws == GL_CLAMP_TO_BORDER) t->refs_border_color = 1; - if (wt == GL_CLAMP_TO_BORDER) t->refs_border_color = 1; - if (wr == GL_CLAMP_TO_BORDER) t->refs_border_color = 1; - } - - ss3 |= translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT; - ss3 |= translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT; - ss3 |= translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT; - - if (ss3 != t->Setup[I915_TEXREG_SS3]) { - t->intel.dirty = I915_UPLOAD_TEX_ALL; - t->Setup[I915_TEXREG_SS3] = ss3; - } - } - - { - const GLubyte *color = texObj->_BorderChan; - - t->Setup[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(color[0],color[1], - color[2],color[3]); + return TEXCOORDMODE_WRAP; } } -static void i915_import_tex_unit( i915ContextPtr i915, - i915TextureObjectPtr t, - GLuint unit ) +/* Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. + */ +static GLboolean +i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { - GLuint state[I915_TEX_SETUP_SIZE]; + GLcontext *ctx = &intel->ctx; + struct i915_context *i915 = i915_context(ctx); + struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = tUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage; + GLuint *state = i915->state.Tex[unit], format, pitch; + GLint lodbias; - if(INTEL_DEBUG&DEBUG_TEXTURE) - fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit); - - if (i915->intel.CurrentTexObj[unit]) - i915->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit); + memset(state, 0, sizeof(state)); - i915->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t; - t->intel.base.bound |= (1 << unit); + /*We need to refcount these. */ - if (t->intel.dirty & I915_UPLOAD_TEX(unit)) { - i915ImportTexObjState( t->intel.base.tObj ); - t->intel.dirty &= ~I915_UPLOAD_TEX(unit); + if (i915->state.tex_buffer[unit] != NULL) { + dri_bo_unreference(i915->state.tex_buffer[unit]); + i915->state.tex_buffer[unit] = NULL; } - state[I915_TEXREG_MS2] = t->intel.TextureOffset; - state[I915_TEXREG_MS3] = t->Setup[I915_TEXREG_MS3]; - state[I915_TEXREG_MS4] = t->Setup[I915_TEXREG_MS4]; - - state[I915_TEXREG_SS2] = (i915->state.Tex[unit][I915_TEXREG_SS2] & - SS2_LOD_BIAS_MASK); - state[I915_TEXREG_SS2] |= (t->Setup[I915_TEXREG_SS2] & ~SS2_LOD_BIAS_MASK); - - state[I915_TEXREG_SS3] = (i915->state.Tex[unit][I915_TEXREG_SS3] & - SS3_NORMALIZED_COORDS); - state[I915_TEXREG_SS3] |= (t->Setup[I915_TEXREG_SS3] & - ~(SS3_NORMALIZED_COORDS| - SS3_TEXTUREMAP_INDEX_MASK)); + if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit)) + return GL_FALSE; - state[I915_TEXREG_SS3] |= (unit<<SS3_TEXTUREMAP_INDEX_SHIFT); + /* Get first image here, since intelObj->firstLevel will get set in + * the intel_finalize_mipmap_tree() call above. + */ + firstImage = tObj->Image[0][intelObj->firstLevel]; - state[I915_TEXREG_SS4] = t->Setup[I915_TEXREG_SS4]; + if (intelObj->imageOverride) { + i915->state.tex_buffer[unit] = NULL; + i915->state.tex_offset[unit] = intelObj->textureOffset; + switch (intelObj->depthOverride) { + case 32: + format = MAPSURF_32BIT | MT_32BIT_ARGB8888; + break; + case 24: + default: + format = MAPSURF_32BIT | MT_32BIT_XRGB8888; + break; + case 16: + format = MAPSURF_16BIT | MT_16BIT_RGB565; + break; + } - if (memcmp(state, i915->state.Tex[unit], sizeof(state)) != 0) { - I915_STATECHANGE( i915, I915_UPLOAD_TEX(unit) ); - memcpy(i915->state.Tex[unit], state, sizeof(state)); + pitch = intelObj->pitchOverride; + } else { + dri_bo_reference(intelObj->mt->region->buffer); + i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; + i915->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt, + 0, intelObj-> + firstLevel); + + format = translate_texture_format(firstImage->TexFormat->MesaFormat, + tObj->DepthMode); + pitch = intelObj->mt->pitch * intelObj->mt->cpp; } -} - - -static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit ) -{ - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData; + state[I915_TEXREG_MS3] = + (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | + ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format | + MS3_USE_FENCE_REGS); - if (0) fprintf(stderr, "%s %d\n", __FUNCTION__, unit); + state[I915_TEXREG_MS4] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | + ((((intelObj->lastLevel - intelObj->firstLevel) * 4)) << + MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) << + MS4_VOLUME_DEPTH_SHIFT)); - if (!(i915->state.active & I915_UPLOAD_TEX(unit))) { - I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE); - } - /* Fallback if there's a texture border */ - if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) { - return GL_FALSE; - } + { + GLuint minFilt, mipFilt, magFilt; + switch (tObj->MinFilter) { + case GL_NEAREST: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_NONE; + break; + case GL_LINEAR: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + minFilt = FILTER_NEAREST; + mipFilt = MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + minFilt = FILTER_LINEAR; + mipFilt = MIPFILTER_LINEAR; + break; + default: + return GL_FALSE; + } - /* Update state if this is a different texture object to last - * time. - */ - if (i915->intel.CurrentTexObj[unit] != &t->intel || - (t->intel.dirty & I915_UPLOAD_TEX(unit))) { - i915_import_tex_unit( i915, t, unit); - i915->tex_program.translated = 0; - } + if (tObj->MaxAnisotropy > 1.0) { + minFilt = FILTER_ANISOTROPIC; + magFilt = FILTER_ANISOTROPIC; + } + else { + switch (tObj->MagFilter) { + case GL_NEAREST: + magFilt = FILTER_NEAREST; + break; + case GL_LINEAR: + magFilt = FILTER_LINEAR; + break; + default: + return GL_FALSE; + } + } - return GL_TRUE; -} + lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0); + if (lodbias < -256) + lodbias = -256; + if (lodbias > 255) + lodbias = 255; + state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & + SS2_LOD_BIAS_MASK); -static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit ) -{ - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData; - GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3]; + /* YUV conversion: + */ + if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR || + firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV) + state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION; - ss3 &= ~SS3_NORMALIZED_COORDS; + /* Shadow: + */ + if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && + tObj->Target != GL_TEXTURE_3D) { + if (tObj->Target == GL_TEXTURE_1D) + return GL_FALSE; - if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) { - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->state.Tex[unit][I915_TEXREG_SS3] = ss3; - } + state[I915_TEXREG_SS2] |= + (SS2_SHADOW_ENABLE | + intel_translate_shadow_compare_func(tObj->CompareFunc)); - /* Upload teximages (not pipelined) - */ - if (t->intel.base.dirty_images[0]) { - i915SetTexImages( i915, tObj ); - if (!intelUploadTexImages( &i915->intel, &t->intel, 0 )) { - return GL_FALSE; + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; } - } - - return GL_TRUE; -} + state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + } -static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit ) -{ - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData; - GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3]; - - ss3 |= SS3_NORMALIZED_COORDS; + { + GLenum ws = tObj->WrapS; + GLenum wt = tObj->WrapT; + GLenum wr = tObj->WrapR; - if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) { - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->state.Tex[unit][I915_TEXREG_SS3] = ss3; - } - /* Upload teximages (not pipelined) - */ - if (t->intel.base.dirty_images[0]) { - i915SetTexImages( i915, tObj ); - if (!intelUploadTexImages( &i915->intel, &t->intel, 0 )) { - return GL_FALSE; - } - } + /* 3D textures don't seem to respect the border color. + * Fallback if there's ever a danger that they might refer to + * it. + * + * Effectively this means fallback on 3D clamp or + * clamp_to_border. + */ + if (tObj->Target == GL_TEXTURE_3D && + (tObj->MinFilter != GL_NEAREST || + tObj->MagFilter != GL_NEAREST) && + (ws == GL_CLAMP || + wt == GL_CLAMP || + wr == GL_CLAMP || + ws == GL_CLAMP_TO_BORDER || + wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER)) + return GL_FALSE; - return GL_TRUE; -} -static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit ) -{ - i915ContextPtr i915 = I915_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData; - GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3]; - GLuint face; - - ss3 |= SS3_NORMALIZED_COORDS; - - if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) { - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->state.Tex[unit][I915_TEXREG_SS3] = ss3; - } + state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ - /* Upload teximages (not pipelined) - */ - if ( t->intel.base.dirty_images[0] || t->intel.base.dirty_images[1] || - t->intel.base.dirty_images[2] || t->intel.base.dirty_images[3] || - t->intel.base.dirty_images[4] || t->intel.base.dirty_images[5] ) { - i915SetTexImages( i915, tObj ); - } + state[I915_TEXREG_SS3] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); - /* upload (per face) */ - for (face = 0; face < 6; face++) { - if (t->intel.base.dirty_images[face]) { - if (!intelUploadTexImages( &i915->intel, &t->intel, face )) { - return GL_FALSE; - } - } + state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } - return GL_TRUE; -} + state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], + tObj->_BorderChan[1], + tObj->_BorderChan[2], + tObj->_BorderChan[3]); -static GLboolean enable_tex_3d( GLcontext *ctx, GLuint unit ) -{ - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData; - /* 3D textures on I915 seem to get bogus border colors, hence this - * fallback: + I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE); + /* memcmp was already disabled, but definitely won't work as the + * region might now change and that wouldn't be detected: */ - if (t->refs_border_color) - return GL_FALSE; + I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - return GL_TRUE; -} - - - -static GLboolean disable_tex( GLcontext *ctx, GLuint unit ) -{ - i915ContextPtr i915 = I915_CONTEXT(ctx); - - if (i915->state.active & I915_UPLOAD_TEX(unit)) { - I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_FALSE); - } - - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - if ( i915->intel.CurrentTexObj[unit] != NULL ) { - i915->intel.CurrentTexObj[unit]->base.bound &= ~(1U << 0); - i915->intel.CurrentTexObj[unit] = NULL; - } +#if 0 + DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]); + DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]); + DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]); + DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]); + DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]); + DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]); +#endif return GL_TRUE; } -static GLboolean i915UpdateTexUnit( GLcontext *ctx, GLuint unit ) -{ - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - if (texUnit->_ReallyEnabled && - INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024) - return GL_FALSE; - switch (texUnit->_ReallyEnabled) { - case TEXTURE_1D_BIT: - case TEXTURE_2D_BIT: - return (enable_tex_2d( ctx, unit ) && - enable_tex_common( ctx, unit )); - case TEXTURE_RECT_BIT: - return (enable_tex_rect( ctx, unit ) && - enable_tex_common( ctx, unit )); - case TEXTURE_CUBE_BIT: - return (enable_tex_cube( ctx, unit ) && - enable_tex_common( ctx, unit )); - case TEXTURE_3D_BIT: - return (enable_tex_2d( ctx, unit ) && - enable_tex_common( ctx, unit ) && - enable_tex_3d( ctx, unit)); - case 0: - return disable_tex( ctx, unit ); - default: - return GL_FALSE; - } -} - -void i915UpdateTextureState( intelContextPtr intel ) +void +i915UpdateTextureState(struct intel_context *intel) { - GLcontext *ctx = &intel->ctx; GLboolean ok = GL_TRUE; GLuint i; - for (i = 0 ; i < I915_TEX_UNITS && ok ; i++) { - ok = i915UpdateTexUnit( ctx, i ); + for (i = 0; i < I915_TEX_UNITS && ok; i++) { + switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) { + case TEXTURE_1D_BIT: + case TEXTURE_2D_BIT: + case TEXTURE_CUBE_BIT: + case TEXTURE_3D_BIT: + ok = i915_update_tex_unit(intel, i, SS3_NORMALIZED_COORDS); + break; + case TEXTURE_RECT_BIT: + ok = i915_update_tex_unit(intel, i, 0); + break; + case 0:{ + struct i915_context *i915 = i915_context(&intel->ctx); + if (i915->state.active & I915_UPLOAD_TEX(i)) + I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(i), GL_FALSE); + + if (i915->state.tex_buffer[i] != NULL) { + dri_bo_unreference(i915->state.tex_buffer[i]); + i915->state.tex_buffer[i] = NULL; + } + + break; + } + default: + ok = GL_FALSE; + break; + } } - FALLBACK( intel, I915_FALLBACK_TEXTURE, !ok ); + FALLBACK(intel, I915_FALLBACK_TEXTURE, !ok); } - - - diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c index cc8a605..135bfaa 100644 --- a/i915/i915_vtbl.c +++ b/i915/i915_vtbl.c @@ -37,115 +37,147 @@ #include "tnl/t_vertex.h" #include "intel_batchbuffer.h" +#include "intel_tex.h" +#include "intel_regions.h" #include "i915_reg.h" #include "i915_context.h" -static void i915_render_start( intelContextPtr intel ) +#include "glapi.h" + +static void +i915_render_prevalidate(struct intel_context *intel) { - GLcontext *ctx = &intel->ctx; - i915ContextPtr i915 = I915_CONTEXT(intel); + struct i915_context *i915 = i915_context(&intel->ctx); - if (ctx->FragmentProgram._Active) - i915ValidateFragmentProgram( i915 ); - else { - assert(!ctx->FragmentProgram._MaintainTexEnvProgram); - i915ValidateTextureProgram( i915 ); - } + if (!intel->Fallback) + i915ValidateFragmentProgram(i915); +} + +static void +i915_render_start(struct intel_context *intel) +{ } -static void i915_reduced_primitive_state( intelContextPtr intel, - GLenum rprim ) +static void +i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim) { - i915ContextPtr i915 = I915_CONTEXT(intel); - GLuint st1 = i915->state.Stipple[I915_STPREG_ST1]; - - st1 &= ~ST1_ENABLE; - - switch (rprim) { - case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */ - case GL_TRIANGLES: - if (intel->ctx.Polygon.StippleFlag && - intel->hw_stipple) - st1 |= ST1_ENABLE; - break; - case GL_LINES: - case GL_POINTS: - default: - break; - } - - i915->intel.reduced_primitive = rprim; - - if (st1 != i915->state.Stipple[I915_STPREG_ST1]) { - I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE); - i915->state.Stipple[I915_STPREG_ST1] = st1; - } + struct i915_context *i915 = i915_context(&intel->ctx); + GLuint st1 = i915->state.Stipple[I915_STPREG_ST1]; + + st1 &= ~ST1_ENABLE; + + switch (rprim) { + case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */ + case GL_TRIANGLES: + if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple) + st1 |= ST1_ENABLE; + break; + case GL_LINES: + case GL_POINTS: + default: + break; + } + + i915->intel.reduced_primitive = rprim; + + if (st1 != i915->state.Stipple[I915_STPREG_ST1]) { + INTEL_FIREVERTICES(intel); + + I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE); + i915->state.Stipple[I915_STPREG_ST1] = st1; + } } /* Pull apart the vertex format registers and figure out how large a * vertex is supposed to be. */ -static GLboolean i915_check_vertex_size( intelContextPtr intel, - GLuint expected ) +static GLboolean +i915_check_vertex_size(struct intel_context *intel, GLuint expected) { - i915ContextPtr i915 = I915_CONTEXT(intel); + struct i915_context *i915 = i915_context(&intel->ctx); int lis2 = i915->current->Ctx[I915_CTXREG_LIS2]; int lis4 = i915->current->Ctx[I915_CTXREG_LIS4]; int i, sz = 0; switch (lis4 & S4_VFMT_XYZW_MASK) { - case S4_VFMT_XY: sz = 2; break; - case S4_VFMT_XYZ: sz = 3; break; - case S4_VFMT_XYW: sz = 3; break; - case S4_VFMT_XYZW: sz = 4; break; - default: + case S4_VFMT_XY: + sz = 2; + break; + case S4_VFMT_XYZ: + sz = 3; + break; + case S4_VFMT_XYW: + sz = 3; + break; + case S4_VFMT_XYZW: + sz = 4; + break; + default: fprintf(stderr, "no xyzw specified\n"); return 0; } - if (lis4 & S4_VFMT_SPEC_FOG) sz++; - if (lis4 & S4_VFMT_COLOR) sz++; - if (lis4 & S4_VFMT_DEPTH_OFFSET) sz++; - if (lis4 & S4_VFMT_POINT_WIDTH) sz++; - if (lis4 & S4_VFMT_FOG_PARAM) sz++; - - for (i = 0 ; i < 8 ; i++) { + if (lis4 & S4_VFMT_SPEC_FOG) + sz++; + if (lis4 & S4_VFMT_COLOR) + sz++; + if (lis4 & S4_VFMT_DEPTH_OFFSET) + sz++; + if (lis4 & S4_VFMT_POINT_WIDTH) + sz++; + if (lis4 & S4_VFMT_FOG_PARAM) + sz++; + + for (i = 0; i < 8; i++) { switch (lis2 & S2_TEXCOORD_FMT0_MASK) { - case TEXCOORDFMT_2D: sz += 2; break; - case TEXCOORDFMT_3D: sz += 3; break; - case TEXCOORDFMT_4D: sz += 4; break; - case TEXCOORDFMT_1D: sz += 1; break; - case TEXCOORDFMT_2D_16: sz += 1; break; - case TEXCOORDFMT_4D_16: sz += 2; break; - case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: + sz += 2; + break; + case TEXCOORDFMT_3D: + sz += 3; + break; + case TEXCOORDFMT_4D: + sz += 4; + break; + case TEXCOORDFMT_1D: + sz += 1; + break; + case TEXCOORDFMT_2D_16: + sz += 1; + break; + case TEXCOORDFMT_4D_16: + sz += 2; + break; + case TEXCOORDFMT_NOT_PRESENT: + break; default: - fprintf(stderr, "bad texcoord fmt %d\n", i); - return GL_FALSE; + fprintf(stderr, "bad texcoord fmt %d\n", i); + return GL_FALSE; } lis2 >>= S2_TEXCOORD_FMT1_SHIFT; } - - if (sz != expected) + + if (sz != expected) fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected); - + return sz == expected; } -static void i915_emit_invarient_state( intelContextPtr intel ) +static void +i915_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH( 20 ); + BEGIN_BATCH(200, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | - AA_LINE_REGION_WIDTH_1_0); + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -158,35 +190,27 @@ static void i915_emit_invarient_state( intelContextPtr intel ) /* Don't support texture crossbar yet */ OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); /* Need to initialize this to zero. */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(3) | - (0)); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); OUT_BATCH(0); - + /* XXX: Use this */ - OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | - DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); OUT_BATCH(0); @@ -194,29 +218,23 @@ static void i915_emit_invarient_state( intelContextPtr intel ) OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ OUT_BATCH(0); /* Don't support twosided stencil yet */ - OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | - BFO_ENABLE_STENCIL_TWO_SIDE | - 0 ); - + OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); + OUT_BATCH(0); + ADVANCE_BATCH(); } -#define emit( intel, state, size ) \ -do { \ - int k; \ - BEGIN_BATCH( (size) / sizeof(GLuint)); \ - for (k = 0 ; k < (size) / sizeof(GLuint) ; k++) \ - OUT_BATCH((state)[k]); \ - ADVANCE_BATCH(); \ -} while (0); +#define emit(intel, state, size ) \ + intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS ) -static GLuint get_dirty( struct i915_hw_state *state ) +static GLuint +get_dirty(struct i915_hw_state *state) { GLuint dirty; @@ -227,94 +245,168 @@ static GLuint get_dirty( struct i915_hw_state *state ) if (dirty & I915_UPLOAD_TEX_ALL) state->emitted &= ~I915_UPLOAD_TEX_ALL; dirty = state->active & ~state->emitted; - return dirty; } -static GLuint get_state_size( struct i915_hw_state *state ) +static GLuint +get_state_size(struct i915_hw_state *state) { GLuint dirty = get_dirty(state); GLuint i; GLuint sz = 0; if (dirty & I915_UPLOAD_INVARIENT) - sz += 20 * sizeof(int); + sz += 30 * 4; if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); - if (dirty & I915_UPLOAD_BUFFERS) + if (dirty & I915_UPLOAD_BUFFERS) sz += sizeof(state->Buffer); if (dirty & I915_UPLOAD_STIPPLE) sz += sizeof(state->Stipple); - if (dirty & I915_UPLOAD_FOG) + if (dirty & I915_UPLOAD_FOG) sz += sizeof(state->Fog); if (dirty & I915_UPLOAD_TEX_ALL) { int nr = 0; - for (i = 0; i < I915_TEX_UNITS; i++) - if (dirty & I915_UPLOAD_TEX(i)) - nr++; + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) + nr++; - sz += (2+nr*3) * sizeof(GLuint) * 2; + sz += (2 + nr * 3) * sizeof(GLuint) * 2; } - if (dirty & I915_UPLOAD_CONSTANTS) + if (dirty & I915_UPLOAD_CONSTANTS) sz += state->ConstantSize * sizeof(GLuint); - if (dirty & I915_UPLOAD_PROGRAM) + if (dirty & I915_UPLOAD_PROGRAM) sz += state->ProgramSize * sizeof(GLuint); return sz; } - /* Push the state into the sarea and/or texture memory. */ -static void i915_emit_state( intelContextPtr intel ) +static void +i915_emit_state(struct intel_context *intel) { - i915ContextPtr i915 = I915_CONTEXT(intel); + struct i915_context *i915 = i915_context(&intel->ctx); struct i915_hw_state *state = i915->current; int i; - GLuint dirty = get_dirty(state); - GLuint counter = intel->batch.counter; + int ret, count; + GLuint dirty; + GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; - if (intel->batch.space < get_state_size(state)) { - intelFlushBatch(intel, GL_TRUE); - dirty = get_dirty(state); - counter = intel->batch.counter; + /* We don't hold the lock at this point, so want to make sure that + * there won't be a buffer wrap between the state emits and the primitive + * emit header. + * + * It might be better to talk about explicit places where + * scheduling is allowed, rather than assume that it is whenever a + * batchbuffer fills up. + * + * Set the space as LOOP_CLIPRECTS now, since that's what our primitives + * will be emitted under. + */ + intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, + LOOP_CLIPRECTS); + count = 0; + again: + dirty = get_dirty(state); + + ret = 0; + if (dirty & I915_UPLOAD_BUFFERS) { + ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); + if (state->depth_region) + ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); } - if (VERBOSE) + if (dirty & I915_UPLOAD_TEX_ALL) { + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) { + if (state->tex_buffer[i]) { + ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); + } + } + } + if (ret) { + if (count == 0) { + count++; + intel_batchbuffer_flush(intel->batch); + goto again; + } else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state"); + assert(0); + } + } + + /* work out list of buffers to emit */ + + /* Do this here as we may have flushed the batchbuffer above, + * causing more state to be dirty! + */ + dirty = get_dirty(state); + state->emitted |= dirty; + assert(get_dirty(state) == 0); + + if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty); if (dirty & I915_UPLOAD_INVARIENT) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); - i915_emit_invarient_state( intel ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); + i915_emit_invarient_state(intel); } if (dirty & I915_UPLOAD_CTX) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); - emit( i915, state->Ctx, sizeof(state->Ctx) ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_CTX:\n"); + + emit(intel, state->Ctx, sizeof(state->Ctx)); } if (dirty & I915_UPLOAD_BUFFERS) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); - emit( i915, state->Buffer, sizeof(state->Buffer) ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); + BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS); + OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); + OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); + OUT_RELOC(state->draw_region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + state->draw_region->draw_offset); + + if (state->depth_region) { + OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); + OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); + OUT_RELOC(state->depth_region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + state->depth_region->draw_offset); + } + + OUT_BATCH(state->Buffer[I915_DESTREG_DV0]); + OUT_BATCH(state->Buffer[I915_DESTREG_DV1]); + OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]); + OUT_BATCH(state->Buffer[I915_DESTREG_SR0]); + OUT_BATCH(state->Buffer[I915_DESTREG_SR1]); + OUT_BATCH(state->Buffer[I915_DESTREG_SR2]); + ADVANCE_BATCH(); } if (dirty & I915_UPLOAD_STIPPLE) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_STIPPLE:\n"); - emit( i915, state->Stipple, sizeof(state->Stipple) ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_STIPPLE:\n"); + emit(intel, state->Stipple, sizeof(state->Stipple)); } if (dirty & I915_UPLOAD_FOG) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_FOG:\n"); - emit( i915, state->Fog, sizeof(state->Fog) ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_FOG:\n"); + emit(intel, state->Fog, sizeof(state->Fog)); } /* Combine all the dirty texture state into a single command to @@ -323,141 +415,218 @@ static void i915_emit_state( intelContextPtr intel ) if (dirty & I915_UPLOAD_TEX_ALL) { int nr = 0; - for (i = 0; i < I915_TEX_UNITS; i++) - if (dirty & I915_UPLOAD_TEX(i)) - nr++; + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) + nr++; - BEGIN_BATCH(2+nr*3); - OUT_BATCH(_3DSTATE_MAP_STATE | (3*nr)); + BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); - for (i = 0 ; i < I915_TEX_UNITS ; i++) - if (dirty & I915_UPLOAD_TEX(i)) { - OUT_BATCH(state->Tex[i][I915_TEXREG_MS2]); - OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]); - OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]); - } + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) { + + if (state->tex_buffer[i]) { + OUT_RELOC(state->tex_buffer[i], + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + state->tex_offset[i]); + } + else if (state == &i915->meta) { + assert(i == 0); + OUT_BATCH(0); + } + else { + OUT_BATCH(state->tex_offset[i]); + } + + OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]); + OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]); + } ADVANCE_BATCH(); - BEGIN_BATCH(2+nr*3); - OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3*nr)); + BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr)); OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); - for (i = 0 ; i < I915_TEX_UNITS ; i++) - if (dirty & I915_UPLOAD_TEX(i)) { - OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]); - OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]); - OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]); - } + for (i = 0; i < I915_TEX_UNITS; i++) + if (dirty & I915_UPLOAD_TEX(i)) { + OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]); + OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]); + OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]); + } ADVANCE_BATCH(); } if (dirty & I915_UPLOAD_CONSTANTS) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n"); - emit( i915, state->Constant, state->ConstantSize * sizeof(GLuint) ); + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n"); + emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint)); } if (dirty & I915_UPLOAD_PROGRAM) { - if (VERBOSE) fprintf(stderr, "I915_UPLOAD_PROGRAM:\n"); + if (state->ProgramSize) { + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_PROGRAM:\n"); + + assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize); - assert((state->Program[0] & 0x1ff)+2 == state->ProgramSize); - - emit( i915, state->Program, state->ProgramSize * sizeof(GLuint) ); - if (VERBOSE) - i915_disassemble_program( state->Program, state->ProgramSize ); + emit(intel, state->Program, state->ProgramSize * sizeof(GLuint)); + if (INTEL_DEBUG & DEBUG_STATE) + i915_disassemble_program(state->Program, state->ProgramSize); + } } - state->emitted |= dirty; - intel->batch.last_emit_state = counter; - assert(counter == intel->batch.counter); + intel->batch->dirty_state &= ~dirty; + assert(get_dirty(state) == 0); + assert((intel->batch->dirty_state & (1<<1)) == 0); } -static void i915_destroy_context( intelContextPtr intel ) +static void +i915_destroy_context(struct intel_context *intel) { + GLuint i; + struct i915_context *i915 = i915_context(&intel->ctx); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->state.tex_buffer[i] != NULL) { + dri_bo_unreference(i915->state.tex_buffer[i]); + i915->state.tex_buffer[i] = NULL; + } + } + _tnl_free_vertices(&intel->ctx); } /** - * Set the color buffer drawing region. + * Set the drawing regions for the color and depth/stencil buffers. + * This involves setting the pitch, cpp and buffer ID/location. + * Also set pixel format for color and Z rendering + * Used for setting both regular and meta state. */ -static void -i915_set_color_region( intelContextPtr intel, const intelRegion *region) +void +i915_state_draw_region(struct intel_context *intel, + struct i915_hw_state *state, + struct intel_region *color_region, + struct intel_region *depth_region) { - i915ContextPtr i915 = I915_CONTEXT(intel); - I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS ); - i915->state.Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i915->state.Buffer[I915_DESTREG_CBUFADDR2] = region->offset; + struct i915_context *i915 = i915_context(&intel->ctx); + GLuint value; + + ASSERT(state == &i915->state || state == &i915->meta); + + if (state->draw_region != color_region) { + intel_region_release(&state->draw_region); + intel_region_reference(&state->draw_region, color_region); + } + if (state->depth_region != depth_region) { + intel_region_release(&state->depth_region); + intel_region_reference(&state->depth_region, depth_region); + } + + /* + * Set stride/cpp values + */ + if (color_region) { + state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; + state->Buffer[I915_DESTREG_CBUFADDR1] = + (BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(color_region->pitch * color_region->cpp) | + BUF_3D_USE_FENCE); + } + + if (depth_region) { + state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; + state->Buffer[I915_DESTREG_DBUFADDR1] = + (BUF_3D_ID_DEPTH | + BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | + BUF_3D_USE_FENCE); + } + + /* + * Compute/set I915_DESTREG_DV1 value + */ + value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); + if (color_region && color_region->cpp == 4) { + value |= DV_PF_8888; + } + else { + value |= (DITHER_FULL_ALWAYS | DV_PF_565); + } + if (depth_region && depth_region->cpp == 4) { + value |= DEPTH_FRMT_24_FIXED_8_OTHER; + } + else { + value |= DEPTH_FRMT_16_FIXED; + } + state->Buffer[I915_DESTREG_DV1] = value; + + I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); } -/** - * specify the z-buffer/stencil region - */ static void -i915_set_z_region( intelContextPtr intel, const intelRegion *region) +i915_set_draw_region(struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_regions) { - i915ContextPtr i915 = I915_CONTEXT(intel); - I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS ); - i915->state.Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE); - i915->state.Buffer[I915_DESTREG_DBUFADDR2] = region->offset; + struct i915_context *i915 = i915_context(&intel->ctx); + i915_state_draw_region(intel, &i915->state, color_regions[0], depth_region); } -/** - * Set both the color and Z/stencil drawing regions. - * Similar to two previous functions, but don't use I915_STATECHANGE() - */ + static void -i915_update_color_z_regions(intelContextPtr intel, - const intelRegion *colorRegion, - const intelRegion *depthRegion) +i915_new_batch(struct intel_context *intel) { - i915ContextPtr i915 = I915_CONTEXT(intel); + struct i915_context *i915 = i915_context(&intel->ctx); - i915->state.Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE); - i915->state.Buffer[I915_DESTREG_CBUFADDR2] = colorRegion->offset; + /* Mark all state as needing to be emitted when starting a new batchbuffer. + * Using hardware contexts would be an alternative, but they have some + * difficulties associated with them (physical address requirements). + */ + i915->state.emitted = 0; - i915->state.Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depthRegion->pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); - i915->state.Buffer[I915_DESTREG_DBUFADDR2] = depthRegion->offset; + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!intel->no_batch_wrap); } - -static void i915_lost_hardware( intelContextPtr intel ) +static GLuint +i915_flush_cmd(void) { - I915_CONTEXT(intel)->state.emitted = 0; + return MI_FLUSH | FLUSH_MAP_CACHE; } -static void i915_emit_flush( intelContextPtr intel ) +static void +i915_assert_not_dirty( struct intel_context *intel ) { - BATCH_LOCALS; + struct i915_context *i915 = i915_context(&intel->ctx); + struct i915_hw_state *state = i915->current; + GLuint dirty = get_dirty(state); + assert(!dirty); +} - BEGIN_BATCH(2); - OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE | FLUSH_RENDER_CACHE ); - OUT_BATCH( 0 ); - ADVANCE_BATCH(); +static void +i915_note_unlock( struct intel_context *intel ) +{ + /* nothing */ } -void i915InitVtbl( i915ContextPtr i915 ) +void +i915InitVtbl(struct i915_context *i915) { - i915->intel.vtbl.alloc_tex_obj = i915AllocTexObj; i915->intel.vtbl.check_vertex_size = i915_check_vertex_size; - i915->intel.vtbl.clear_with_tris = i915ClearWithTris; - i915->intel.vtbl.rotate_window = i915RotateWindow; i915->intel.vtbl.destroy = i915_destroy_context; i915->intel.vtbl.emit_state = i915_emit_state; - i915->intel.vtbl.lost_hardware = i915_lost_hardware; + i915->intel.vtbl.new_batch = i915_new_batch; i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state; i915->intel.vtbl.render_start = i915_render_start; - i915->intel.vtbl.set_color_region = i915_set_color_region; - i915->intel.vtbl.set_z_region = i915_set_z_region; - i915->intel.vtbl.update_color_z_regions = i915_update_color_z_regions; + i915->intel.vtbl.render_prevalidate = i915_render_prevalidate; + i915->intel.vtbl.set_draw_region = i915_set_draw_region; i915->intel.vtbl.update_texture_state = i915UpdateTextureState; - i915->intel.vtbl.emit_flush = i915_emit_flush; + i915->intel.vtbl.flush_cmd = i915_flush_cmd; + i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; + i915->intel.vtbl.note_unlock = i915_note_unlock; } - diff --git a/i915/intel_batchbuffer.c b/i915/intel_batchbuffer.c deleted file mode 100644 index 803b41b..0000000 --- a/i915/intel_batchbuffer.c +++ /dev/null @@ -1,829 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <errno.h> - -#include "mtypes.h" -#include "context.h" -#include "enums.h" -#include "vblank.h" - -#include "intel_reg.h" -#include "intel_batchbuffer.h" -#include "intel_context.h" - - - - -/* ================================================================ - * Performance monitoring functions - */ - -static void intel_fill_box( intelContextPtr intel, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLubyte r, GLubyte g, GLubyte b ) -{ - x += intel->drawX; - y += intel->drawY; - - if (x >= 0 && y >= 0 && - x+w < intel->intelScreen->width && - y+h < intel->intelScreen->height) - intelEmitFillBlitLocked( intel, - intel->intelScreen->cpp, - intel->intelScreen->back.pitch, - intel->intelScreen->back.offset, - x, y, w, h, - INTEL_PACKCOLOR(intel->intelScreen->fbFormat, - r,g,b,0xff)); -} - -static void intel_draw_performance_boxes( intelContextPtr intel ) -{ - /* Purple box for page flipping - */ - if ( intel->perf_boxes & I830_BOX_FLIP ) - intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 ); - - /* Red box if we have to wait for idle at any point - */ - if ( intel->perf_boxes & I830_BOX_WAIT ) - intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 ); - - /* Blue box: lost context? - */ - if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) - intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 ); - - /* Yellow box for texture swaps - */ - if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) - intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 ); - - /* Green box if hardware never idles (as far as we can tell) - */ - if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) - intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 ); - - - /* Draw bars indicating number of buffers allocated - * (not a great measure, easily confused) - */ -#if 0 - if (intel->dma_used) { - int bar = intel->dma_used / 10240; - if (bar > 100) bar = 100; - if (bar < 1) bar = 1; - intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 ); - intel->dma_used = 0; - } -#endif - - intel->perf_boxes = 0; -} - - - - - - -static int bad_prim_vertex_nr( int primitive, int nr ) -{ - switch (primitive & PRIM3D_MASK) { - case PRIM3D_POINTLIST: - return nr < 1; - case PRIM3D_LINELIST: - return (nr & 1) || nr == 0; - case PRIM3D_LINESTRIP: - return nr < 2; - case PRIM3D_TRILIST: - case PRIM3D_RECTLIST: - return nr % 3 || nr == 0; - case PRIM3D_POLY: - case PRIM3D_TRIFAN: - case PRIM3D_TRISTRIP: - case PRIM3D_TRISTRIP_RVRSE: - return nr < 3; - default: - return 1; - } -} - -static void intel_flush_inline_primitive( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - GLuint used = intel->batch.ptr - intel->prim.start_ptr; - GLuint vertcount; - - assert(intel->prim.primitive != ~0); - - if (1) { - /* Check vertex size against the vertex we're specifying to - * hardware. If it's wrong, ditch the primitive. - */ - if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size )) - goto do_discard; - - vertcount = (used - 4)/ (intel->vertex_size * 4); - - if (!vertcount) - goto do_discard; - - if (vertcount * intel->vertex_size * 4 != used - 4) { - fprintf(stderr, "vertex size confusion %d %d\n", used, - intel->vertex_size * vertcount * 4); - goto do_discard; - } - - if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) { - fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive, - vertcount); - goto do_discard; - } - } - - if (used < 8) - goto do_discard; - - *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | - intel->prim.primitive | - (used/4-2)); - - goto finished; - - do_discard: - intel->batch.ptr -= used; - intel->batch.space += used; - assert(intel->batch.space >= 0); - - finished: - intel->prim.primitive = ~0; - intel->prim.start_ptr = 0; - intel->prim.flush = 0; -} - - -/* Emit a primitive referencing vertices in a vertex buffer. - */ -void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim ) -{ - BATCH_LOCALS; - - if (0) - fprintf(stderr, "%s %x\n", __FUNCTION__, prim); - - - /* Finish any in-progress primitive: - */ - INTEL_FIREVERTICES( intel ); - - /* Emit outstanding state: - */ - intel->vtbl.emit_state( intel ); - - /* Make sure there is some space in this buffer: - */ - if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) { - intelFlushBatch(intel, GL_TRUE); - intel->vtbl.emit_state( intel ); - } - -#if 1 - if (((unsigned long)intel->batch.ptr) & 0x4) { - BEGIN_BATCH(1); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -#endif - - /* Emit a slot which will be filled with the inline primitive - * command later. - */ - BEGIN_BATCH(2); - OUT_BATCH( 0 ); - - intel->prim.start_ptr = batch_ptr; - intel->prim.primitive = prim; - intel->prim.flush = intel_flush_inline_primitive; - intel->batch.contains_geometry = 1; - - OUT_BATCH( 0 ); - ADVANCE_BATCH(); -} - - -void intelRestartInlinePrimitive( intelContextPtr intel ) -{ - GLuint prim = intel->prim.primitive; - - intel_flush_inline_primitive( &intel->ctx ); - if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */ - intelStartInlinePrimitive( intel, prim ); -} - - - -void intelWrapInlinePrimitive( intelContextPtr intel ) -{ - GLuint prim = intel->prim.primitive; - - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - intel_flush_inline_primitive( &intel->ctx ); - intelFlushBatch(intel, GL_TRUE); - intelStartInlinePrimitive( intel, prim ); -} - - -/* Emit a primitive with space for inline vertices. - */ -GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, - int primitive, - int dwords, - int vertex_size ) -{ - GLuint *tmp = 0; - BATCH_LOCALS; - - if (0) - fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords); - - /* Emit outstanding state: - */ - intel->vtbl.emit_state( intel ); - - if ((1+dwords)*4 >= intel->batch.space) { - intelFlushBatch(intel, GL_TRUE); - intel->vtbl.emit_state( intel ); - } - - - if (1) { - int used = dwords * 4; - int vertcount; - - /* Check vertex size against the vertex we're specifying to - * hardware. If it's wrong, ditch the primitive. - */ - if (!intel->vtbl.check_vertex_size( intel, vertex_size )) - goto do_discard; - - vertcount = dwords / vertex_size; - - if (dwords % vertex_size) { - fprintf(stderr, "did not request a whole number of vertices\n"); - goto do_discard; - } - - if (bad_prim_vertex_nr( primitive, vertcount )) { - fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount); - goto do_discard; - } - - if (used < 8) - goto do_discard; - } - - /* Emit 3D_PRIMITIVE commands: - */ - BEGIN_BATCH(1 + dwords); - OUT_BATCH( _3DPRIMITIVE | - primitive | - (dwords-1) ); - - tmp = (GLuint *)batch_ptr; - batch_ptr += dwords * 4; - - ADVANCE_BATCH(); - - intel->batch.contains_geometry = 1; - - do_discard: - return tmp; -} - - -static void intelWaitForFrameCompletion( intelContextPtr intel ) -{ - drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea; - - if (intel->do_irqs) { - if (intelGetLastFrame(intel) < sarea->last_dispatch) { - if (!intel->irqsEmitted) { - while (intelGetLastFrame (intel) < sarea->last_dispatch) - ; - } - else { - intelWaitIrq( intel, intel->alloc.irq_emitted ); - } - intel->irqsEmitted = 10; - } - - if (intel->irqsEmitted) { - LOCK_HARDWARE( intel ); - intelEmitIrqLocked( intel ); - intel->irqsEmitted--; - UNLOCK_HARDWARE( intel ); - } - } - else { - while (intelGetLastFrame (intel) < sarea->last_dispatch) { - if (intel->do_usleeps) - DO_USLEEP( 1 ); - } - } -} - -/* - * Copy the back buffer to the front buffer. - */ -void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect) -{ - intelContextPtr intel; - const intelScreenPrivate *intelScreen; - GLboolean missed_target; - int64_t ust; - - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; - - intelFlush( &intel->ctx ); - - intelScreen = intel->intelScreen; - - if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 && - !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) && - intelScreen->current_rotation == 0) { - unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags); - unsigned int target; - drm_i915_vblank_swap_t swap; - - swap.drawable = dPriv->hHWDrawable; - swap.seqtype = DRM_VBLANK_ABSOLUTE; - target = swap.sequence = intel->vbl_seq + interval; - - if (intel->vblank_flags & VBLANK_FLAG_SYNC) { - swap.seqtype |= DRM_VBLANK_NEXTONMISS; - } else if (interval == 0) { - goto noschedule; - } - - if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) { - swap.seqtype |= DRM_VBLANK_SECONDARY; - } - - if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap, - sizeof(swap))) { - intel->swap_scheduled = 1; - intel->vbl_seq = swap.sequence; - swap.sequence -= target; - missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23); - } - } else { - intel->swap_scheduled = 0; - } -noschedule: - - if (!intel->swap_scheduled) { - intelWaitForFrameCompletion( intel ); - LOCK_HARDWARE( intel ); - - if (!rect) - { - UNLOCK_HARDWARE( intel ); - driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target ); - LOCK_HARDWARE( intel ); - } - { - const intelScreenPrivate *intelScreen = intel->intelScreen; - const __DRIdrawablePrivate *dPriv = intel->driDrawable; - const int nbox = dPriv->numClipRects; - const drm_clip_rect_t *pbox = dPriv->pClipRects; - drm_clip_rect_t box; - const int cpp = intelScreen->cpp; - const int pitch = intelScreen->front.pitch; /* in bytes */ - int i; - GLuint CMD, BR13; - BATCH_LOCALS; - - switch(cpp) { - case 2: - BR13 = (pitch) | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - BR13 = (pitch) | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - } - - if (0) - intel_draw_performance_boxes( intel ); - - for (i = 0 ; i < nbox; i++, pbox++) - { - if (pbox->x1 > pbox->x2 || - pbox->y1 > pbox->y2 || - pbox->x2 > intelScreen->width || - pbox->y2 > intelScreen->height) { - _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()"); - continue; - } - - box = *pbox; - - if (rect) - { - if (rect->x1 > box.x1) - box.x1 = rect->x1; - if (rect->y1 > box.y1) - box.y1 = rect->y1; - if (rect->x2 < box.x2) - box.x2 = rect->x2; - if (rect->y2 < box.y2) - box.y2 = rect->y2; - - if (box.x1 > box.x2 || box.y1 > box.y2) - continue; - } - - BEGIN_BATCH( 8); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (box.y1 << 16) | box.x1 ); - OUT_BATCH( (box.y2 << 16) | box.x2 ); - - if (intel->sarea->pf_current_page == 0) - OUT_BATCH( intelScreen->front.offset ); - else - OUT_BATCH( intelScreen->back.offset ); - - OUT_BATCH( (box.y1 << 16) | box.x1 ); - OUT_BATCH( BR13 & 0xffff ); - - if (intel->sarea->pf_current_page == 0) - OUT_BATCH( intelScreen->back.offset ); - else - OUT_BATCH( intelScreen->front.offset ); - - ADVANCE_BATCH(); - } - } - intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE ); - UNLOCK_HARDWARE( intel ); - } - - if (!rect) - { - intel->swap_count++; - (*dri_interface->getUST)(&ust); - if (missed_target) { - intel->swap_missed_count++; - intel->swap_missed_ust = ust - intel->swap_ust; - } - - intel->swap_ust = ust; - } -} - - - - -void intelEmitFillBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort dst_pitch, /* in bytes */ - GLuint dst_offset, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ) -{ - GLuint BR13, CMD; - BATCH_LOCALS; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = dst_pitch | (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - break; - default: - return; - } - - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_BATCH( dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} - - -/* Copy BitBlt - */ -void intelEmitCopyBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort src_pitch, - GLuint src_offset, - GLshort dst_pitch, - GLuint dst_offset, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h ) -{ - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = dst_pitch | (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - return; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - BEGIN_BATCH( 12); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_BATCH( dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_BATCH( src_offset ); - ADVANCE_BATCH(); -} - - - -void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo, - GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - intelScreenPrivate *intelScreen = intel->intelScreen; - GLuint clear_depth, clear_color; - GLint cx, cy, cw, ch; - GLboolean all; - GLint pitch; - GLint cpp = intelScreen->cpp; - GLint i; - GLuint BR13, CMD, D_CMD; - BATCH_LOCALS; - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - - /* get clear bounds after locking */ - cx = intel->ctx.DrawBuffer->_Xmin; - cy = intel->ctx.DrawBuffer->_Ymin; - cw = intel->ctx.DrawBuffer->_Xmax - cx; - ch = intel->ctx.DrawBuffer->_Ymax - cy; - all = (cw == intel->ctx.DrawBuffer->Width && - ch == intel->ctx.DrawBuffer->Height); - - pitch = intelScreen->front.pitch; - - clear_color = intel->ClearColor; - clear_depth = 0; - - if (buffers & BUFFER_BIT_DEPTH) { - clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); - } - - if (buffers & BUFFER_BIT_STENCIL) { - clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; - } - - switch(cpp) { - case 2: - BR13 = (0xF0 << 16) | (pitch) | (1<<24); - D_CMD = CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | - XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - D_CMD = XY_COLOR_BLT_CMD; - if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB; - if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA; - break; - default: - BR13 = (0xF0 << 16) | (pitch) | (1<<24); - D_CMD = CMD = XY_COLOR_BLT_CMD; - break; - } - - { - /* flip top to bottom */ - cy = intel->driDrawable->h - cy - ch; - cx = cx + intel->drawX; - cy += intel->drawY; - - /* adjust for page flipping */ - if ( intel->sarea->pf_current_page == 1 ) { - GLuint tmp = buffers; - - buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT); - if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT; - if ( tmp & BUFFER_BIT_BACK_LEFT ) buffers |= BUFFER_BIT_FRONT_LEFT; - } - - for (i = 0 ; i < intel->numClipRects ; i++) - { - drm_clip_rect_t *box = &intel->pClipRects[i]; - drm_clip_rect_t b; - - if (!all) { - GLint x = box->x1; - GLint y = box->y1; - GLint w = box->x2 - x; - GLint h = box->y2 - y; - - if (x < cx) w -= cx - x, x = cx; - if (y < cy) h -= cy - y, y = cy; - if (x + w > cx + cw) w = cx + cw - x; - if (y + h > cy + ch) h = cy + ch - y; - if (w <= 0) continue; - if (h <= 0) continue; - - b.x1 = x; - b.y1 = y; - b.x2 = x + w; - b.y2 = y + h; - } else { - b = *box; - } - - - if (b.x1 > b.x2 || - b.y1 > b.y2 || - b.x2 > intelScreen->width || - b.y2 > intelScreen->height) - continue; - - if ( buffers & BUFFER_BIT_FRONT_LEFT ) { - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->front.offset ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( buffers & BUFFER_BIT_BACK_LEFT ) { - BEGIN_BATCH( 6); - OUT_BATCH( CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->back.offset ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) { - BEGIN_BATCH( 6); - OUT_BATCH( D_CMD ); - OUT_BATCH( BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( intelScreen->depth.offset ); - OUT_BATCH( clear_depth ); - ADVANCE_BATCH(); - } - } - } - intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE ); - UNLOCK_HARDWARE( intel ); -} - - - - -void intelDestroyBatchBuffer( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - - if (intel->alloc.offset) { - intelFreeAGP( intel, intel->alloc.ptr ); - intel->alloc.ptr = NULL; - intel->alloc.offset = 0; - } - else if (intel->alloc.ptr) { - free(intel->alloc.ptr); - intel->alloc.ptr = NULL; - } - - memset(&intel->batch, 0, sizeof(intel->batch)); -} - - -void intelInitBatchBuffer( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - - /* This path isn't really safe with rotate: - */ - if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) { - switch (intel->intelScreen->deviceID) { - case PCI_CHIP_I865_G: - /* HW bug? Seems to crash if batchbuffer crosses 4k boundary. - */ - intel->alloc.size = 8 * 1024; - break; - default: - /* This is the smallest amount of memory the kernel deals with. - * We'd ideally like to make this smaller. - */ - intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity; - break; - } - - intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size ); - if (intel->alloc.ptr) - intel->alloc.offset = - intelAgpOffsetFromVirtual( intel, intel->alloc.ptr ); - else - intel->alloc.offset = 0; /* OK? */ - } - - /* The default is now to use a local buffer and pass that to the - * kernel. This is also a fallback if allocation fails on the - * above path: - */ - if (!intel->alloc.ptr) { - intel->alloc.size = 8 * 1024; - intel->alloc.ptr = malloc( intel->alloc.size ); - intel->alloc.offset = 0; - } - - assert(intel->alloc.ptr); -} diff --git a/i915/intel_batchbuffer.h b/i915/intel_batchbuffer.h deleted file mode 100644 index 577d071..0000000 --- a/i915/intel_batchbuffer.h +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "intel_context.h" -#include "intel_ioctl.h" - - -#define BATCH_LOCALS GLubyte *batch_ptr; - -/* #define VERBOSE 0 */ -#ifndef VERBOSE -extern int VERBOSE; -#endif - - -#define BEGIN_BATCH(n) \ -do { \ - if (VERBOSE) fprintf(stderr, \ - "BEGIN_BATCH(%ld) in %s, %d dwords free\n", \ - ((unsigned long)n), __FUNCTION__, \ - intel->batch.space/4); \ - if (intel->batch.space < (n)*4) \ - intelFlushBatch(intel, GL_TRUE); \ - if (intel->batch.space == intel->batch.size) intel->batch.func = __FUNCTION__; \ - batch_ptr = intel->batch.ptr; \ -} while (0) - -#define OUT_BATCH(n) \ -do { \ - *(GLuint *)batch_ptr = (n); \ - if (VERBOSE) fprintf(stderr, " -- %08x at %s/%d\n", (n), __FILE__, __LINE__); \ - batch_ptr += 4; \ -} while (0) - -#define ADVANCE_BATCH() \ -do { \ - if (VERBOSE) fprintf(stderr, "ADVANCE_BATCH()\n"); \ - intel->batch.space -= (batch_ptr - intel->batch.ptr); \ - intel->batch.ptr = batch_ptr; \ - assert(intel->batch.space >= 0); \ -} while(0) - -extern void intelInitBatchBuffer( GLcontext *ctx ); -extern void intelDestroyBatchBuffer( GLcontext *ctx ); - -extern void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim ); -extern void intelWrapInlinePrimitive( intelContextPtr intel ); -extern void intelRestartInlinePrimitive( intelContextPtr intel ); -extern GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, - int primitive, int dwords, - int vertex_size); -extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv, - const drm_clip_rect_t *rect); -extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all, - GLint cx1, GLint cy1, GLint cw, GLint ch); - -extern void intelEmitCopyBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort src_pitch, - GLuint src_offset, - GLshort dst_pitch, - GLuint dst_offset, - GLshort srcx, GLshort srcy, - GLshort dstx, GLshort dsty, - GLshort w, GLshort h ); - -extern void intelEmitFillBlitLocked( intelContextPtr intel, - GLuint cpp, - GLshort dst_pitch, - GLuint dst_offset, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ); - - - - -static __inline GLuint *intelExtendInlinePrimitive( intelContextPtr intel, - GLuint dwords ) -{ - GLuint sz = dwords * sizeof(GLuint); - GLuint *ptr; - - if (intel->batch.space < sz) { - intelWrapInlinePrimitive( intel ); -/* assert(intel->batch.space >= sz); */ - } - -/* assert(intel->prim.primitive != ~0); */ - ptr = (GLuint *)intel->batch.ptr; - intel->batch.ptr += sz; - intel->batch.space -= sz; - - return ptr; -} - - - -#endif diff --git a/i915/intel_context.c b/i915/intel_context.c deleted file mode 100644 index bb5ce64..0000000 --- a/i915/intel_context.c +++ /dev/null @@ -1,871 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "glheader.h" -#include "context.h" -#include "matrix.h" -#include "simple_list.h" -#include "extensions.h" -#include "framebuffer.h" -#include "imports.h" -#include "points.h" - -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "vbo/vbo.h" - -#include "tnl/t_pipeline.h" -#include "tnl/t_vertex.h" - -#include "drivers/common/driverfuncs.h" - -#include "intel_screen.h" - -#include "i830_dri.h" -#include "i830_common.h" - -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_tris.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" - -#include "vblank.h" -#include "utils.h" -#include "xmlpool.h" /* for symbolic values of enum-type options */ -#ifndef INTEL_DEBUG -int INTEL_DEBUG = (0); -#endif - -#define need_GL_ARB_multisample -#define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_window_pos -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color -#define need_GL_NV_vertex_program -#include "extension_helper.h" - -#ifndef VERBOSE -int VERBOSE = 0; -#endif - -#if DEBUG_LOCKING -char *prevLockFile; -int prevLockLine; -#endif - -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -#define DRIVER_DATE "20061017" - -const GLubyte *intelGetString( GLcontext *ctx, GLenum name ) -{ - const char * chipset; - static char buffer[128]; - - switch (name) { - case GL_VENDOR: - return (GLubyte *)"Tungsten Graphics, Inc"; - break; - - case GL_RENDERER: - switch (INTEL_CONTEXT(ctx)->intelScreen->deviceID) { - case PCI_CHIP_845_G: - chipset = "Intel(R) 845G"; break; - case PCI_CHIP_I830_M: - chipset = "Intel(R) 830M"; break; - case PCI_CHIP_I855_GM: - chipset = "Intel(R) 852GM/855GM"; break; - case PCI_CHIP_I865_G: - chipset = "Intel(R) 865G"; break; - case PCI_CHIP_I915_G: - chipset = "Intel(R) 915G"; break; - case PCI_CHIP_I915_GM: - chipset = "Intel(R) 915GM"; break; - case PCI_CHIP_I945_G: - chipset = "Intel(R) 945G"; break; - case PCI_CHIP_I945_GM: - chipset = "Intel(R) 945GM"; break; - case PCI_CHIP_I945_GME: - chipset = "Intel(R) 945GME"; break; - case PCI_CHIP_G33_G: - chipset = "Intel(R) G33"; break; - case PCI_CHIP_Q35_G: - chipset = "Intel(R) Q35"; break; - case PCI_CHIP_Q33_G: - chipset = "Intel(R) Q33"; break; - default: - chipset = "Unknown Intel Chipset"; break; - } - - (void) driGetRendererString( buffer, chipset, DRIVER_DATE, 0 ); - return (GLubyte *) buffer; - - default: - return NULL; - } -} - - -/** - * Extension strings exported by the intel driver. - * - * \note - * It appears that ARB_texture_env_crossbar has "disappeared" compared to the - * old i830-specific driver. - */ -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_multitexture", NULL }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_texture_cube_map", NULL }, - { "GL_ARB_texture_env_add", NULL }, - { "GL_ARB_texture_env_combine", NULL }, - { "GL_ARB_texture_env_dot3", NULL }, - { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, - { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, - { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, - { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, - { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, - { "GL_EXT_blend_subtract", NULL }, - { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { "GL_EXT_stencil_wrap", NULL }, - { "GL_EXT_texture_edge_clamp", NULL }, - { "GL_EXT_texture_env_combine", NULL }, - { "GL_EXT_texture_env_dot3", NULL }, - { "GL_EXT_texture_filter_anisotropic", NULL }, - { "GL_EXT_texture_lod_bias", NULL }, - { "GL_3DFX_texture_compression_FXT1", NULL }, - { "GL_APPLE_client_storage", NULL }, - { "GL_MESA_pack_invert", NULL }, - { "GL_MESA_ycbcr_texture", NULL }, - { "GL_NV_blend_square", NULL }, - { "GL_NV_vertex_program", GL_NV_vertex_program_functions }, - { "GL_NV_vertex_program1_1", NULL }, - { "GL_SGIS_generate_mipmap", NULL }, - { NULL, NULL } -}; - -extern const struct tnl_pipeline_stage _intel_render_stage; - -static const struct tnl_pipeline_stage *intel_pipeline[] = { - &_tnl_vertex_transform_stage, - &_tnl_vertex_cull_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_point_attenuation_stage, - &_tnl_vertex_program_stage, -#if 1 - &_intel_render_stage, /* ADD: unclipped rastersetup-to-dma */ -#endif - &_tnl_render_stage, - 0, -}; - - -static const struct dri_debug_control debug_control[] = -{ - { "fall", DEBUG_FALLBACKS }, - { "tex", DEBUG_TEXTURE }, - { "ioctl", DEBUG_IOCTL }, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "state", DEBUG_STATE }, - { "verb", DEBUG_VERBOSE }, - { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sync", DEBUG_SYNC }, - { "sleep", DEBUG_SLEEP }, - { "pix", DEBUG_PIXEL }, - { NULL, 0 } -}; - - -static void intelInvalidateState( GLcontext *ctx, GLuint new_state ) -{ - _swrast_InvalidateState( ctx, new_state ); - _swsetup_InvalidateState( ctx, new_state ); - _vbo_InvalidateState( ctx, new_state ); - _tnl_InvalidateState( ctx, new_state ); - _tnl_invalidate_vertex_state( ctx, new_state ); - INTEL_CONTEXT(ctx)->NewGLState |= new_state; -} - - -void intelInitDriverFunctions( struct dd_function_table *functions ) -{ - _mesa_init_driver_functions( functions ); - - functions->Clear = intelClear; - functions->Flush = intelglFlush; - functions->Finish = intelFinish; - functions->GetString = intelGetString; - functions->UpdateState = intelInvalidateState; - - intelInitTextureFuncs( functions ); - intelInitPixelFuncs( functions ); - intelInitStateFuncs( functions ); -} - -static void intel_emit_invarient_state( GLcontext *ctx ) -{ -} - - - -GLboolean intelInitContext( intelContextPtr intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ) -{ - GLcontext *ctx = &intel->ctx; - GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - drmI830Sarea *saPriv = (drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - int fthrottle_mode; - - if (!_mesa_initialize_context(&intel->ctx, - mesaVis, shareCtx, - functions, - (void*) intel)) - return GL_FALSE; - - driContextPriv->driverPrivate = intel; - intel->intelScreen = intelScreen; - intel->driScreen = sPriv; - intel->sarea = saPriv; - - - (void) memset( intel->texture_heaps, 0, sizeof( intel->texture_heaps ) ); - make_empty_list( & intel->swapped ); - - driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, "i915"); - - ctx->Const.MaxTextureMaxAnisotropy = 2.0; - - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 3.0; - ctx->Const.MaxLineWidthAA = 3.0; - ctx->Const.LineWidthGranularity = 1.0; - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = 255.0; - ctx->Const.MaxPointSizeAA = 3.0; - ctx->Const.PointSizeGranularity = 1.0; - - /* reinitialize the context point state. - * It depend on constants in __GLcontextRec::Const - */ - _mesa_init_point(ctx); - - /* Initialize the software rasterizer and helper modules. */ - _swrast_CreateContext( ctx ); - _vbo_CreateContext( ctx ); - _tnl_CreateContext( ctx ); - _swsetup_CreateContext( ctx ); - - /* Install the customized pipeline: */ - _tnl_destroy_pipeline( ctx ); - _tnl_install_pipeline( ctx, intel_pipeline ); - - /* Configure swrast to match hardware characteristics: */ - _swrast_allow_pixel_fog( ctx, GL_FALSE ); - _swrast_allow_vertex_fog( ctx, GL_TRUE ); - - /* Dri stuff */ - intel->hHWContext = driContextPriv->hHWContext; - intel->driFd = sPriv->fd; - intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock; - - intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; - intel->hw_stipple = 1; - - switch(mesaVis->depthBits) { - case 0: /* what to do in this case? */ - case 16: - intel->depth_scale = 1.0/0xffff; - intel->polygon_offset_scale = 1.0/0xffff; - intel->depth_clear_mask = ~0; - intel->ClearDepth = 0xffff; - break; - case 24: - intel->depth_scale = 1.0/0xffffff; - intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */ - intel->depth_clear_mask = 0x00ffffff; - intel->stencil_clear_mask = 0xff000000; - intel->ClearDepth = 0x00ffffff; - break; - default: - assert(0); - break; - } - - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs( ctx ); - intelInitTriFuncs( ctx ); - - - intel->RenderIndex = ~0; - - fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode"); - intel->iw.irq_seq = -1; - intel->irqsEmitted = 0; - - intel->do_irqs = (intel->intelScreen->irq_active && - fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); - - intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - - intel->vblank_flags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ; - - (*dri_interface->getUST)(&intel->swap_ust); - _math_matrix_ctr (&intel->ViewportMatrix); - - driInitExtensions( ctx, card_extensions, GL_TRUE ); - - if (intel->ctx.Mesa_DXTn) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - _mesa_enable_extension( ctx, "GL_S3_s3tc" ); - } - else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - } - -/* driInitTextureObjects( ctx, & intel->swapped, */ -/* DRI_TEXMGR_DO_TEXTURE_1D | */ -/* DRI_TEXMGR_DO_TEXTURE_2D | */ -/* DRI_TEXMGR_DO_TEXTURE_RECT ); */ - - - intelInitBatchBuffer(&intel->ctx); - intel->prim.flush = intel_emit_invarient_state; - intel->prim.primitive = ~0; - - -#if DO_DEBUG - INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), - debug_control ); - INTEL_DEBUG |= driParseDebugString( getenv( "INTEL_DEBUG" ), - debug_control ); -#endif - -#ifndef VERBOSE - if (getenv("INTEL_VERBOSE")) - VERBOSE=1; -#endif - - if (getenv("INTEL_NO_RAST") || - getenv("INTEL_NO_RAST")) { - fprintf(stderr, "disabling 3D rasterization\n"); - FALLBACK(intel, INTEL_FALLBACK_USER, 1); - } - - return GL_TRUE; -} - -void intelDestroyContext(__DRIcontextPrivate *driContextPriv) -{ - intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate; - - assert(intel); /* should never be null */ - if (intel) { - GLboolean release_texture_heaps; - - INTEL_FIREVERTICES( intel ); - - intel->vtbl.destroy( intel ); - - release_texture_heaps = (intel->ctx.Shared->RefCount == 1); - _swsetup_DestroyContext (&intel->ctx); - _tnl_DestroyContext (&intel->ctx); - _vbo_DestroyContext (&intel->ctx); - - _swrast_DestroyContext (&intel->ctx); - intel->Fallback = 0; /* don't call _swrast_Flush later */ - - intelDestroyBatchBuffer(&intel->ctx); - - - if ( release_texture_heaps ) { - /* This share group is about to go away, free our private - * texture object data. - */ - int i; - - for ( i = 0 ; i < intel->nr_heaps ; i++ ) { - driDestroyTextureHeap( intel->texture_heaps[ i ] ); - intel->texture_heaps[ i ] = NULL; - } - - assert( is_empty_list( & intel->swapped ) ); - } - - /* free the Mesa context */ - _mesa_destroy_context(&intel->ctx); - } -} - -void intelSetFrontClipRects( intelContextPtr intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; -} - - -void intelSetBackClipRects( intelContextPtr intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) { - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; - } else { - intel->numClipRects = dPriv->numBackClipRects; - intel->pClipRects = dPriv->pBackClipRects; - intel->drawX = dPriv->backX; - intel->drawY = dPriv->backY; - - if (dPriv->numBackClipRects == 1 && - dPriv->x == dPriv->backX && - dPriv->y == dPriv->backY) { - - /* Repeat the calculation of the back cliprect dimensions here - * as early versions of dri.a in the Xserver are incorrect. Try - * very hard not to restrict future versions of dri.a which - * might eg. allocate truly private back buffers. - */ - int x1, y1; - int x2, y2; - - x1 = dPriv->x; - y1 = dPriv->y; - x2 = dPriv->x + dPriv->w; - y2 = dPriv->y + dPriv->h; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; - if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; - - if (x1 == dPriv->pBackClipRects[0].x1 && - y1 == dPriv->pBackClipRects[0].y1) { - - dPriv->pBackClipRects[0].x2 = x2; - dPriv->pBackClipRects[0].y2 = y2; - } - } - } -} - - -void intelWindowMoved( intelContextPtr intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - GLframebuffer *drawFb = (GLframebuffer *) dPriv->driverPrivate; - - if (!intel->ctx.DrawBuffer) { - intelSetFrontClipRects( intel ); - } - else { - driUpdateFramebufferSize(&intel->ctx, dPriv); - switch (drawFb->_ColorDrawBufferMask[0]) { - case BUFFER_BIT_FRONT_LEFT: - intelSetFrontClipRects( intel ); - break; - case BUFFER_BIT_BACK_LEFT: - intelSetBackClipRects( intel ); - break; - default: - /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */ - intelSetFrontClipRects( intel ); - } - } - - if (drawFb->Width != dPriv->w || drawFb->Height != dPriv->h) { - /* update Mesa's notion of framebuffer/window size */ - _mesa_resize_framebuffer(&intel->ctx, drawFb, dPriv->w, dPriv->h); - drawFb->Initialized = GL_TRUE; /* XXX remove someday */ - } - - /* Set state we know depends on drawable parameters: - */ - { - GLcontext *ctx = &intel->ctx; - - if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) { - drmI830Sarea *sarea = intel->sarea; - drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w, - .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h }; - drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x, - .x2 = sarea->pipeA_x + sarea->pipeA_w, - .y1 = sarea->pipeA_y, - .y2 = sarea->pipeA_y + sarea->pipeA_h }; - drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x, - .x2 = sarea->pipeB_x + sarea->pipeB_w, - .y1 = sarea->pipeB_y, - .y2 = sarea->pipeB_y + sarea->pipeB_h }; - GLint areaA = driIntersectArea( drw_rect, pipeA_rect ); - GLint areaB = driIntersectArea( drw_rect, pipeB_rect ); - GLuint flags = intel->vblank_flags; - - if (areaB > areaA || (areaA == areaB && areaB > 0)) { - flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY; - } else { - flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY; - } - - if (flags != intel->vblank_flags) { - intel->vblank_flags = flags; - driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq); - } - } else { - intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY; - } - - ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); - - ctx->Driver.DepthRange( ctx, - ctx->Viewport.Near, - ctx->Viewport.Far ); - } -} - -GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv) -{ - return GL_TRUE; -} - -GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - - if (driContextPriv) { - intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate; - - if ( intel->driDrawable != driDrawPriv ) { - /* Shouldn't the readbuffer be stored also? */ - driDrawableInitVBlank( driDrawPriv, intel->vblank_flags, - &intel->vbl_seq ); - - intel->driDrawable = driDrawPriv; - intelWindowMoved( intel ); - } - - _mesa_make_current(&intel->ctx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate); - - intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] ); - } else { - _mesa_make_current(NULL, NULL, NULL); - } - - return GL_TRUE; -} - -/** - * Use the information in the sarea to update the screen parameters - * related to screen rotation. - */ -static void -intelUpdateScreenRotation(intelContextPtr intel, - __DRIscreenPrivate *sPriv, - drmI830Sarea *sarea) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - intelRegion *colorBuf; - - intelUnmapScreenRegions(intelScreen); - - intelUpdateScreenFromSAREA(intelScreen, sarea); - - /* update the current hw offsets for the color and depth buffers */ - if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) - colorBuf = &intelScreen->back; - else - colorBuf = &intelScreen->front; - intel->vtbl.update_color_z_regions(intel, colorBuf, &intelScreen->depth); - - if (!intelMapScreenRegions(sPriv)) { - fprintf(stderr, "ERROR Remapping screen regions!!!\n"); - } -} - -void intelGetLock( intelContextPtr intel, GLuint flags ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - __DRIscreenPrivate *sPriv = intel->driScreen; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - drmI830Sarea * sarea = intel->sarea; - unsigned i; - - drmGetLock(intel->driFd, intel->hHWContext, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); - - if (dPriv && intel->lastStamp != dPriv->lastStamp) { - intelWindowMoved( intel ); - intel->lastStamp = dPriv->lastStamp; - } - - /* If we lost context, need to dump all registers to hardware. - * Note that we don't care about 2d contexts, even if they perform - * accelerated commands, so the DRI locking in the X server is even - * more broken than usual. - */ - - if (sarea->width != intelScreen->width || - sarea->height != intelScreen->height || - sarea->rotation != intelScreen->current_rotation) { - intelUpdateScreenRotation(intel, sPriv, sarea); - - /* This will drop the outstanding batchbuffer on the floor */ - intel->batch.ptr -= (intel->batch.size - intel->batch.space); - intel->batch.space = intel->batch.size; - /* lose all primitives */ - intel->prim.primitive = ~0; - intel->prim.start_ptr = 0; - intel->prim.flush = 0; - intel->vtbl.lost_hardware( intel ); - - intel->lastStamp = 0; /* force window update */ - - /* Release batch buffer - */ - intelDestroyBatchBuffer(&intel->ctx); - intelInitBatchBuffer(&intel->ctx); - intel->prim.flush = intel_emit_invarient_state; - - /* Still need to reset the global LRU? - */ - intel_driReinitTextureHeap( intel->texture_heaps[0], intel->intelScreen->tex.size ); - } - - /* Shared texture managment - if another client has played with - * texture space, figure out which if any of our textures have been - * ejected, and update our global LRU. - */ - for ( i = 0 ; i < intel->nr_heaps ; i++ ) { - DRI_AGE_TEXTURES( intel->texture_heaps[ i ] ); - } -} - - -void intelSwapBuffers( __DRIdrawablePrivate *dPriv ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - intelContextPtr intel; - GLcontext *ctx; - intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = &intel->ctx; - if (ctx->Visual.doubleBufferMode) { - intelScreenPrivate *screen = intel->intelScreen; - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */ - intelPageFlip( dPriv ); - } else { - intelCopyBuffer( dPriv, NULL ); - } - if (screen->current_rotation != 0) { - intelRotateWindow(intel, dPriv, BUFFER_BIT_FRONT_LEFT); - } - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - -void intelCopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - intelContextPtr intel; - GLcontext *ctx; - intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = &intel->ctx; - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - intelCopyBuffer( dPriv, &rect ); - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - -void intelInitState( GLcontext *ctx ) -{ - /* Mesa should do this for us: - */ - ctx->Driver.AlphaFunc( ctx, - ctx->Color.AlphaFunc, - ctx->Color.AlphaRef); - - ctx->Driver.BlendColor( ctx, - ctx->Color.BlendColor ); - - ctx->Driver.BlendEquationSeparate( ctx, - ctx->Color.BlendEquationRGB, - ctx->Color.BlendEquationA); - - ctx->Driver.BlendFuncSeparate( ctx, - ctx->Color.BlendSrcRGB, - ctx->Color.BlendDstRGB, - ctx->Color.BlendSrcA, - ctx->Color.BlendDstA); - - ctx->Driver.ColorMask( ctx, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP]); - - ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode ); - ctx->Driver.DepthFunc( ctx, ctx->Depth.Func ); - ctx->Driver.DepthMask( ctx, ctx->Depth.Mask ); - - ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled ); - ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled ); - ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled ); - ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled ); - ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag ); - ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test ); - ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag ); - ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled ); - ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled ); - ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag ); - ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag ); - ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled ); - ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled ); - ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE ); - - ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); - ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 ); - ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); - ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); - ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); - - ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace ); - - { - GLfloat f = (GLfloat)ctx->Light.Model.ColorControl; - ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f ); - } - - ctx->Driver.LineWidth( ctx, ctx->Line.Width ); - ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp ); - ctx->Driver.PointSize( ctx, ctx->Point.Size ); - ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple ); - ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); - ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel ); - ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT, - ctx->Stencil.Function[0], - ctx->Stencil.Ref[0], - ctx->Stencil.ValueMask[0] ); - ctx->Driver.StencilFuncSeparate( ctx, GL_BACK, - ctx->Stencil.Function[1], - ctx->Stencil.Ref[1], - ctx->Stencil.ValueMask[1] ); - ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] ); - ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] ); - ctx->Driver.StencilOpSeparate( ctx, GL_FRONT, - ctx->Stencil.FailFunc[0], - ctx->Stencil.ZFailFunc[0], - ctx->Stencil.ZPassFunc[0]); - ctx->Driver.StencilOpSeparate( ctx, GL_BACK, - ctx->Stencil.FailFunc[1], - ctx->Stencil.ZFailFunc[1], - ctx->Stencil.ZPassFunc[1]); - - - ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] ); -} - - diff --git a/i915/intel_context.h b/i915/intel_context.h deleted file mode 100644 index 50e6178..0000000 --- a/i915/intel_context.h +++ /dev/null @@ -1,564 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTELCONTEXT_INC -#define INTELCONTEXT_INC - - - -#include "mtypes.h" -#include "drm.h" -#include "mm.h" -#include "texmem.h" -#include "vblank.h" - -#include "intel_screen.h" -#include "i915_drm.h" -#include "i830_common.h" -#include "tnl/t_vertex.h" - -#define TAG(x) intel##x -#include "tnl_dd/t_dd_vertex.h" -#undef TAG - -#define DV_PF_555 (1<<8) -#define DV_PF_565 (2<<8) -#define DV_PF_8888 (3<<8) - -#define INTEL_CONTEXT(ctx) ((intelContextPtr)(ctx)) - -typedef struct intel_context intelContext; -typedef struct intel_context *intelContextPtr; -typedef struct intel_texture_object *intelTextureObjectPtr; - -typedef void (*intel_tri_func)(intelContextPtr, intelVertex *, intelVertex *, - intelVertex *); -typedef void (*intel_line_func)(intelContextPtr, intelVertex *, intelVertex *); -typedef void (*intel_point_func)(intelContextPtr, intelVertex *); - -#define INTEL_FALLBACK_DRAW_BUFFER 0x1 -#define INTEL_FALLBACK_READ_BUFFER 0x2 -#define INTEL_FALLBACK_USER 0x4 -#define INTEL_FALLBACK_NO_BATCHBUFFER 0x8 -#define INTEL_FALLBACK_NO_TEXMEM 0x10 -#define INTEL_FALLBACK_RENDERMODE 0x20 - -extern void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode ); -#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) - - -#define INTEL_TEX_MAXLEVELS 10 - - -struct intel_texture_object -{ - driTextureObject base; /* the parent class */ - - GLuint texelBytes; - GLuint age; - GLuint Pitch; - GLuint Height; - GLuint TextureOffset; - GLubyte *BufAddr; - - GLuint min_level; - GLuint max_level; - GLuint depth_pitch; - - struct { - const struct gl_texture_image *image; - GLuint offset; /* into BufAddr */ - GLuint height; - GLuint internalFormat; - } image[6][INTEL_TEX_MAXLEVELS]; - - GLuint dirty; - GLuint firstLevel,lastLevel; -}; - - -struct intel_context -{ - GLcontext ctx; /* the parent class */ - - struct { - void (*destroy)( intelContextPtr intel ); - void (*emit_state)( intelContextPtr intel ); - void (*lost_hardware)( intelContextPtr intel ); - void (*update_texture_state)( intelContextPtr intel ); - - void (*render_start)( intelContextPtr intel ); - void (*set_color_region)( intelContextPtr intel, const intelRegion *reg ); - void (*set_z_region)( intelContextPtr intel, const intelRegion *reg ); - void (*update_color_z_regions)(intelContextPtr intel, - const intelRegion *colorRegion, - const intelRegion *depthRegion); - void (*emit_flush)( intelContextPtr intel ); - void (*reduced_primitive_state)( intelContextPtr intel, GLenum rprim ); - - GLboolean (*check_vertex_size)( intelContextPtr intel, GLuint expected ); - - void (*clear_with_tris)( intelContextPtr intel, GLbitfield mask, - GLboolean all, - GLint cx, GLint cy, GLint cw, GLint ch); - - void (*rotate_window)( intelContextPtr intel, - __DRIdrawablePrivate *dPriv, GLuint srcBuf); - - intelTextureObjectPtr (*alloc_tex_obj)( struct gl_texture_object *tObj ); - - } vtbl; - - GLint refcount; - GLuint Fallback; - GLuint NewGLState; - - struct { - GLuint start_offset; - GLint size; - GLint space; - GLubyte *ptr; - GLuint counter; - GLuint last_emit_state; - GLboolean contains_geometry; - const char *func; - GLuint last_swap; - } batch; - - struct { - void *ptr; - GLint size; - GLuint offset; - GLuint active_buf; - GLuint irq_emitted; - } alloc; - - struct { - GLuint primitive; - GLubyte *start_ptr; - void (*flush)( GLcontext * ); - } prim; - - GLboolean locked; - - GLubyte clear_red; - GLubyte clear_green; - GLubyte clear_blue; - GLubyte clear_alpha; - GLuint ClearColor; - GLuint ClearDepth; - - GLuint coloroffset; - GLuint specoffset; - - /* Support for duplicating XYZW as WPOS parameter (crutch for I915). - */ - GLuint wpos_offset; - GLuint wpos_size; - - struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; - GLuint vertex_attr_count; - - GLfloat depth_scale; - GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ - GLuint depth_clear_mask; - GLuint stencil_clear_mask; - - GLboolean hw_stencil; - GLboolean hw_stipple; - - /* Texture object bookkeeping - */ - GLuint nr_heaps; - driTexHeap * texture_heaps[1]; - driTextureObject swapped; - GLuint lastStamp; - - struct intel_texture_object *CurrentTexObj[MAX_TEXTURE_UNITS]; - - /* State for intelvb.c and inteltris.c. - */ - GLuint RenderIndex; - GLmatrix ViewportMatrix; - GLenum render_primitive; - GLenum reduced_primitive; - GLuint vertex_size; - unsigned char *verts; /* points to tnl->clipspace.vertex_buf */ - - - /* Fallback rasterization functions - */ - intel_point_func draw_point; - intel_line_func draw_line; - intel_tri_func draw_tri; - - /* Drawing buffer state - */ - intelRegion *drawRegion; /* current drawing buffer */ - intelRegion *readRegion; /* current reading buffer */ - - int drawX; /* origin of drawable in draw buffer */ - int drawY; - GLuint numClipRects; /* cliprects for that buffer */ - drm_clip_rect_t *pClipRects; - - int dirtyAge; - int perf_boxes; - - GLuint do_usleeps; - int do_irqs; - GLuint irqsEmitted; - drm_i915_irq_wait_t iw; - - GLboolean scissor; - drm_clip_rect_t draw_rect; - drm_clip_rect_t scissor_rect; - - drm_context_t hHWContext; - drmLock *driHwLock; - int driFd; - - __DRIdrawablePrivate *driDrawable; - __DRIscreenPrivate *driScreen; - intelScreenPrivate *intelScreen; - drmI830Sarea *sarea; - - /** - * Configuration cache - */ - driOptionCache optionCache; - - /* VBI - */ - GLuint vbl_seq; - GLuint vblank_flags; - - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; - - GLuint swap_scheduled; -}; - - -#define DEBUG_LOCKING 1 - -#if DEBUG_LOCKING -extern char *prevLockFile; -extern int prevLockLine; - -#define DEBUG_LOCK() \ - do { \ - prevLockFile = (__FILE__); \ - prevLockLine = (__LINE__); \ - } while (0) - -#define DEBUG_RESET() \ - do { \ - prevLockFile = 0; \ - prevLockLine = 0; \ - } while (0) - -/* Slightly less broken way of detecting recursive locking in a - * threaded environment. The right way to do this would be to make - * prevLockFile, prevLockLine thread-local. - * - * This technique instead checks to see if the same context is - * requesting the lock twice -- this will not catch application - * breakages where the same context is active in two different threads - * at once, but it will catch driver breakages (recursive locking) in - * threaded apps. - */ -#define DEBUG_CHECK_LOCK() \ - do { \ - if ( *((volatile int *)intel->driHwLock) == \ - (DRM_LOCK_HELD | intel->hHWContext) ) { \ - fprintf( stderr, \ - "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ - prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ - abort(); \ - } \ - } while (0) - -#else - -#define DEBUG_LOCK() -#define DEBUG_RESET() -#define DEBUG_CHECK_LOCK() - -#endif - - - - -/* Lock the hardware and validate our state. - */ -#define LOCK_HARDWARE( intel ) \ -do { \ - char __ret=0; \ - DEBUG_CHECK_LOCK(); \ - assert(!(intel)->locked); \ - if ((intel)->swap_scheduled) { \ - drmVBlank vbl; \ - vbl.request.type = DRM_VBLANK_ABSOLUTE; \ - if ((intel)->vblank_flags & \ - VBLANK_FLAG_SECONDARY) { \ - vbl.request.type |= DRM_VBLANK_SECONDARY; \ - } \ - vbl.request.sequence = (intel)->vbl_seq; \ - drmWaitVBlank((intel)->driFd, &vbl); \ - (intel)->swap_scheduled = 0; \ - } \ - DRM_CAS((intel)->driHwLock, (intel)->hHWContext, \ - (DRM_LOCK_HELD|(intel)->hHWContext), __ret); \ - if (__ret) \ - intelGetLock( (intel), 0 ); \ - DEBUG_LOCK(); \ - (intel)->locked = 1; \ -}while (0) - - - /* Unlock the hardware using the global current context - */ -#define UNLOCK_HARDWARE(intel) \ -do { \ - intel->locked = 0; \ - if (0) { \ - intel->perf_boxes |= intel->sarea->perf_boxes; \ - intel->sarea->perf_boxes = 0; \ - } \ - DRM_UNLOCK((intel)->driFd, (intel)->driHwLock, (intel)->hHWContext); \ - DEBUG_RESET(); \ -} while (0) - - -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - -#define INTEL_FIREVERTICES(intel) \ -do { \ - if ((intel)->prim.flush) \ - (intel)->prim.flush(&(intel)->ctx); \ -} while (0) - -/* ================================================================ - * Color packing: - */ - -#define INTEL_PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define INTEL_PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define INTEL_PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define INTEL_PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - -#define INTEL_PACKCOLOR(format, r, g, b, a) \ -(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) : \ - (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) : \ - (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) : \ - 0))) - - - -/* ================================================================ - * From linux kernel i386 header files, copes with odd sizes better - * than COPY_DWORDS would: - */ -#if defined(i386) || defined(__i386__) -static __inline__ void * __memcpy(void * to, const void * from, size_t n) -{ - int d0, d1, d2; - __asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); - return (to); -} -#else -#define __memcpy(a,b,c) memcpy(a,b,c) -#endif - - - -/* ================================================================ - * Debugging: - */ -#define DO_DEBUG 1 -#if DO_DEBUG -extern int INTEL_DEBUG; -#else -#define INTEL_DEBUG 0 -#endif - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 - - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GME 0x27AE -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_Q35_G 0x29B2 -#define PCI_CHIP_Q33_G 0x29D2 - - -/* ================================================================ - * intel_context.c: - */ - -extern void intelInitDriverFunctions( struct dd_function_table *functions ); - -extern GLboolean intelInitContext( intelContextPtr intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ); - -extern void intelGetLock(intelContextPtr intel, GLuint flags); -extern void intelSetBackClipRects(intelContextPtr intel); -extern void intelSetFrontClipRects(intelContextPtr intel); -extern void intelWindowMoved( intelContextPtr intel ); - -extern void intelInitState( GLcontext *ctx ); -extern const GLubyte *intelGetString( GLcontext *ctx, GLenum name ); - - -/* ================================================================ - * intel_state.c: - */ -extern void intelInitStateFuncs( struct dd_function_table *functions ); - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 0x1 -#define STENCILOP_REPLACE 0x2 -#define STENCILOP_INCRSAT 0x3 -#define STENCILOP_DECRSAT 0x4 -#define STENCILOP_INCR 0x5 -#define STENCILOP_DECR 0x6 -#define STENCILOP_INVERT 0x7 - -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 0x1 -#define LOGICOP_AND_INV 0x2 -#define LOGICOP_COPY_INV 0x3 -#define LOGICOP_AND_RVRSE 0x4 -#define LOGICOP_INV 0x5 -#define LOGICOP_XOR 0x6 -#define LOGICOP_NAND 0x7 -#define LOGICOP_AND 0x8 -#define LOGICOP_EQUIV 0x9 -#define LOGICOP_NOOP 0xa -#define LOGICOP_OR_INV 0xb -#define LOGICOP_COPY 0xc -#define LOGICOP_OR_RVRSE 0xd -#define LOGICOP_OR 0xe -#define LOGICOP_SET 0xf - -#define BLENDFACT_ZERO 0x01 -#define BLENDFACT_ONE 0x02 -#define BLENDFACT_SRC_COLR 0x03 -#define BLENDFACT_INV_SRC_COLR 0x04 -#define BLENDFACT_SRC_ALPHA 0x05 -#define BLENDFACT_INV_SRC_ALPHA 0x06 -#define BLENDFACT_DST_ALPHA 0x07 -#define BLENDFACT_INV_DST_ALPHA 0x08 -#define BLENDFACT_DST_COLR 0x09 -#define BLENDFACT_INV_DST_COLR 0x0a -#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b -#define BLENDFACT_CONST_COLOR 0x0c -#define BLENDFACT_INV_CONST_COLOR 0x0d -#define BLENDFACT_CONST_ALPHA 0x0e -#define BLENDFACT_INV_CONST_ALPHA 0x0f -#define BLENDFACT_MASK 0x0f - - -extern int intel_translate_compare_func( GLenum func ); -extern int intel_translate_stencil_op( GLenum op ); -extern int intel_translate_blend_factor( GLenum factor ); -extern int intel_translate_logic_op( GLenum opcode ); - - -/* ================================================================ - * intel_ioctl.c: - */ -extern void intel_dump_batchbuffer( long offset, - int *ptr, - int count ); - - -/* ================================================================ - * intel_pixel.c: - */ -extern void intelInitPixelFuncs( struct dd_function_table *functions ); - - - -#endif - diff --git a/i915/intel_ioctl.c b/i915/intel_ioctl.c deleted file mode 100644 index ede3b63..0000000 --- a/i915/intel_ioctl.c +++ /dev/null @@ -1,659 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <sched.h> - -#include "mtypes.h" -#include "context.h" -#include "swrast/swrast.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "drm.h" - -u_int32_t intelGetLastFrame (intelContextPtr intel) -{ - int ret; - u_int32_t frame; - drm_i915_getparam_t gp; - - gp.param = I915_PARAM_LAST_DISPATCH; - gp.value = (int *)&frame; - ret = drmCommandWriteRead( intel->driFd, DRM_I915_GETPARAM, - &gp, sizeof(gp) ); - return frame; -} - -int intelEmitIrqLocked( intelContextPtr intel ) -{ - drmI830IrqEmit ie; - int ret, seq; - - assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == - (DRM_LOCK_HELD|intel->hHWContext)); - - ie.irq_seq = &seq; - - ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, - &ie, sizeof(ie) ); - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret ); - exit(1); - } - - if (0) - fprintf(stderr, "%s --> %d\n", __FUNCTION__, seq ); - - return seq; -} - -void intelWaitIrq( intelContextPtr intel, int seq ) -{ - int ret; - - if (0) - fprintf(stderr, "%s %d\n", __FUNCTION__, seq ); - - intel->iw.irq_seq = seq; - - do { - ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &intel->iw, sizeof(intel->iw) ); - } while (ret == -EAGAIN || ret == -EINTR); - - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret ); - if (0) - intel_dump_batchbuffer( intel->alloc.offset, - intel->alloc.ptr, - intel->alloc.size ); - exit(1); - } -} - - - -static void age_intel( intelContextPtr intel, int age ) -{ - GLuint i; - - for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++) - if (intel->CurrentTexObj[i]) - intel->CurrentTexObj[i]->age = age; -} - -void intel_dump_batchbuffer( long offset, - int *ptr, - int count ) -{ - int i; - fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", count); - for (i = 0; i < count/4; i += 4) - fprintf(stderr, "\t0x%x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - (unsigned int)offset + i*4, ptr[i], ptr[i+1], ptr[i+2], ptr[i+3]); - fprintf(stderr, "END BATCH\n\n\n"); -} - -void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock ) -{ - GLuint last_irq = intel->alloc.irq_emitted; - GLuint half = intel->alloc.size / 2; - GLuint buf = (intel->alloc.active_buf ^= 1); - - intel->alloc.irq_emitted = intelEmitIrqLocked( intel ); - - if (last_irq) { - if (allow_unlock) UNLOCK_HARDWARE( intel ); - intelWaitIrq( intel, last_irq ); - if (allow_unlock) LOCK_HARDWARE( intel ); - } - - if (0) - fprintf(stderr, "%s: now using half %d\n", __FUNCTION__, buf); - - intel->batch.start_offset = intel->alloc.offset + buf * half; - intel->batch.ptr = (unsigned char *)intel->alloc.ptr + buf * half; - intel->batch.size = half - 8; - intel->batch.space = half - 8; - assert(intel->batch.space >= 0); -} - -#define MI_BATCH_BUFFER_END (0xA<<23) - - -void intelFlushBatchLocked( intelContextPtr intel, - GLboolean ignore_cliprects, - GLboolean refill, - GLboolean allow_unlock) -{ - drmI830BatchBuffer batch; - - assert(intel->locked); - - if (0) - fprintf(stderr, "%s used %d of %d offset %x..%x refill %d (started in %s)\n", - __FUNCTION__, - (intel->batch.size - intel->batch.space), - intel->batch.size, - intel->batch.start_offset, - intel->batch.start_offset + - (intel->batch.size - intel->batch.space), - refill, - intel->batch.func); - - /* Throw away non-effective packets. Won't work once we have - * hardware contexts which would preserve statechanges beyond a - * single buffer. - */ - if (intel->numClipRects == 0 && !ignore_cliprects) { - - /* Without this yeild, an application with no cliprects can hog - * the hardware. Without unlocking, the effect is much worse - - * effectively a lock-out of other contexts. - */ - if (allow_unlock) { - UNLOCK_HARDWARE( intel ); - sched_yield(); - LOCK_HARDWARE( intel ); - } - - /* Note that any state thought to have been emitted actually - * hasn't: - */ - intel->batch.ptr -= (intel->batch.size - intel->batch.space); - intel->batch.space = intel->batch.size; - intel->vtbl.lost_hardware( intel ); - } - - if (intel->batch.space != intel->batch.size) { - - if (intel->sarea->ctxOwner != intel->hHWContext) { - intel->perf_boxes |= I830_BOX_LOST_CONTEXT; - intel->sarea->ctxOwner = intel->hHWContext; - } - - batch.start = intel->batch.start_offset; - batch.used = intel->batch.size - intel->batch.space; - batch.cliprects = intel->pClipRects; - batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - batch.DR1 = 0; - batch.DR4 = ((((GLuint)intel->drawX) & 0xffff) | - (((GLuint)intel->drawY) << 16)); - - if (intel->alloc.offset) { - if ((batch.used & 0x4) == 0) { - ((int *)intel->batch.ptr)[0] = 0; - ((int *)intel->batch.ptr)[1] = MI_BATCH_BUFFER_END; - batch.used += 0x8; - intel->batch.ptr += 0x8; - } - else { - ((int *)intel->batch.ptr)[0] = MI_BATCH_BUFFER_END; - batch.used += 0x4; - intel->batch.ptr += 0x4; - } - } - - if (0) - intel_dump_batchbuffer( batch.start, - (int *)(intel->batch.ptr - batch.used), - batch.used ); - - intel->batch.start_offset += batch.used; - intel->batch.size -= batch.used; - - if (intel->batch.size < 8) { - refill = GL_TRUE; - intel->batch.space = intel->batch.size = 0; - } - else { - intel->batch.size -= 8; - intel->batch.space = intel->batch.size; - } - - - assert(intel->batch.space >= 0); - assert(batch.start >= intel->alloc.offset); - assert(batch.start < intel->alloc.offset + intel->alloc.size); - assert(batch.start + batch.used > intel->alloc.offset); - assert(batch.start + batch.used <= - intel->alloc.offset + intel->alloc.size); - - - if (intel->alloc.offset) { - if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, - sizeof(batch))) { - fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - } else { - drmI830CmdBuffer cmd; - cmd.buf = (char *)intel->alloc.ptr + batch.start; - cmd.sz = batch.used; - cmd.DR1 = batch.DR1; - cmd.DR4 = batch.DR4; - cmd.num_cliprects = batch.num_cliprects; - cmd.cliprects = batch.cliprects; - - if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, - sizeof(cmd))) { - fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - } - - - age_intel(intel, intel->sarea->last_enqueue); - - /* FIXME: use hardware contexts to avoid 'losing' hardware after - * each buffer flush. - */ - if (intel->batch.contains_geometry) - assert(intel->batch.last_emit_state == intel->batch.counter); - - intel->batch.counter++; - intel->batch.contains_geometry = 0; - intel->batch.func = 0; - intel->vtbl.lost_hardware( intel ); - } - - if (refill) - intelRefillBatchLocked( intel, allow_unlock ); -} - -void intelFlushBatch( intelContextPtr intel, GLboolean refill ) -{ - if (intel->locked) { - intelFlushBatchLocked( intel, GL_FALSE, refill, GL_FALSE ); - } - else { - LOCK_HARDWARE(intel); - intelFlushBatchLocked( intel, GL_FALSE, refill, GL_TRUE ); - UNLOCK_HARDWARE(intel); - } -} - - -void intelWaitForIdle( intelContextPtr intel ) -{ - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - - intel->vtbl.emit_flush( intel ); - intelFlushBatch( intel, GL_TRUE ); - - /* Use an irq to wait for dma idle -- Need to track lost contexts - * to shortcircuit consecutive calls to this function: - */ - intelWaitIrq( intel, intel->alloc.irq_emitted ); - intel->alloc.irq_emitted = 0; -} - - -/** - * Check if we need to rotate/warp the front color buffer to the - * rotated screen. We generally need to do this when we get a glFlush - * or glFinish after drawing to the front color buffer. - */ -static void -intelCheckFrontRotate(GLcontext *ctx) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { - intelScreenPrivate *screen = intel->intelScreen; - if (screen->current_rotation != 0) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - intelRotateWindow(intel, dPriv, BUFFER_BIT_FRONT_LEFT); - } - } -} - - -/** - * NOT directly called via glFlush. - */ -void intelFlush( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - - if (intel->Fallback) - _swrast_flush( ctx ); - - INTEL_FIREVERTICES( intel ); - - if (intel->batch.size != intel->batch.space) - intelFlushBatch( intel, GL_FALSE ); -} - - -/** - * Called via glFlush. - */ -void intelglFlush( GLcontext *ctx ) -{ - intelFlush(ctx); - intelCheckFrontRotate(ctx); -} - - -void intelFinish( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - intelFlush( ctx ); - intelWaitForIdle( intel ); - intelCheckFrontRotate(ctx); -} - - -void intelClear(GLcontext *ctx, GLbitfield mask) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask); - GLbitfield tri_mask = 0; - GLbitfield blit_mask = 0; - GLbitfield swrast_mask = 0; - - if (0) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* Take care of cliprects, which are handled differently for - * clears, etc. - */ - intelFlush( &intel->ctx ); - - if (mask & BUFFER_BIT_FRONT_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_FRONT_LEFT; - } - else { - tri_mask |= BUFFER_BIT_FRONT_LEFT; - } - } - - if (mask & BUFFER_BIT_BACK_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_BACK_LEFT; - } - else { - tri_mask |= BUFFER_BIT_BACK_LEFT; - } - } - - if (mask & BUFFER_BIT_DEPTH) { - blit_mask |= BUFFER_BIT_DEPTH; - } - - if (mask & BUFFER_BIT_STENCIL) { - if (!intel->hw_stencil) { - swrast_mask |= BUFFER_BIT_STENCIL; - } - else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { - tri_mask |= BUFFER_BIT_STENCIL; - } - else { - blit_mask |= BUFFER_BIT_STENCIL; - } - } - - swrast_mask |= (mask & BUFFER_BIT_ACCUM); - - if (blit_mask) - intelClearWithBlit( ctx, blit_mask, 0, 0, 0, 0, 0); - - if (tri_mask) - intel->vtbl.clear_with_tris( intel, tri_mask, 0, 0, 0, 0, 0); - - if (swrast_mask) - _swrast_Clear( ctx, swrast_mask ); -} - - -void -intelRotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv, - GLuint srcBuffer) -{ - if (intel->vtbl.rotate_window) { - intel->vtbl.rotate_window(intel, dPriv, srcBuffer); - } -} - - -void *intelAllocateAGP( intelContextPtr intel, GLsizei size ) -{ - int region_offset; - drmI830MemAlloc alloc; - int ret; - - if (0) - fprintf(stderr, "%s: %d bytes\n", __FUNCTION__, size); - - alloc.region = I830_MEM_REGION_AGP; - alloc.alignment = 0; - alloc.size = size; - alloc.region_offset = ®ion_offset; - - LOCK_HARDWARE(intel); - - /* Make sure the global heap is initialized - */ - if (intel->texture_heaps[0]) - driAgeTextures( intel->texture_heaps[0] ); - - - ret = drmCommandWriteRead( intel->driFd, - DRM_I830_ALLOC, - &alloc, sizeof(alloc)); - - if (ret) { - fprintf(stderr, "%s: DRM_I830_ALLOC ret %d\n", __FUNCTION__, ret); - UNLOCK_HARDWARE(intel); - return NULL; - } - - if (0) - fprintf(stderr, "%s: allocated %d bytes\n", __FUNCTION__, size); - - /* Need to propogate this information (agp memory in use) to our - * local texture lru. The kernel has already updated the global - * lru. An alternative would have been to allocate memory the - * usual way and then notify the kernel to pin the allocation. - */ - if (intel->texture_heaps[0]) - driAgeTextures( intel->texture_heaps[0] ); - - UNLOCK_HARDWARE(intel); - - return (void *)((char *)intel->intelScreen->tex.map + region_offset); -} - -void intelFreeAGP( intelContextPtr intel, void *pointer ) -{ - int region_offset; - drmI830MemFree memfree; - int ret; - - region_offset = (char *)pointer - (char *)intel->intelScreen->tex.map; - - if (region_offset < 0 || - region_offset > intel->intelScreen->tex.size) { - fprintf(stderr, "offset %d outside range 0..%d\n", region_offset, - intel->intelScreen->tex.size); - return; - } - - memfree.region = I830_MEM_REGION_AGP; - memfree.region_offset = region_offset; - - ret = drmCommandWrite( intel->driFd, - DRM_I830_FREE, - &memfree, sizeof(memfree)); - - if (ret) - fprintf(stderr, "%s: DRM_I830_FREE ret %d\n", __FUNCTION__, ret); -} - -/* This version of AllocateMemoryMESA allocates only agp memory, and - * only does so after the point at which the driver has been - * initialized. - * - * Theoretically a valid context isn't required. However, in this - * implementation, it is, as I'm using the hardware lock to protect - * the kernel data structures, and the current context to get the - * device fd. - */ -void *intelAllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, - GLsizei size, GLfloat readfreq, - GLfloat writefreq, GLfloat priority) -{ - GET_CURRENT_CONTEXT(ctx); - - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, - writefreq, priority); - - if (getenv("INTEL_NO_ALLOC")) - return NULL; - - if (!ctx || INTEL_CONTEXT(ctx) == 0) - return NULL; - - return intelAllocateAGP( INTEL_CONTEXT(ctx), size ); -} - - -/* Called via glXFreeMemoryMESA() */ -void intelFreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer) -{ - GET_CURRENT_CONTEXT(ctx); - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %p\n", __FUNCTION__, pointer); - - if (!ctx || INTEL_CONTEXT(ctx) == 0) { - fprintf(stderr, "%s: no context\n", __FUNCTION__); - return; - } - - intelFreeAGP( INTEL_CONTEXT(ctx), pointer ); -} - -/* Called via glXGetMemoryOffsetMESA() - * - * Returns offset of pointer from the start of agp aperture. - */ -GLuint intelGetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, - const GLvoid *pointer) -{ - GET_CURRENT_CONTEXT(ctx); - intelContextPtr intel; - - if (!ctx || !(intel = INTEL_CONTEXT(ctx)) ) { - fprintf(stderr, "%s: no context\n", __FUNCTION__); - return ~0; - } - - if (!intelIsAgpMemory( intel, pointer, 0 )) - return ~0; - - return intelAgpOffsetFromVirtual( intel, pointer ); -} - - -GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer, - GLint size ) -{ - int offset = (char *)pointer - (char *)intel->intelScreen->tex.map; - int valid = (size >= 0 && - offset >= 0 && - offset + size < intel->intelScreen->tex.size); - - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "intelIsAgpMemory( %p ) : %d\n", pointer, valid ); - - return valid; -} - - -GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *pointer ) -{ - int offset = (char *)pointer - (char *)intel->intelScreen->tex.map; - - if (offset < 0 || offset > intel->intelScreen->tex.size) - return ~0; - else - return intel->intelScreen->tex.offset + offset; -} - - - - - -/* Flip the front & back buffes - */ -void intelPageFlip( const __DRIdrawablePrivate *dPriv ) -{ -#if 0 - intelContextPtr intel; - int tmp, ret; - - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate; - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - - if (dPriv->pClipRects) { - *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0]; - intel->sarea->nbox = 1; - } - - ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); - if (ret) { - fprintf(stderr, "%s: %d\n", __FUNCTION__, ret); - UNLOCK_HARDWARE( intel ); - exit(1); - } - - tmp = intel->sarea->last_enqueue; - intelRefillBatchLocked( intel ); - UNLOCK_HARDWARE( intel ); - - - intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer ); -#endif -} diff --git a/i915/intel_ioctl.h b/i915/intel_ioctl.h deleted file mode 100644 index 6ea47e4..0000000 --- a/i915/intel_ioctl.h +++ /dev/null @@ -1,72 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_IOCTL_H -#define INTEL_IOCTL_H - -#include "intel_context.h" - -extern void intelWaitAgeLocked( intelContextPtr intel, int age, GLboolean unlock ); - -extern void intelClear(GLcontext *ctx, GLbitfield mask); - -extern void intelPageFlip( const __DRIdrawablePrivate *dpriv ); - -extern void intelRotateWindow(intelContextPtr intel, - __DRIdrawablePrivate *dPriv, GLuint srcBuffer); - -extern void intelWaitForIdle( intelContextPtr intel ); -extern void intelFlushBatch( intelContextPtr intel, GLboolean refill ); -extern void intelFlushBatchLocked( intelContextPtr intel, - GLboolean ignore_cliprects, - GLboolean refill, - GLboolean allow_unlock); -extern void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock ); -extern void intelFinish( GLcontext *ctx ); -extern void intelFlush( GLcontext *ctx ); -extern void intelglFlush( GLcontext *ctx ); - -extern void *intelAllocateAGP( intelContextPtr intel, GLsizei size ); -extern void intelFreeAGP( intelContextPtr intel, void *pointer ); - -extern void *intelAllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, - GLsizei size, GLfloat readfreq, - GLfloat writefreq, GLfloat priority ); - -extern void intelFreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, - GLvoid *pointer ); - -extern GLuint intelGetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer ); -extern GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer, - GLint size ); - -extern GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *p ); - -extern void intelWaitIrq( intelContextPtr intel, int seq ); -extern u_int32_t intelGetLastFrame (intelContextPtr intel); -extern int intelEmitIrqLocked( intelContextPtr intel ); -#endif diff --git a/i915/intel_pixel.c b/i915/intel_pixel.c deleted file mode 100644 index 535cbfc..0000000 --- a/i915/intel_pixel.c +++ /dev/null @@ -1,502 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "enums.h" -#include "mtypes.h" -#include "macros.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" - - - -static GLboolean -check_color( const GLcontext *ctx, GLenum type, GLenum format, - const struct gl_pixelstore_attrib *packing, - const void *pixels, GLint sz, GLint pitch ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - GLuint cpp = intel->intelScreen->cpp; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - if ( (pitch & 63) || - ctx->_ImageTransferState || - packing->SwapBytes || - packing->LsbFirst) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: failed 1\n", __FUNCTION__); - return GL_FALSE; - } - - if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && - cpp == 4 && - format == GL_BGRA ) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: passed 2\n", __FUNCTION__); - return GL_TRUE; - } - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: failed\n", __FUNCTION__); - - return GL_FALSE; -} - -static GLboolean -check_color_per_fragment_ops( const GLcontext *ctx ) -{ - int result; - result = (!( ctx->Color.AlphaEnabled || - ctx->Depth.Test || - ctx->Fog.Enabled || - ctx->Scissor.Enabled || - ctx->Stencil.Enabled || - !ctx->Color.ColorMask[0] || - !ctx->Color.ColorMask[1] || - !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || - ctx->Color.ColorLogicOpEnabled || - ctx->Texture._EnabledUnits - ) && - ctx->Current.RasterPosValid); - - return result; -} - - -/** - * Clip the given rectangle against the buffer's bounds (including scissor). - * \param size returns the - * \return GL_TRUE if any pixels remain, GL_FALSE if totally clipped. - * - * XXX Replace this with _mesa_clip_drawpixels() and _mesa_clip_readpixels() - * from Mesa 6.4. We shouldn't apply scissor for ReadPixels. - */ -static GLboolean -clip_pixelrect( const GLcontext *ctx, - const GLframebuffer *buffer, - GLint *x, GLint *y, - GLsizei *width, GLsizei *height) -{ - /* left clipping */ - if (*x < buffer->_Xmin) { - *width -= (buffer->_Xmin - *x); - *x = buffer->_Xmin; - } - - /* right clipping */ - if (*x + *width > buffer->_Xmax) - *width -= (*x + *width - buffer->_Xmax - 1); - - if (*width <= 0) - return GL_FALSE; - - /* bottom clipping */ - if (*y < buffer->_Ymin) { - *height -= (buffer->_Ymin - *y); - *y = buffer->_Ymin; - } - - /* top clipping */ - if (*y + *height > buffer->_Ymax) - *height -= (*y + *height - buffer->_Ymax - 1); - - if (*height <= 0) - return GL_FALSE; - - return GL_TRUE; -} - - -/** - * Compute intersection of a clipping rectangle and pixel rectangle, - * returning results in x/y/w/hOut vars. - * \return GL_TRUE if there's intersection, GL_FALSE if disjoint. - */ -static INLINE GLboolean -intersect_region(const drm_clip_rect_t *box, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint *xOut, GLint *yOut, GLint *wOut, GLint *hOut) -{ - GLint bx = box->x1; - GLint by = box->y1; - GLint bw = box->x2 - bx; - GLint bh = box->y2 - by; - - if (bx < x) bw -= x - bx, bx = x; - if (by < y) bh -= y - by, by = y; - if (bx + bw > x + width) bw = x + width - bx; - if (by + bh > y + height) bh = y + height - by; - - *xOut = bx; - *yOut = by; - *wOut = bw; - *hOut = bh; - - if (bw <= 0) return GL_FALSE; - if (bh <= 0) return GL_FALSE; - - return GL_TRUE; -} - - - -static GLboolean -intelTryReadPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - GLint size = 0; /* not really used */ - GLint pitch = pack->RowLength ? pack->RowLength : width; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* Only accelerate reading to agp buffers. - */ - if ( !intelIsAgpMemory(intel, pixels, - pitch * height * intel->intelScreen->cpp ) ) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: dest not agp\n", __FUNCTION__); - return GL_FALSE; - } - - /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from - * blitter: - */ - if (!pack->Invert) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__); - return GL_FALSE; - } - - if (!check_color(ctx, type, format, pack, pixels, size, pitch)) - return GL_FALSE; - - switch ( intel->intelScreen->cpp ) { - case 4: - break; - default: - return GL_FALSE; - } - - - /* Although the blits go on the command buffer, need to do this and - * fire with lock held to guarentee cliprects and drawing offset are - * correct. - * - * This is an unusual situation however, as the code which flushes - * a full command buffer expects to be called unlocked. As a - * workaround, immediately flush the buffer on aquiring the lock. - */ - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int nbox = dPriv->numClipRects; - int src_offset = intel->readRegion->offset; - int src_pitch = intel->intelScreen->front.pitch; - int dst_offset = intelAgpOffsetFromVirtual( intel, pixels); - drm_clip_rect_t *box = dPriv->pClipRects; - int i; - - assert(dst_offset != ~0); /* should have been caught above */ - - if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height)) { - UNLOCK_HARDWARE( intel ); - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s totally clipped -- nothing to do\n", - __FUNCTION__); - return GL_TRUE; - } - - /* convert to screen coords (y=0=top) */ - y = dPriv->h - y - height; - x += dPriv->x; - y += dPriv->y; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n", - src_pitch, pitch); - - /* We don't really have to do window clipping for readpixels. - * The OpenGL spec says that pixels read from outside the - * visible window region (pixel ownership) have undefined value. - */ - for (i = 0 ; i < nbox ; i++) - { - GLint bx, by, bw, bh; - if (intersect_region(box+i, x, y, width, height, - &bx, &by, &bw, &bh)) { - intelEmitCopyBlitLocked( intel, - intel->intelScreen->cpp, - src_pitch, src_offset, - pitch, dst_offset, - bx, by, - bx - x, by - y, - bw, bh ); - } - } - } - UNLOCK_HARDWARE( intel ); - intelFinish( &intel->ctx ); - - return GL_TRUE; -} - -static void -intelReadPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - GLvoid *pixels ) -{ - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (!intelTryReadPixels( ctx, x, y, width, height, format, type, pack, - pixels)) - _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, - pixels); -} - - - - -static void do_draw_pix( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint pitch, - const void *pixels, - GLuint dest ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = intel->driDrawable; - drm_clip_rect_t *box = dPriv->pClipRects; - int nbox = dPriv->numClipRects; - int i; - int src_offset = intelAgpOffsetFromVirtual( intel, pixels); - int src_pitch = pitch; - - assert(src_offset != ~0); /* should be caught earlier */ - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - if (ctx->DrawBuffer) - { - y -= height; /* cope with pixel zoom */ - - if (!clip_pixelrect(ctx, ctx->DrawBuffer, - &x, &y, &width, &height)) { - UNLOCK_HARDWARE( intel ); - return; - } - - y = dPriv->h - y - height; /* convert from gl to hardware coords */ - x += dPriv->x; - y += dPriv->y; - - for (i = 0 ; i < nbox ; i++ ) - { - GLint bx, by, bw, bh; - if (intersect_region(box + i, x, y, width, height, - &bx, &by, &bw, &bh)) { - intelEmitCopyBlitLocked( intel, - intel->intelScreen->cpp, - src_pitch, src_offset, - intel->intelScreen->front.pitch, - intel->drawRegion->offset, - bx - x, by - y, - bx, by, - bw, bh ); - } - } - } - UNLOCK_HARDWARE( intel ); - intelFinish( &intel->ctx ); -} - - - -static GLboolean -intelTryDrawPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - GLint pitch = unpack->RowLength ? unpack->RowLength : width; - GLuint dest; - GLuint cpp = intel->intelScreen->cpp; - GLint size = width * pitch * cpp; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - switch (format) { - case GL_RGB: - case GL_RGBA: - case GL_BGRA: - dest = intel->drawRegion->offset; - - /* Planemask doesn't have full support in blits. - */ - if (!ctx->Color.ColorMask[RCOMP] || - !ctx->Color.ColorMask[GCOMP] || - !ctx->Color.ColorMask[BCOMP] || - !ctx->Color.ColorMask[ACOMP]) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: planemask\n", __FUNCTION__); - return GL_FALSE; - } - - /* Can't do conversions on agp reads/draws. - */ - if ( !intelIsAgpMemory( intel, pixels, size ) ) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: not agp memory\n", __FUNCTION__); - return GL_FALSE; - } - - if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) { - return GL_FALSE; - } - if (!check_color_per_fragment_ops(ctx)) { - return GL_FALSE; - } - - if (ctx->Pixel.ZoomX != 1.0F || - ctx->Pixel.ZoomY != -1.0F) - return GL_FALSE; - break; - - default: - return GL_FALSE; - } - - if ( intelIsAgpMemory(intel, pixels, size) ) - { - do_draw_pix( ctx, x, y, width, height, pitch, pixels, dest ); - return GL_TRUE; - } - else if (0) - { - /* Pixels is in regular memory -- get dma buffers and perform - * upload through them. No point doing this for regular uploads - * but once we remove some of the restrictions above (colormask, - * pixelformat conversion, zoom?, etc), this could be a win. - */ - } - else - return GL_FALSE; - - return GL_FALSE; -} - -static void -intelDrawPixels( GLcontext *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels ) -{ - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (!intelTryDrawPixels( ctx, x, y, width, height, format, type, - unpack, pixels )) - _swrast_DrawPixels( ctx, x, y, width, height, format, type, - unpack, pixels ); -} - - - - -/** - * Implement glCopyPixels for the front color buffer (or back buffer Pixmap) - * for the color buffer. Don't support zooming, pixel transfer, etc. - * We do support copying from one window to another, ala glXMakeCurrentRead. - */ -static void -intelCopyPixels( GLcontext *ctx, - GLint srcx, GLint srcy, GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type ) -{ -#if 0 - const XMesaContext xmesa = XMESA_CONTEXT(ctx); - const SWcontext *swrast = SWRAST_CONTEXT( ctx ); - XMesaDisplay *dpy = xmesa->xm_visual->display; - const XMesaDrawable drawBuffer = xmesa->xm_draw_buffer->buffer; - const XMesaDrawable readBuffer = xmesa->xm_read_buffer->buffer; - const XMesaGC gc = xmesa->xm_draw_buffer->gc; - - ASSERT(dpy); - ASSERT(gc); - - if (drawBuffer && /* buffer != 0 means it's a Window or Pixmap */ - readBuffer && - type == GL_COLOR && - (swrast->_RasterMask & ~CLIP_BIT) == 0 && /* no blend, z-test, etc */ - ctx->_ImageTransferState == 0 && /* no color tables, scale/bias, etc */ - ctx->Pixel.ZoomX == 1.0 && /* no zooming */ - ctx->Pixel.ZoomY == 1.0) { - /* Note: we don't do any special clipping work here. We could, - * but X will do it for us. - */ - srcy = FLIP(xmesa->xm_read_buffer, srcy) - height + 1; - desty = FLIP(xmesa->xm_draw_buffer, desty) - height + 1; - XCopyArea(dpy, readBuffer, drawBuffer, gc, - srcx, srcy, width, height, destx, desty); - } -#else - _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type ); -#endif -} - - - - -void intelInitPixelFuncs( struct dd_function_table *functions ) -{ - functions->CopyPixels = intelCopyPixels; - if (!getenv("INTEL_NO_BLITS")) { - functions->ReadPixels = intelReadPixels; - functions->DrawPixels = intelDrawPixels; - } -} diff --git a/i915/intel_pixel_read.c b/i915/intel_pixel_read.c new file mode 100644 index 0000000..2e31656 --- /dev/null +++ b/i915/intel_pixel_read.c @@ -0,0 +1,318 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "enums.h" +#include "mtypes.h" +#include "macros.h" +#include "image.h" +#include "bufferobj.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_buffer_objects.h" + +/* For many applications, the new ability to pull the source buffers + * back out of the GTT and then do the packing/conversion operations + * in software will be as much of an improvement as trying to get the + * blitter and/or texture engine to do the work. + * + * This step is gated on private backbuffers. + * + * Obviously the frontbuffer can't be pulled back, so that is either + * an argument for blit/texture readpixels, or for blitting to a + * temporary and then pulling that back. + * + * When the destination is a pbo, however, it's not clear if it is + * ever going to be pulled to main memory (though the access param + * will be a good hint). So it sounds like we do want to be able to + * choose between blit/texture implementation on the gpu and pullback + * and cpu-based copying. + * + * Unless you can magically turn client memory into a PBO for the + * duration of this call, there will be a cpu-based copying step in + * any case. + */ + + +static GLboolean +do_texture_readpixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + struct intel_region *dest_region) +{ +#if 0 + struct intel_context *intel = intel_context(ctx); + intelScreenPrivate *screen = intel->intelScreen; + GLint pitch = pack->RowLength ? pack->RowLength : width; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int textureFormat; + GLenum glTextureFormat; + int destFormat, depthFormat, destPitch; + drm_clip_rect_t tmp; + + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + + if (ctx->_ImageTransferState || + pack->SwapBytes || pack->LsbFirst || !pack->Invert) { + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); + return GL_FALSE; + } + + intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel)); + + if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: couldn't set dest %s/%s\n", + __FUNCTION__, + _mesa_lookup_enum_by_nr(type), + _mesa_lookup_enum_by_nr(format)); + return GL_FALSE; + } + + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + intel->vtbl.install_meta_state(intel); + intel->vtbl.meta_no_depth_write(intel); + intel->vtbl.meta_no_stencil_write(intel); + + if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { + UNLOCK_HARDWARE(intel); + SET_STATE(i830, state); + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); + return GL_TRUE; + } + + y = dPriv->h - y - height; + x += dPriv->x; + y += dPriv->y; + + + /* Set the frontbuffer up as a large rectangular texture. + */ + intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat); + + + intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat); + + + /* Set the 3d engine to draw into the destination region: + */ + + intel->vtbl.meta_draw_region(intel, dest_region); + intel->vtbl.meta_draw_format(intel, destFormat, depthFormat); /* ?? */ + + + /* Draw a single quad, no cliprects: + */ + intel->vtbl.meta_disable_cliprects(intel); + + intel->vtbl.draw_quad(intel, + 0, width, 0, height, + 0x00ff00ff, x, x + width, y, y + height); + + intel->vtbl.leave_meta_state(intel); + } + UNLOCK_HARDWARE(intel); + + intel_region_wait_fence(ctx, dest_region); /* required by GL */ + return GL_TRUE; +#endif + + return GL_FALSE; +} + + + + +static GLboolean +do_blit_readpixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *src = intel_readbuf_region(intel); + struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); + GLuint dst_offset; + GLuint rowLength; + dri_fence *fence = NULL; + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s\n", __FUNCTION__); + + if (!src) + return GL_FALSE; + + if (dst) { + /* XXX This validation should be done by core mesa: + */ + if (!_mesa_validate_pbo_access(2, pack, width, height, 1, + format, type, pixels)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); + return GL_TRUE; + } + } + else { + /* PBO only for now: + */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); + return GL_FALSE; + } + + + if (ctx->_ImageTransferState || + !intel_check_blit_format(src, format, type)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad format for blit\n", __FUNCTION__); + return GL_FALSE; + } + + if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: bad packing params\n", __FUNCTION__); + return GL_FALSE; + } + + if (pack->RowLength > 0) + rowLength = pack->RowLength; + else + rowLength = width; + + if (pack->Invert) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); + return GL_FALSE; + } + else { + rowLength = -rowLength; + } + + /* XXX 64-bit cast? */ + dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height, + format, type, 0, 0, 0); + + + /* Although the blits go on the command buffer, need to do this and + * fire with lock held to guarentee cliprects are correct. + */ + intelFlush(&intel->ctx); + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + GLboolean all = (width * height * src->cpp == dst->Base.Size && + x == 0 && dst_offset == 0); + + dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int nbox = dPriv->numClipRects; + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t rect; + drm_clip_rect_t src_rect; + int i; + + src_rect.x1 = dPriv->x + x; + src_rect.y1 = dPriv->y + dPriv->h - (y + height); + src_rect.x2 = src_rect.x1 + width; + src_rect.y2 = src_rect.y1 + height; + + + + for (i = 0; i < nbox; i++) { + if (!intel_intersect_cliprects(&rect, &src_rect, &box[i])) + continue; + + intelEmitCopyBlit(intel, + src->cpp, + src->pitch, src->buffer, 0, src->tiled, + rowLength, dst_buffer, dst_offset, GL_FALSE, + rect.x1, + rect.y1, + rect.x1 - src_rect.x1, + rect.y2 - src_rect.y2, + rect.x2 - rect.x1, rect.y2 - rect.y1, + GL_COPY); + } + + intel_batchbuffer_flush(intel->batch); + fence = intel->batch->last_fence; + dri_fence_reference(fence); + + } + UNLOCK_HARDWARE(intel); + + if (fence) { + dri_fence_wait(fence); + dri_fence_unreference(fence); + } + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - DONE\n", __FUNCTION__); + + return GL_TRUE; +} + +void +intelReadPixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + intelFlush(ctx); + + if (do_blit_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; + + if (do_texture_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + + _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); +} diff --git a/i915/intel_render.c b/i915/intel_render.c index 773779a..5e6500c 100644 --- a/i915/intel_render.c +++ b/i915/intel_render.c @@ -39,6 +39,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" +#include "tnl/t_pipeline.h" #include "intel_screen.h" #include "intel_context.h" @@ -51,14 +52,14 @@ * dma buffers. Use strip/fan hardware primitives where possible. * Try to simulate missing primitives with indexed vertices. */ -#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to - * be adjusted for points on the INTEL/I845G - */ +#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to + * be adjusted for points on the INTEL/I845G + */ #define HAVE_LINES 1 #define HAVE_LINE_STRIPS 1 #define HAVE_TRIANGLES 1 #define HAVE_TRI_STRIPS 1 -#define HAVE_TRI_STRIP_1 0 /* has it, template can't use it yet */ +#define HAVE_TRI_STRIP_1 0 /* has it, template can't use it yet */ #define HAVE_TRI_FANS 1 #define HAVE_POLYGONS 1 #define HAVE_QUADS 0 @@ -66,7 +67,7 @@ #define HAVE_ELTS 0 -static GLuint hw_prim[GL_POLYGON+1] = { +static GLuint hw_prim[GL_POLYGON + 1] = { 0, PRIM3D_LINELIST, PRIM3D_LINESTRIP, @@ -79,7 +80,7 @@ static GLuint hw_prim[GL_POLYGON+1] = { PRIM3D_POLY }; -static const GLenum reduced_prim[GL_POLYGON+1] = { +static const GLenum reduced_prim[GL_POLYGON + 1] = { GL_POINTS, GL_LINES, GL_LINES, @@ -92,58 +93,61 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { GL_TRIANGLES }; -static const int scale_prim[GL_POLYGON+1] = { - 0, /* fallback case */ +static const int scale_prim[GL_POLYGON + 1] = { + 0, /* fallback case */ 1, 2, 2, 1, 3, 3, - 0, /* fallback case */ - 0, /* fallback case */ + 0, /* fallback case */ + 0, /* fallback case */ 3 }; -static void intelDmaPrimitive( intelContextPtr intel, GLenum prim ) +static void +intelDmaPrimitive(struct intel_context *intel, GLenum prim) { - if (0) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); + if (0) + fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); INTEL_FIREVERTICES(intel); - intel->vtbl.reduced_primitive_state( intel, reduced_prim[prim] ); - intelStartInlinePrimitive( intel, hw_prim[prim] ); + intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]); + intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS); } -#define LOCAL_VARS intelContextPtr intel = INTEL_CONTEXT(ctx) +#define LOCAL_VARS struct intel_context *intel = intel_context(ctx) #define INIT( prim ) \ do { \ intelDmaPrimitive( intel, prim ); \ } while (0) -#define FLUSH() INTEL_FIREVERTICES( intel ) + +#define FLUSH() INTEL_FIREVERTICES(intel) #define GET_SUBSEQUENT_VB_MAX_VERTS() \ - (((intel->alloc.size / 2) - 1500) / (intel->vertex_size*4)) + ((intel->batch->size - 1500) / (intel->vertex_size*4)) #define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS() #define ALLOC_VERTS( nr ) \ intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size ) - + #define EMIT_VERTS( ctx, j, nr, buf ) \ - _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf ) + _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf ) #define TAG(x) intel_##x #include "tnl_dd/t_dd_dmatmp.h" - - + + /**********************************************************************/ /* Render pipeline stage */ /**********************************************************************/ /* Heuristic to choose between the two render paths: */ -static GLboolean choose_render( intelContextPtr intel, - struct vertex_buffer *VB ) +static GLboolean +choose_render(struct intel_context *intel, struct vertex_buffer *VB) { int vertsz = intel->vertex_size; int cost_render = 0; @@ -153,20 +157,20 @@ static GLboolean choose_render( intelContextPtr intel, int nr_rverts = 0; int rprim = intel->reduced_primitive; int i = 0; - - for (i = 0 ; i < VB->PrimitiveCount ; i++) { + + for (i = 0; i < VB->PrimitiveCount; i++) { GLuint prim = VB->Primitive[i].mode; GLuint length = VB->Primitive[i].count; if (!length) - continue; + continue; nr_prims++; nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK]; if (reduced_prim[prim & PRIM_MODE_MASK] != rprim) { - nr_rprims++; - rprim = reduced_prim[prim & PRIM_MODE_MASK]; + nr_rprims++; + rprim = reduced_prim[prim & PRIM_MODE_MASK]; } } @@ -177,64 +181,82 @@ static GLboolean choose_render( intelContextPtr intel, /* One point for every 1024 dwords (4k) of dma: */ - cost_render += (vertsz * i) / 1024; - cost_fallback += (vertsz * nr_rverts) / 1024; + cost_render += (vertsz * i) / 1024; + cost_fallback += (vertsz * nr_rverts) / 1024; if (0) fprintf(stderr, "cost render: %d fallback: %d\n", - cost_render, cost_fallback); + cost_render, cost_fallback); - if (cost_render > cost_fallback) + if (cost_render > cost_fallback) return GL_FALSE; return GL_TRUE; } -static GLboolean intel_run_render( GLcontext *ctx, - struct tnl_pipeline_stage *stage ) +static GLboolean +intel_run_render(GLcontext * ctx, struct tnl_pipeline_stage *stage) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint i; + intel->vtbl.render_prevalidate( intel ); + /* Don't handle clipping or indexed vertices. */ - if (intel->RenderIndex != 0 || - !intel_validate_render( ctx, VB ) || - !choose_render( intel, VB )) { + if (intel->RenderIndex != 0 || + !intel_validate_render(ctx, VB) || !choose_render(intel, VB)) { return GL_TRUE; } tnl->clipspace.new_inputs |= VERT_BIT_POS; - tnl->Driver.Render.Start( ctx ); - - for (i = 0 ; i < VB->PrimitiveCount ; i++) - { + tnl->Driver.Render.Start(ctx); + + for (i = 0; i < VB->PrimitiveCount; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); GLuint start = VB->Primitive[i].start; GLuint length = VB->Primitive[i].count; if (!length) - continue; + continue; - intel_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length, - prim ); + intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start, + start + length, prim); } - - tnl->Driver.Render.Finish( ctx ); - return GL_FALSE; /* finished the pipe */ + tnl->Driver.Render.Finish(ctx); + + INTEL_FIREVERTICES(intel); + + return GL_FALSE; /* finished the pipe */ } -const struct tnl_pipeline_stage _intel_render_stage = -{ +static const struct tnl_pipeline_stage _intel_render_stage = { "intel render", NULL, NULL, NULL, NULL, - intel_run_render /* run */ + intel_run_render /* run */ +}; + +const struct tnl_pipeline_stage *intel_pipeline[] = { + &_tnl_vertex_transform_stage, + &_tnl_vertex_cull_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, +#if 1 + &_intel_render_stage, /* ADD: unclipped rastersetup-to-dma */ +#endif + &_tnl_render_stage, + 0, }; diff --git a/i915/intel_rotate.c b/i915/intel_rotate.c deleted file mode 100644 index a77640e..0000000 --- a/i915/intel_rotate.c +++ /dev/null @@ -1,221 +0,0 @@ - -/** - * Routines for simple 2D->2D transformations for rotated, flipped screens. - * - * XXX This code is not intel-specific. Move it into a common/utility - * someday. - */ - -#include "intel_rotate.h" - -#define MIN2(A, B) ( ((A) < (B)) ? (A) : (B) ) - -#define ABS(A) ( ((A) < 0) ? -(A) : (A) ) - - -void -matrix23Set(struct matrix23 *m, - int m00, int m01, int m02, - int m10, int m11, int m12) -{ - m->m00 = m00; m->m01 = m01; m->m02 = m02; - m->m10 = m10; m->m11 = m11; m->m12 = m12; -} - - -/* - * Transform (x,y) coordinate by the given matrix. - */ -void -matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y) -{ - const float x0 = *x; - const float y0 = *y; - - *x = m->m00 * x0 + m->m01 * y0 + m->m02; - *y = m->m10 * x0 + m->m11 * y0 + m->m12; -} - - -void -matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y) -{ - const int x0 = *x; - const int y0 = *y; - - *x = m->m00 * x0 + m->m01 * y0 + m->m02; - *y = m->m10 * x0 + m->m11 * y0 + m->m12; -} - - -/* - * Transform a width and height by the given matrix. - * XXX this could be optimized quite a bit. - */ -void -matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist) -{ - int x0 = 0, y0 = 0; - int x1 = *xDist, y1 = 0; - int x2 = 0, y2 = *yDist; - matrix23TransformCoordi(m, &x0, &y0); - matrix23TransformCoordi(m, &x1, &y1); - matrix23TransformCoordi(m, &x2, &y2); - - *xDist = (x1 - x0) + (x2 - x0); - *yDist = (y1 - y0) + (y2 - y0); - - if (*xDist < 0) - *xDist = -*xDist; - if (*yDist < 0) - *yDist = -*yDist; -} - - -/** - * Transform the rect defined by (x, y, w, h) by m. - */ -void -matrix23TransformRect(const struct matrix23 *m, int *x, int *y, int *w, int *h) -{ - int x0 = *x, y0 = *y; - int x1 = *x + *w, y1 = *y; - int x2 = *x + *w, y2 = *y + *h; - int x3 = *x, y3 = *y + *h; - matrix23TransformCoordi(m, &x0, &y0); - matrix23TransformCoordi(m, &x1, &y1); - matrix23TransformCoordi(m, &x2, &y2); - matrix23TransformCoordi(m, &x3, &y3); - *w = ABS(x1 - x0) + ABS(x2 - x1); - /**w = ABS(*w);*/ - *h = ABS(y1 - y0) + ABS(y2 - y1); - /**h = ABS(*h);*/ - *x = MIN2(x0, x1); - *x = MIN2(*x, x2); - *y = MIN2(y0, y1); - *y = MIN2(*y, y2); -} - - -/* - * Make rotation matrix for width X height screen. - */ -void -matrix23Rotate(struct matrix23 *m, int width, int height, int angle) -{ - switch (angle) { - case 0: - matrix23Set(m, 1, 0, 0, 0, 1, 0); - break; - case 90: - matrix23Set(m, 0, 1, 0, -1, 0, width); - break; - case 180: - matrix23Set(m, -1, 0, width, 0, -1, height); - break; - case 270: - matrix23Set(m, 0, -1, height, 1, 0, 0); - break; - default: - /*abort()*/; - } -} - - -/* - * Make flip/reflection matrix for width X height screen. - */ -void -matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip) -{ - if (xflip) { - m->m00 = -1; m->m01 = 0; m->m02 = width - 1; - } - else { - m->m00 = 1; m->m01 = 0; m->m02 = 0; - } - if (yflip) { - m->m10 = 0; m->m11 = -1; m->m12 = height - 1; - } - else { - m->m10 = 0; m->m11 = 1; m->m12 = 0; - } -} - - -/* - * result = a * b - */ -void -matrix23Multiply(struct matrix23 *result, - const struct matrix23 *a, const struct matrix23 *b) -{ - result->m00 = a->m00 * b->m00 + a->m01 * b->m10; - result->m01 = a->m00 * b->m01 + a->m01 * b->m11; - result->m02 = a->m00 * b->m02 + a->m01 * b->m12 + a->m02; - - result->m10 = a->m10 * b->m00 + a->m11 * b->m10; - result->m11 = a->m10 * b->m01 + a->m11 * b->m11; - result->m12 = a->m10 * b->m02 + a->m11 * b->m12 + a->m12; -} - - -#if 000 - -#include <stdio.h> - -int -main(int argc, char *argv[]) -{ - int width = 500, height = 400; - int rot; - int fx = 0, fy = 0; /* flip x and/or y ? */ - int coords[4][2]; - - /* four corner coords to test with */ - coords[0][0] = 0; coords[0][1] = 0; - coords[1][0] = width-1; coords[1][1] = 0; - coords[2][0] = width-1; coords[2][1] = height-1; - coords[3][0] = 0; coords[3][1] = height-1; - - - for (rot = 0; rot < 360; rot += 90) { - struct matrix23 rotate, flip, m; - int i; - - printf("Rot %d, xFlip %d, yFlip %d:\n", rot, fx, fy); - - /* make transformation matrix 'm' */ - matrix23Rotate(&rotate, width, height, rot); - matrix23Flip(&flip, width, height, fx, fy); - matrix23Multiply(&m, &rotate, &flip); - - /* xform four coords */ - for (i = 0; i < 4; i++) { - int x = coords[i][0]; - int y = coords[i][1]; - matrix23TransformCoordi(&m, &x, &y); - printf(" %d, %d -> %d %d\n", coords[i][0], coords[i][1], x, y); - } - - /* xform width, height */ - { - int x = width; - int y = height; - matrix23TransformDistance(&m, &x, &y); - printf(" %d x %d -> %d x %d\n", width, height, x, y); - } - - /* xform rect */ - { - int x = 50, y = 10, w = 200, h = 100; - matrix23TransformRect(&m, &x, &y, &w, &h); - printf(" %d,%d %d x %d -> %d, %d %d x %d\n", 50, 10, 200, 100, - x, y, w, h); - } - - } - - return 0; -} -#endif diff --git a/i915/intel_rotate.h b/i915/intel_rotate.h deleted file mode 100644 index 0da45d2..0000000 --- a/i915/intel_rotate.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef INTEL_ROTATE_H -#define INTEL_ROTATE_H 1 - -struct matrix23 -{ - int m00, m01, m02; - int m10, m11, m12; -}; - - - -extern void -matrix23Set(struct matrix23 *m, - int m00, int m01, int m02, - int m10, int m11, int m12); - -extern void -matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y); - -extern void -matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y); - -extern void -matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist); - -extern void -matrix23TransformRect(const struct matrix23 *m, - int *x, int *y, int *w, int *h); - -extern void -matrix23Rotate(struct matrix23 *m, int width, int height, int angle); - -extern void -matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip); - -extern void -matrix23Multiply(struct matrix23 *result, - const struct matrix23 *a, const struct matrix23 *b); - - -#endif /* INTEL_ROTATE_H */ diff --git a/i915/intel_screen.c b/i915/intel_screen.c deleted file mode 100644 index ca8610b..0000000 --- a/i915/intel_screen.c +++ /dev/null @@ -1,690 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "context.h" -#include "framebuffer.h" -#include "matrix.h" -#include "renderbuffer.h" -#include "simple_list.h" -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - - -#include "intel_screen.h" - -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_tris.h" -#include "intel_ioctl.h" - -#include "i830_dri.h" - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN - DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END - DRI_CONF_SECTION_QUALITY - DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END -DRI_CONF_END; -const GLuint __driNConfigOptions = 4; - -#ifdef USE_NEW_INTERFACE -static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -#endif /*USE_NEW_INTERFACE*/ - -extern const struct dri_extension card_extensions[]; - -/** - * Map all the memory regions described by the screen. - * \return GL_TRUE if success, GL_FALSE if error. - */ -GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - if (intelScreen->front.handle) { - if (drmMap(sPriv->fd, - intelScreen->front.handle, - intelScreen->front.size, - (drmAddress *)&intelScreen->front.map) != 0) { - _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); - return GL_FALSE; - } - } - else { - _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); - } - - if (drmMap(sPriv->fd, - intelScreen->back.handle, - intelScreen->back.size, - (drmAddress *)&intelScreen->back.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->depth.handle, - intelScreen->depth.size, - (drmAddress *)&intelScreen->depth.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->tex.handle, - intelScreen->tex.size, - (drmAddress *)&intelScreen->tex.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (0) - printf("Mappings: front: %p back: %p depth: %p tex: %p\n", - intelScreen->front.map, - intelScreen->back.map, - intelScreen->depth.map, - intelScreen->tex.map); - return GL_TRUE; -} - - -void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen) -{ -#define REALLY_UNMAP 1 - if (intelScreen->front.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) - printf("drmUnmap front failed!\n"); -#endif - intelScreen->front.map = NULL; - } - if (intelScreen->back.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) - printf("drmUnmap back failed!\n"); -#endif - intelScreen->back.map = NULL; - } - if (intelScreen->depth.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->depth.map, intelScreen->depth.size); - intelScreen->depth.map = NULL; -#endif - } - if (intelScreen->tex.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->tex.map, intelScreen->tex.size); - intelScreen->tex.map = NULL; -#endif - } -} - - -static void -intelPrintDRIInfo(intelScreenPrivate *intelScreen, - __DRIscreenPrivate *sPriv, - I830DRIPtr gDRIPriv) -{ - fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->front.size, intelScreen->front.offset, - intelScreen->front.pitch); - fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->back.size, intelScreen->back.offset, - intelScreen->back.pitch); - fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->depth.size, intelScreen->depth.offset, - intelScreen->depth.pitch); - fprintf(stderr, "*** Rotated size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->rotated.size, intelScreen->rotated.offset, - intelScreen->rotated.pitch); - fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", - intelScreen->tex.size, intelScreen->tex.offset); - fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); -} - - -static void -intelPrintSAREA(const drmI830Sarea *sarea) -{ - fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, sarea->height); - fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); - fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); - fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); - fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->tex_offset, sarea->tex_size, - (unsigned) sarea->tex_handle); - fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); - fprintf(stderr, - "SAREA: rotated offset: 0x%08x size: 0x%x\n", - sarea->rotated_offset, sarea->rotated_size); - fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); -} - - -/** - * A number of the screen parameters are obtained/computed from - * information in the SAREA. This function updates those parameters. - */ -void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - drmI830Sarea *sarea) -{ - intelScreen->width = sarea->width; - intelScreen->height = sarea->height; - - intelScreen->front.offset = sarea->front_offset; - intelScreen->front.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->front.handle = sarea->front_handle; - intelScreen->front.size = sarea->front_size; - - intelScreen->back.offset = sarea->back_offset; - intelScreen->back.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->back.handle = sarea->back_handle; - intelScreen->back.size = sarea->back_size; - - intelScreen->depth.offset = sarea->depth_offset; - intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->depth.handle = sarea->depth_handle; - intelScreen->depth.size = sarea->depth_size; - - intelScreen->tex.offset = sarea->tex_offset; - intelScreen->logTextureGranularity = sarea->log_tex_granularity; - intelScreen->tex.handle = sarea->tex_handle; - intelScreen->tex.size = sarea->tex_size; - - intelScreen->rotated.offset = sarea->rotated_offset; - intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp; - intelScreen->rotated.size = sarea->rotated_size; - intelScreen->current_rotation = sarea->rotation; - matrix23Rotate(&intelScreen->rotMatrix, - sarea->width, sarea->height, sarea->rotation); - intelScreen->rotatedWidth = sarea->virtualX; - intelScreen->rotatedHeight = sarea->virtualY; - - if (0) - intelPrintSAREA(sarea); -} - - -static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen; - I830DRIPtr gDRIPriv = (I830DRIPtr)sPriv->pDevPriv; - drmI830Sarea *sarea; - PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = - (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); - void * const psc = sPriv->psc->screenConfigs; - - if (sPriv->devPrivSize != sizeof(I830DRIRec)) { - fprintf(stderr,"\nERROR! sizeof(I830DRIRec) does not match passed size from device driver\n"); - return GL_FALSE; - } - - /* Allocate the private area */ - intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate)); - if (!intelScreen) { - fprintf(stderr,"\nERROR! Allocating private area failed\n"); - return GL_FALSE; - } - /* parse information in __driConfigOptions */ - driParseOptionInfo (&intelScreen->optionCache, - __driConfigOptions, __driNConfigOptions); - - intelScreen->driScrnPriv = sPriv; - sPriv->private = (void *)intelScreen; - intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; - sarea = (drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - - intelScreen->deviceID = gDRIPriv->deviceID; - intelScreen->mem = gDRIPriv->mem; - intelScreen->cpp = gDRIPriv->cpp; - - switch (gDRIPriv->bitsPerPixel) { - case 15: intelScreen->fbFormat = DV_PF_555; break; - case 16: intelScreen->fbFormat = DV_PF_565; break; - case 32: intelScreen->fbFormat = DV_PF_8888; break; - } - - intelUpdateScreenFromSAREA(intelScreen, sarea); - - if (0) - intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); - - if (!intelMapScreenRegions(sPriv)) { - fprintf(stderr,"\nERROR! mapping regions\n"); - _mesa_free(intelScreen); - sPriv->private = NULL; - return GL_FALSE; - } - - intelScreen->drmMinor = sPriv->drmMinor; - - /* Determine if IRQs are active? */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_IRQ_ACTIVE; - gp.value = &intelScreen->irq_active; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: %d\n", ret); - return GL_FALSE; - } - } - - /* Determine if batchbuffers are allowed */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_ALLOW_BATCHBUFFER; - gp.value = &intelScreen->allow_batchbuffer; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret); - return GL_FALSE; - } - } - - if (glx_enable_extension != NULL) { - (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); - (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); - (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); - (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" ); - (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); - } - - sPriv->psc->allocateMemory = (void *) intelAllocateMemoryMESA; - sPriv->psc->freeMemory = (void *) intelFreeMemoryMESA; - sPriv->psc->memoryOffset = (void *) intelGetMemoryOffsetMESA; - - return GL_TRUE; -} - - -static void intelDestroyScreen(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - intelUnmapScreenRegions(intelScreen); - - driDestroyOptionInfo (&intelScreen->optionCache); - - FREE(intelScreen); - sPriv->private = NULL; -} - - -static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv, - __DRIdrawablePrivate *driDrawPriv, - const __GLcontextModes *mesaVis, - GLboolean isPixmap ) -{ - intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private; - - if (isPixmap) { - return GL_FALSE; /* not implemented */ - } else { - GLboolean swStencil = (mesaVis->stencilBits > 0 && - mesaVis->depthBits != 24); - - struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); - - { - driRenderbuffer *frontRb - = driNewRenderbuffer(GL_RGBA, - screen->front.map, - screen->cpp, - screen->front.offset, screen->front.pitch, - driDrawPriv); - intelSetSpanFunctions(frontRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); - } - - if (mesaVis->doubleBufferMode) { - driRenderbuffer *backRb - = driNewRenderbuffer(GL_RGBA, - screen->back.map, - screen->cpp, - screen->back.offset, screen->back.pitch, - driDrawPriv); - intelSetSpanFunctions(backRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); - } - - if (mesaVis->depthBits == 16) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT16, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - else if (mesaVis->depthBits == 24) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT24, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - - if (mesaVis->stencilBits > 0 && !swStencil) { - driRenderbuffer *stencilRb - = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(stencilRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); - } - - _mesa_add_soft_renderbuffers(fb, - GL_FALSE, /* color */ - GL_FALSE, /* depth */ - swStencil, - mesaVis->accumRedBits > 0, - GL_FALSE, /* alpha */ - GL_FALSE /* aux */); - driDrawPriv->driverPrivate = (void *) fb; - - return (driDrawPriv->driverPrivate != NULL); - } -} - -static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) -{ - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); -} - - -/** - * Get information about previous buffer swaps. - */ -static int -intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) -{ - intelContextPtr intel; - - if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) - || (dPriv->driContextPriv->driverPrivate == NULL) - || (sInfo == NULL) ) { - return -1; - } - - intel = dPriv->driContextPriv->driverPrivate; - sInfo->swap_count = intel->swap_count; - sInfo->swap_ust = intel->swap_ust; - sInfo->swap_missed_count = intel->swap_missed_count; - - sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) - ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust ) - : 0.0; - - return 0; -} - - -/* There are probably better ways to do this, such as an - * init-designated function to register chipids and createcontext - * functions. - */ -extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - -extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - - - - -static GLboolean intelCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - switch (intelScreen->deviceID) { - case PCI_CHIP_845_G: - case PCI_CHIP_I830_M: - case PCI_CHIP_I855_GM: - case PCI_CHIP_I865_G: - return i830CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q35_G: - case PCI_CHIP_Q33_G: - return i915CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - default: - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); - return GL_FALSE; - } -} - - -static const struct __DriverAPIRec intelAPI = { - .InitDriver = intelInitDriver, - .DestroyScreen = intelDestroyScreen, - .CreateContext = intelCreateContext, - .DestroyContext = intelDestroyContext, - .CreateBuffer = intelCreateBuffer, - .DestroyBuffer = intelDestroyBuffer, - .SwapBuffers = intelSwapBuffers, - .MakeCurrent = intelMakeCurrent, - .UnbindContext = intelUnbindContext, - .GetSwapInfo = intelGetSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = intelCopySubBuffer -}; - - -static __GLcontextModes * -intelFillInModes( unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer ) -{ - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - u_int8_t depth_bits_array[3]; - u_int8_t stencil_bits_array[3]; - - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = depth_buffer_factor * back_buffer_factor * 4; - - if ( pixel_bits == 16 ) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); - m = modes; - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_TRUE_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_DIRECT_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - /* Mark the visual as slow if there are "fake" stencil bits. - */ - for ( m = modes ; m != NULL ; m = m->next ) { - if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { - m->visualRating = GLX_SLOW_CONFIG; - } - } - - return modes; -} - - -/** - * This is the bootstrap function for the driver. libGL supplies all of the - * requisite information about the system, and the driver initializes itself. - * This routine also fills in the linked list pointed to by \c driver_modes - * with the \c __GLcontextModes that the driver can support for windows or - * pbuffers. - * - * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on - * failure. - */ -PUBLIC -void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc, - const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - drmAddress pSAREA, int fd, - int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes ) - -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = { 1, 5, 0 }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = { 1, 4, 0 }; - - dri_interface = interface; - - if ( ! driCheckDriDdxDrmVersions2( "i915", - dri_version, & dri_expected, - ddx_version, & ddx_expected, - drm_version, & drm_expected ) ) { - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &intelAPI); - if ( psp != NULL ) { - I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; - *driver_modes = intelFillInModes( dri_priv->cpp * 8, - (dri_priv->cpp == 2) ? 16 : 24, - (dri_priv->cpp == 2) ? 0 : 8, - 1 ); - - /* Calling driInitExtensions here, with a NULL context pointer, does not actually - * enable the extensions. It just makes sure that all the dispatch offsets for all - * the extensions that *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create is called, but we can't - * enable the extensions until we have a context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - driInitExtensions( NULL, card_extensions, GL_FALSE ); - } - - return (void *) psp; -} diff --git a/i915/intel_screen.h b/i915/intel_screen.h deleted file mode 100644 index 24cfd9b..0000000 --- a/i915/intel_screen.h +++ /dev/null @@ -1,112 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef _INTEL_INIT_H_ -#define _INTEL_INIT_H_ - -#include <sys/time.h> -#include "xmlconfig.h" -#include "dri_util.h" -#include "intel_rotate.h" -#include "i830_common.h" - - -/* This roughly corresponds to a gl_renderbuffer (Mesa 6.4) */ -typedef struct { - drm_handle_t handle; - drmSize size; /* region size in bytes */ - char *map; /* memory map */ - int offset; /* from start of video mem, in bytes */ - int pitch; /* row stride, in bytes */ -} intelRegion; - -typedef struct -{ - intelRegion front; - intelRegion back; - intelRegion rotated; - intelRegion depth; - intelRegion tex; - - int deviceID; - int width; - int height; - int mem; /* unused */ - - int cpp; /* for front and back buffers */ - int fbFormat; - - int logTextureGranularity; - - __DRIscreenPrivate *driScrnPriv; - unsigned int sarea_priv_offset; - - int drmMinor; - - int irq_active; - int allow_batchbuffer; - - struct matrix23 rotMatrix; - - int current_rotation; /* 0, 90, 180 or 270 */ - int rotatedWidth, rotatedHeight; - - /** - * Configuration cache with default values for all contexts - */ - driOptionCache optionCache; -} intelScreenPrivate; - - -extern GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv); - -extern void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen); - -extern void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - drmI830Sarea *sarea); - -extern void -intelDestroyContext(__DRIcontextPrivate *driContextPriv); - -extern GLboolean -intelUnbindContext(__DRIcontextPrivate *driContextPriv); - -extern GLboolean -intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv); - -extern void -intelSwapBuffers(__DRIdrawablePrivate *dPriv); - -extern void -intelCopySubBuffer( __DRIdrawablePrivate *dPriv, int x, int y, int w, int h ); - -#endif diff --git a/i915/intel_span.c b/i915/intel_span.c deleted file mode 100644 index c3ffc4b..0000000 --- a/i915/intel_span.c +++ /dev/null @@ -1,258 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "macros.h" -#include "mtypes.h" -#include "colormac.h" - -#include "intel_screen.h" - -#include "intel_span.h" -#include "intel_ioctl.h" -#include "swrast/swrast.h" - - -#define DBG 0 - -#define LOCAL_VARS \ - intelContextPtr intel = INTEL_CONTEXT(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLushort p; \ - (void) buf; (void) p - -#define LOCAL_DEPTH_VARS \ - intelContextPtr intel = INTEL_CONTEXT(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch - -#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR565(color[0],color[1],color[2]) - -#define Y_FLIP(_y) (height - _y - 1) - -#define HW_LOCK() - -#define HW_UNLOCK() - -/* 16 bit, 565 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = ( (((int)r & 0xf8) << 8) | \ - (((int)g & 0xfc) << 3) | \ - (((int)b & 0xf8) >> 3)) -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (((p >> 11) & 0x1f) * 255) / 31; \ - rgba[1] = (((p >> 5) & 0x3f) * 255) / 63; \ - rgba[2] = (((p >> 0) & 0x1f) * 255) / 31; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_565 -#include "spantmp.h" - -/* 15 bit, 555 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = (((r & 0xf8) << 7) | \ - ((g & 0xf8) << 3) | \ - ((b & 0xf8) >> 3)) - -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (p >> 7) & 0xf8; \ - rgba[1] = (p >> 3) & 0xf8; \ - rgba[2] = (p << 3) & 0xf8; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_555 -#include "spantmp.h" - -/* 16 bit depthbuffer functions. - */ -#define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch); - - -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" - - -#undef LOCAL_VARS -#define LOCAL_VARS \ - intelContextPtr intel = INTEL_CONTEXT(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *)drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLuint p; \ - (void) buf; (void) p - -#undef INIT_MONO_PIXEL -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]) - -/* 32 bit, 8888 argb color spanline and pixel functions - */ -#define WRITE_RGBA(_x, _y, r, g, b, a) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) | \ - (g << 8) | \ - (b << 0) | \ - (a << 24) ) - -#define WRITE_PIXEL(_x, _y, p) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = p - - -#define READ_RGBA(rgba, _x, _y) \ - do { \ - GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch); \ - rgba[0] = (p >> 16) & 0xff; \ - rgba[1] = (p >> 8) & 0xff; \ - rgba[2] = (p >> 0) & 0xff; \ - rgba[3] = (p >> 24) & 0xff; \ - } while (0) - -#define TAG(x) intel##x##_8888 -#include "spantmp.h" - - -/* 24/8 bit interleaved depth/stencil functions - */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xff000000; \ - tmp |= (d) & 0xffffff; \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff; - - -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xffffff; \ - tmp |= ((d)<<24); \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_STENCIL( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24; - -#define TAG(x) intel##x##_z24_s8 -#include "stenciltmp.h" - - -/* Move locking out to get reasonable span performance. - */ -void intelSpanRenderStart( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - - intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); - intelWaitForIdle(intel); -} - -void intelSpanRenderFinish( GLcontext *ctx ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - _swrast_flush( ctx ); - UNLOCK_HARDWARE( intel ); -} - -void intelInitSpanFuncs( GLcontext *ctx ) -{ - struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); - swdd->SpanRenderStart = intelSpanRenderStart; - swdd->SpanRenderFinish = intelSpanRenderFinish; -} - - -/** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ -void -intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) -{ - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) { - intelInitPointers_555(&drb->Base); - } - else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { - intelInitPointers_565(&drb->Base); - } - else { - assert(vis->redBits == 8); - assert(vis->greenBits == 8); - assert(vis->blueBits == 8); - intelInitPointers_8888(&drb->Base); - } - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - intelInitDepthPointers_z24_s8(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - intelInitStencilPointers_z24_s8(&drb->Base); - } -} diff --git a/i915/intel_state.c b/i915/intel_state.c index e5988a5..d1ca11d 100644 --- a/i915/intel_state.c +++ b/i915/intel_state.c @@ -30,252 +30,266 @@ #include "context.h" #include "macros.h" #include "enums.h" +#include "colormac.h" #include "dd.h" #include "intel_screen.h" #include "intel_context.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "swrast/swrast.h" -int intel_translate_compare_func( GLenum func ) +int +intel_translate_shadow_compare_func( GLenum func ) { switch(func) { case GL_NEVER: - return COMPAREFUNC_NEVER; + return COMPAREFUNC_ALWAYS; case GL_LESS: - return COMPAREFUNC_LESS; + return COMPAREFUNC_LEQUAL; case GL_LEQUAL: - return COMPAREFUNC_LEQUAL; + return COMPAREFUNC_LESS; case GL_GREATER: - return COMPAREFUNC_GREATER; + return COMPAREFUNC_GEQUAL; case GL_GEQUAL: - return COMPAREFUNC_GEQUAL; + return COMPAREFUNC_GREATER; case GL_NOTEQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_EQUAL: return COMPAREFUNC_EQUAL; + case GL_EQUAL: + return COMPAREFUNC_NOTEQUAL; case GL_ALWAYS: - return COMPAREFUNC_ALWAYS; + return COMPAREFUNC_NEVER; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +int +intel_translate_compare_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return COMPAREFUNC_NEVER; + case GL_LESS: + return COMPAREFUNC_LESS; + case GL_LEQUAL: + return COMPAREFUNC_LEQUAL; + case GL_GREATER: + return COMPAREFUNC_GREATER; + case GL_GEQUAL: + return COMPAREFUNC_GEQUAL; + case GL_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_EQUAL: + return COMPAREFUNC_EQUAL; + case GL_ALWAYS: + return COMPAREFUNC_ALWAYS; } fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_ALWAYS; + return COMPAREFUNC_ALWAYS; } -int intel_translate_stencil_op( GLenum op ) +int +intel_translate_stencil_op(GLenum op) { - switch(op) { - case GL_KEEP: - return STENCILOP_KEEP; - case GL_ZERO: - return STENCILOP_ZERO; - case GL_REPLACE: - return STENCILOP_REPLACE; - case GL_INCR: + switch (op) { + case GL_KEEP: + return STENCILOP_KEEP; + case GL_ZERO: + return STENCILOP_ZERO; + case GL_REPLACE: + return STENCILOP_REPLACE; + case GL_INCR: return STENCILOP_INCRSAT; - case GL_DECR: + case GL_DECR: return STENCILOP_DECRSAT; case GL_INCR_WRAP: - return STENCILOP_INCR; + return STENCILOP_INCR; case GL_DECR_WRAP: - return STENCILOP_DECR; - case GL_INVERT: - return STENCILOP_INVERT; - default: + return STENCILOP_DECR; + case GL_INVERT: + return STENCILOP_INVERT; + default: return STENCILOP_ZERO; } } -int intel_translate_blend_factor( GLenum factor ) +int +intel_translate_blend_factor(GLenum factor) { - switch(factor) { - case GL_ZERO: - return BLENDFACT_ZERO; - case GL_SRC_ALPHA: - return BLENDFACT_SRC_ALPHA; - case GL_ONE: - return BLENDFACT_ONE; - case GL_SRC_COLOR: - return BLENDFACT_SRC_COLR; - case GL_ONE_MINUS_SRC_COLOR: - return BLENDFACT_INV_SRC_COLR; - case GL_DST_COLOR: - return BLENDFACT_DST_COLR; - case GL_ONE_MINUS_DST_COLOR: - return BLENDFACT_INV_DST_COLR; + switch (factor) { + case GL_ZERO: + return BLENDFACT_ZERO; + case GL_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case GL_ONE: + return BLENDFACT_ONE; + case GL_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case GL_ONE_MINUS_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case GL_DST_COLOR: + return BLENDFACT_DST_COLR; + case GL_ONE_MINUS_DST_COLOR: + return BLENDFACT_INV_DST_COLR; case GL_ONE_MINUS_SRC_ALPHA: - return BLENDFACT_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BLENDFACT_DST_ALPHA; + return BLENDFACT_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BLENDFACT_DST_ALPHA; case GL_ONE_MINUS_DST_ALPHA: - return BLENDFACT_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: + return BLENDFACT_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: return BLENDFACT_SRC_ALPHA_SATURATE; case GL_CONSTANT_COLOR: - return BLENDFACT_CONST_COLOR; + return BLENDFACT_CONST_COLOR; case GL_ONE_MINUS_CONSTANT_COLOR: return BLENDFACT_INV_CONST_COLOR; case GL_CONSTANT_ALPHA: - return BLENDFACT_CONST_ALPHA; + return BLENDFACT_CONST_ALPHA; case GL_ONE_MINUS_CONSTANT_ALPHA: return BLENDFACT_INV_CONST_ALPHA; } - + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); return BLENDFACT_ZERO; } -int intel_translate_logic_op( GLenum opcode ) +int +intel_translate_logic_op(GLenum opcode) { - switch(opcode) { - case GL_CLEAR: - return LOGICOP_CLEAR; - case GL_AND: - return LOGICOP_AND; - case GL_AND_REVERSE: - return LOGICOP_AND_RVRSE; - case GL_COPY: - return LOGICOP_COPY; - case GL_COPY_INVERTED: - return LOGICOP_COPY_INV; - case GL_AND_INVERTED: - return LOGICOP_AND_INV; - case GL_NOOP: - return LOGICOP_NOOP; - case GL_XOR: - return LOGICOP_XOR; - case GL_OR: - return LOGICOP_OR; - case GL_OR_INVERTED: - return LOGICOP_OR_INV; - case GL_NOR: - return LOGICOP_NOR; - case GL_EQUIV: - return LOGICOP_EQUIV; - case GL_INVERT: - return LOGICOP_INV; - case GL_OR_REVERSE: - return LOGICOP_OR_RVRSE; - case GL_NAND: - return LOGICOP_NAND; - case GL_SET: - return LOGICOP_SET; - default: + switch (opcode) { + case GL_CLEAR: + return LOGICOP_CLEAR; + case GL_AND: + return LOGICOP_AND; + case GL_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case GL_COPY: + return LOGICOP_COPY; + case GL_COPY_INVERTED: + return LOGICOP_COPY_INV; + case GL_AND_INVERTED: + return LOGICOP_AND_INV; + case GL_NOOP: + return LOGICOP_NOOP; + case GL_XOR: + return LOGICOP_XOR; + case GL_OR: + return LOGICOP_OR; + case GL_OR_INVERTED: + return LOGICOP_OR_INV; + case GL_NOR: + return LOGICOP_NOR; + case GL_EQUIV: + return LOGICOP_EQUIV; + case GL_INVERT: + return LOGICOP_INV; + case GL_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case GL_NAND: + return LOGICOP_NAND; + case GL_SET: return LOGICOP_SET; - } -} - -static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) -{ - intelContextPtr intel = INTEL_CONTEXT(ctx); - int front = 0; - - if (!ctx->DrawBuffer) - return; - - switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { - case BUFFER_BIT_FRONT_LEFT: - front = 1; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - case BUFFER_BIT_BACK_LEFT: - front = 0; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; default: - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - if ( intel->sarea->pf_current_page == 1 ) - front ^= 1; - - intelSetFrontClipRects( intel ); - - if (front) { - intel->drawRegion = &intel->intelScreen->front; - intel->readRegion = &intel->intelScreen->front; - } else { - intel->drawRegion = &intel->intelScreen->back; - intel->readRegion = &intel->intelScreen->back; + return LOGICOP_SET; } - - intel->vtbl.set_color_region( intel, intel->drawRegion ); -} - -static void intelReadBuffer( GLcontext *ctx, GLenum mode ) -{ - /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ } -static void intelClearColor(GLcontext *ctx, const GLfloat color[4]) +static void +intelClearColor(GLcontext * ctx, const GLfloat color[4]) { - intelContextPtr intel = INTEL_CONTEXT(ctx); - intelScreenPrivate *screen = intel->intelScreen; + struct intel_context *intel = intel_context(ctx); + GLubyte clear[4]; - CLAMPED_FLOAT_TO_UBYTE(intel->clear_red, color[0]); - CLAMPED_FLOAT_TO_UBYTE(intel->clear_green, color[1]); - CLAMPED_FLOAT_TO_UBYTE(intel->clear_blue, color[2]); - CLAMPED_FLOAT_TO_UBYTE(intel->clear_alpha, color[3]); + CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); - intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat, - intel->clear_red, - intel->clear_green, - intel->clear_blue, - intel->clear_alpha); + /* compute both 32 and 16-bit clear values */ + intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1], + clear[2], clear[3]); + intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]); } -static void intelCalcViewport( GLcontext *ctx ) +/** + * Update the viewport transformation matrix. Depends on: + * - viewport pos/size + * - depthrange + * - window pos/size or FBO size + */ +static void +intelCalcViewport(GLcontext * ctx) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; GLfloat *m = intel->ViewportMatrix.m; - GLint h = 0; + GLfloat yScale, yBias; + + if (ctx->DrawBuffer->Name) { + /* User created FBO */ + struct intel_renderbuffer *irb + = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + if (irb && !irb->RenderToTexture) { + /* y=0=top */ + yScale = -1.0; + yBias = irb->Base.Height; + } + else { + /* y=0=bottom */ + yScale = 1.0; + yBias = 0.0; + } + } + else { + /* window buffer, y=0=top */ + yScale = -1.0; + yBias = (intel->driDrawable) ? intel->driDrawable->h : 0.0F; + } - if (intel->driDrawable) - h = intel->driDrawable->h + SUBPIXEL_Y; + m[MAT_SX] = v[MAT_SX]; + m[MAT_TX] = v[MAT_TX]; - /* See also intel_translate_vertex. SUBPIXEL adjustments can be done - * via state vars, too. - */ - m[MAT_SX] = v[MAT_SX]; - m[MAT_TX] = v[MAT_TX] + SUBPIXEL_X; - m[MAT_SY] = - v[MAT_SY]; - m[MAT_TY] = - v[MAT_TY] + h; - m[MAT_SZ] = v[MAT_SZ] * intel->depth_scale; - m[MAT_TZ] = v[MAT_TZ] * intel->depth_scale; + m[MAT_SY] = v[MAT_SY] * yScale; + m[MAT_TY] = v[MAT_TY] * yScale + yBias; + + m[MAT_SZ] = v[MAT_SZ] * depthScale; + m[MAT_TZ] = v[MAT_TZ] * depthScale; } -static void intelViewport( GLcontext *ctx, - GLint x, GLint y, - GLsizei width, GLsizei height ) +static void +intelViewport(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height) { - intelCalcViewport( ctx ); + intelCalcViewport(ctx); } -static void intelDepthRange( GLcontext *ctx, - GLclampd nearval, GLclampd farval ) +static void +intelDepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) { - intelCalcViewport( ctx ); + intelCalcViewport(ctx); } /* Fallback to swrast for select and feedback. */ -static void intelRenderMode( GLcontext *ctx, GLenum mode ) +static void +intelRenderMode(GLcontext * ctx, GLenum mode) { - intelContextPtr intel = INTEL_CONTEXT(ctx); - FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) ); + struct intel_context *intel = intel_context(ctx); + FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER)); } -void intelInitStateFuncs( struct dd_function_table *functions ) +void +intelInitStateFuncs(struct dd_function_table *functions) { - functions->DrawBuffer = intelDrawBuffer; - functions->ReadBuffer = intelReadBuffer; functions->RenderMode = intelRenderMode; functions->Viewport = intelViewport; functions->DepthRange = intelDepthRange; functions->ClearColor = intelClearColor; } - diff --git a/i915/intel_structs.h b/i915/intel_structs.h new file mode 100644 index 0000000..522e3bd --- /dev/null +++ b/i915/intel_structs.h @@ -0,0 +1,132 @@ +#ifndef INTEL_STRUCTS_H +#define INTEL_STRUCTS_H + +struct br0 { + GLuint length:8; + GLuint pad0:3; + GLuint dst_tiled:1; + GLuint pad1:8; + GLuint write_rgb:1; + GLuint write_alpha:1; + GLuint opcode:7; + GLuint client:3; +}; + + +struct br13 { + GLint dest_pitch:16; + GLuint rop:8; + GLuint color_depth:2; + GLuint pad1:3; + GLuint mono_source_transparency:1; + GLuint clipping_enable:1; + GLuint pad0:1; +}; + + + +/* This is an attempt to move some of the 2D interaction in this + * driver to using structs for packets rather than a bunch of #defines + * and dwords. + */ +struct xy_color_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint color; +}; + +struct xy_src_copy_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + + struct { + GLuint src_x1:16; + GLuint src_y1:16; + } dw5; + + struct { + GLint src_pitch:16; + GLuint pad:16; + } dw6; + + GLuint src_base_addr; +}; + +struct xy_setup_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint clip_x1:16; + GLuint clip_y1:16; + } dw2; + + struct { + GLuint clip_x2:16; + GLuint clip_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint background_color; + GLuint foreground_color; + GLuint pattern_base_addr; +}; + + +struct xy_text_immediate_blit { + struct { + GLuint length:8; + GLuint pad2:3; + GLuint dst_tiled:1; + GLuint pad1:4; + GLuint byte_packed:1; + GLuint pad0:5; + GLuint opcode:7; + GLuint client:3; + } dw0; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw1; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw2; + + /* Src bitmap data follows as inline dwords. + */ +}; + + +#define CLIENT_2D 0x2 +#define OPCODE_XY_SETUP_BLT 0x1 +#define OPCODE_XY_COLOR_BLT 0x50 +#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31 + +#endif diff --git a/i915/intel_tex.c b/i915/intel_tex.c deleted file mode 100644 index 5bd2806..0000000 --- a/i915/intel_tex.c +++ /dev/null @@ -1,877 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "mtypes.h" -#include "imports.h" -#include "macros.h" -#include "simple_list.h" -#include "enums.h" -#include "image.h" -#include "texstore.h" -#include "texformat.h" -#include "teximage.h" -#include "texmem.h" -#include "texobj.h" -#include "swrast/swrast.h" - -#include "mm.h" - -#include "intel_screen.h" -#include "intel_batchbuffer.h" -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_ioctl.h" - - - -static GLboolean -intelValidateClientStorage( intelContextPtr intel, GLenum target, - GLint internalFormat, - GLint srcWidth, GLint srcHeight, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - -{ - GLcontext *ctx = &intel->ctx; - int texelBytes; - - if (0) - fprintf(stderr, "intformat %s format %s type %s\n", - _mesa_lookup_enum_by_nr( internalFormat ), - _mesa_lookup_enum_by_nr( format ), - _mesa_lookup_enum_by_nr( type )); - - if (!ctx->Unpack.ClientStorage) - return 0; - - if (ctx->_ImageTransferState || - texImage->IsCompressed || - texObj->GenerateMipmap) - return 0; - - - /* This list is incomplete - */ - switch ( internalFormat ) { - case GL_RGBA: - if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) { - texImage->TexFormat = &_mesa_texformat_argb8888; - texelBytes = 4; - } - else - return 0; - break; - - case GL_RGB: - if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) { - texImage->TexFormat = &_mesa_texformat_rgb565; - texelBytes = 2; - } - else - return 0; - break; - - case GL_YCBCR_MESA: - if ( format == GL_YCBCR_MESA && - type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) { - texImage->TexFormat = &_mesa_texformat_ycbcr_rev; - texelBytes = 2; - } - else if ( format == GL_YCBCR_MESA && - (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE)) { - texImage->TexFormat = &_mesa_texformat_ycbcr; - texelBytes = 2; - } - else - return 0; - break; - - - default: - return 0; - } - - /* Could deal with these packing issues, but currently don't: - */ - if (packing->SkipPixels || - packing->SkipRows || - packing->SwapBytes || - packing->LsbFirst) { - return 0; - } - - { - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); - - - if (0) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); - - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!intelIsAgpMemory( intel, pixels, srcHeight * srcRowStride ) || - (srcRowStride & 63)) - return 0; - - - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. - */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = srcRowStride / texelBytes; - return 1; - } -} - - - -static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert(t); - intelFlush( ctx ); - driSwapOutTextureObject( t ); - - texImage->IsClientData = GL_FALSE; - - _mesa_store_teximage1d( ctx, target, level, internalFormat, - width, border, format, type, - pixels, packing, texObj, texImage ); - - t->dirty_images[0] |= (1 << level); -} - -static void intelTexSubImage1D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert(t); - intelFlush( ctx ); - driSwapOutTextureObject( t ); - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); -} - - -/* Handles 2D, CUBE, RECT: - */ -static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert(t); - intelFlush( ctx ); - driSwapOutTextureObject( t ); - texImage->IsClientData = GL_FALSE; - - if (intelValidateClientStorage( INTEL_CONTEXT(ctx), target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); - } - else { - _mesa_store_teximage2d( ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, packing, texObj, texImage ); - - t->dirty_images[face] |= (1 << level); - } -} - -static void intelTexSubImage2D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if (texImage->IsClientData && - (char *)pixels == (char *)texImage->Data + - ((xoffset + yoffset * texImage->RowStride) * - texImage->TexFormat->TexelBytes)) { - - /* Notification only - no upload required */ - } - else { - assert( t ); /* this _should_ be true */ - intelFlush( ctx ); - driSwapOutTextureObject( t ); - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[face] |= (1 << level); - } -} - -static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert(t); - intelFlush( ctx ); - - driSwapOutTextureObject( t ); - texImage->IsClientData = GL_FALSE; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); - - _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, - height, border, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - - -static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert( t ); /* this _should_ be true */ - intelFlush( ctx ); - driSwapOutTextureObject( t ); - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - - -static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert(t); - driSwapOutTextureObject( t ); - texImage->IsClientData = GL_FALSE; - - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} - - -static void -intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert( t ); /* this _should_ be true */ - driSwapOutTextureObject( t ); - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} - - - - -static void intelDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj ) -{ - driTextureObject * t = (driTextureObject *) tObj->DriverData; - - if ( t != NULL ) { - intelFlush( ctx ); - driDestroyTextureObject( t ); - } - - /* Free mipmap images and the texture object itself */ - _mesa_delete_texture_object(ctx, tObj); -} - - -static const struct gl_texture_format * -intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat, - GLenum format, GLenum type ) -{ - intelContextPtr intel = INTEL_CONTEXT( ctx ); - const GLboolean do32bpt = ( intel->intelScreen->cpp == 4 && - intel->intelScreen->tex.size > 4*1024*1024); - - switch ( internalFormat ) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if ( format == GL_BGRA ) { - if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) { - return &_mesa_texformat_argb8888; - } - else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) { - return &_mesa_texformat_argb4444; - } - else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) { - return &_mesa_texformat_argb1555; - } - } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) { - return &_mesa_texformat_rgb565; - } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; - - case GL_RGBA4: - case GL_RGBA2: - return &_mesa_texformat_argb4444; - - case GL_RGB5_A1: - return &_mesa_texformat_argb1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return &_mesa_texformat_rgb565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return &_mesa_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return &_mesa_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return &_mesa_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return &_mesa_texformat_i8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_MESA || - type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgba_fxt1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - return &_mesa_texformat_z16; - - default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), - __FUNCTION__); - return NULL; - } - - return NULL; /* never get here */ -} - - - -void intelDestroyTexObj(intelContextPtr intel, intelTextureObjectPtr t) -{ - unsigned i; - - if ( intel == NULL ) - return; - - if ( t->age > intel->dirtyAge ) - intel->dirtyAge = t->age; - - for ( i = 0 ; i < MAX_TEXTURE_UNITS ; i++ ) { - if ( t == intel->CurrentTexObj[ i ] ) - intel->CurrentTexObj[ i ] = NULL; - } -} - - - -/* Upload an image from mesa's internal copy. Image may be 1D, 2D or - * 3D. Cubemaps are expanded elsewhere. - */ -static void intelUploadTexImage( intelContextPtr intel, - intelTextureObjectPtr t, - const struct gl_texture_image *image, - const GLuint offset ) -{ - - if (!image || !image->Data) - return; - - if (image->Depth == 1 && image->IsClientData) { - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "Blit uploading\n"); - - /* Do it with a blit. - */ - intelEmitCopyBlitLocked( intel, - image->TexFormat->TexelBytes, - image->RowStride, /* ? */ - intelGetMemoryOffsetMESA( NULL, 0, image->Data ), - t->Pitch / image->TexFormat->TexelBytes, - intelGetMemoryOffsetMESA( NULL, 0, t->BufAddr + offset ), - 0, 0, - 0, 0, - image->Width, - image->Height); - } - else if (image->IsCompressed) { - GLuint row_len = 0; - GLubyte *dst = (GLubyte *)(t->BufAddr + offset); - GLubyte *src = (GLubyte *)image->Data; - GLuint j; - - /* must always copy whole blocks (8/16 bytes) */ - switch (image->InternalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - case GL_COMPRESSED_RGBA_FXT1_3DFX: - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - row_len = (image->Width * 2 + 7) & ~7; - break; - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - row_len = (image->Width * 4 + 15) & ~15; - break; - default: - fprintf(stderr,"Internal Compressed format not supported %d\n", image->InternalFormat); - break; - } - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Upload image %dx%dx%d offset %xm row_len %x " - "pitch %x depth_pitch %x\n", - image->Width, image->Height, image->Depth, offset, - row_len, t->Pitch, t->depth_pitch); - - if (row_len) { - for (j = 0 ; j < (image->Height + 3)/4 ; j++, dst += (t->Pitch)) { - __memcpy(dst, src, row_len ); - src += row_len; - } - } - } - /* Time for another vtbl entry: - */ - else if (intel->intelScreen->deviceID == PCI_CHIP_I945_G || - intel->intelScreen->deviceID == PCI_CHIP_I945_GM || - intel->intelScreen->deviceID == PCI_CHIP_I945_GME || - intel->intelScreen->deviceID == PCI_CHIP_G33_G || - intel->intelScreen->deviceID == PCI_CHIP_Q33_G || - intel->intelScreen->deviceID == PCI_CHIP_Q35_G) { - GLuint row_len = image->Width * image->TexFormat->TexelBytes; - GLubyte *dst = (GLubyte *)(t->BufAddr + offset); - GLubyte *src = (GLubyte *)image->Data; - GLuint d, j; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Upload image %dx%dx%d offset %xm row_len %x " - "pitch %x depth_pitch %x\n", - image->Width, image->Height, image->Depth, offset, - row_len, t->Pitch, t->depth_pitch); - - if (row_len == t->Pitch) { - memcpy( dst, src, row_len * image->Height * image->Depth ); - } - else { - GLuint x = 0, y = 0; - - for (d = 0 ; d < image->Depth ; d++) { - GLubyte *dst0 = dst + x + y * t->Pitch; - - for (j = 0 ; j < image->Height ; j++) { - __memcpy(dst0, src, row_len ); - src += row_len; - dst0 += t->Pitch; - } - - x += MIN2(4, row_len); /* Guess: 4 byte minimum alignment */ - if (x > t->Pitch) { - x = 0; - y += image->Height; - } - } - } - - } - else { - GLuint row_len = image->Width * image->TexFormat->TexelBytes; - GLubyte *dst = (GLubyte *)(t->BufAddr + offset); - GLubyte *src = (GLubyte *)image->Data; - GLuint d, j; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Upload image %dx%dx%d offset %xm row_len %x " - "pitch %x depth_pitch %x\n", - image->Width, image->Height, image->Depth, offset, - row_len, t->Pitch, t->depth_pitch); - - if (row_len == t->Pitch) { - for (d = 0; d < image->Depth; d++) { - memcpy( dst, src, t->Pitch * image->Height ); - dst += t->depth_pitch; - src += row_len * image->Height; - } - } - else { - for (d = 0 ; d < image->Depth ; d++) { - for (j = 0 ; j < image->Height ; j++) { - __memcpy(dst, src, row_len ); - src += row_len; - dst += t->Pitch; - } - - dst += t->depth_pitch - (t->Pitch * image->Height); - } - } - } -} - - - -int intelUploadTexImages( intelContextPtr intel, - intelTextureObjectPtr t, - GLuint face) -{ - const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; - const struct gl_texture_image *firstImage = t->image[face][t->base.firstLevel].image; - int pitch = firstImage->RowStride * firstImage->TexFormat->TexelBytes; - - /* Can we texture out of the existing client data? */ - if ( numLevels == 1 && - firstImage->IsClientData && - (pitch & 3) == 0) { - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "AGP texturing from client memory\n"); - - t->TextureOffset = intelAgpOffsetFromVirtual( intel, firstImage->Data ); - t->BufAddr = 0; - t->dirty = ~0; - return GL_TRUE; - } - else { - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "Uploading client data to agp\n"); - - INTEL_FIREVERTICES( intel ); - LOCK_HARDWARE( intel ); - - if ( t->base.memBlock == NULL ) { - int heap; - - heap = driAllocateTexture( intel->texture_heaps, intel->nr_heaps, - (driTextureObject *) t ); - if ( heap == -1 ) { - UNLOCK_HARDWARE( intel ); - return GL_FALSE; - } - - /* Set the base offset of the texture image */ - t->BufAddr = (GLubyte *) (intel->intelScreen->tex.map + - t->base.memBlock->ofs); - t->TextureOffset = intel->intelScreen->tex.offset + t->base.memBlock->ofs; - t->dirty = ~0; - } - - - /* Let the world know we've used this memory recently. - */ - driUpdateTextureLRU( (driTextureObject *) t ); - - - /* Upload any images that are new */ - if (t->base.dirty_images[face]) { - int i; - - intelWaitForIdle( intel ); - - for (i = 0 ; i < numLevels ; i++) { - int level = i + t->base.firstLevel; - - if (t->base.dirty_images[face] & (1<<level)) { - - const struct gl_texture_image *image = t->image[face][i].image; - GLuint offset = t->image[face][i].offset; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "upload level %d, offset %x\n", - level, offset); - - intelUploadTexImage( intel, t, image, offset ); - } - } - t->base.dirty_images[face] = 0; - intel->perf_boxes |= I830_BOX_TEXTURE_LOAD; - } - - UNLOCK_HARDWARE( intel ); - return GL_TRUE; - } -} - -/** - * Allocate a new texture object. - * Called via ctx->Driver.NewTextureObject. - * Note: this function will be called during context creation to - * allocate the default texture objects. - * Note: we could use containment here to 'derive' the driver-specific - * texture object from the core mesa gl_texture_object. Not done at this time. - */ -static struct gl_texture_object * -intelNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) -{ - struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target); - INTEL_CONTEXT(ctx)->vtbl.alloc_tex_obj( obj ); - return obj; -} - - -void intelInitTextureFuncs( struct dd_function_table *functions ) -{ - functions->NewTextureObject = intelNewTextureObject; - functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = _swrast_copy_teximage1d; - functions->CopyTexImage2D = _swrast_copy_teximage2d; - functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d; - functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; - functions->CopyTexSubImage3D = _swrast_copy_texsubimage3d; - functions->DeleteTexture = intelDeleteTexture; - functions->UpdateTexturePalette = NULL; - functions->IsTextureResident = driIsTextureResident; - functions->TestProxyTexImage = _mesa_test_proxy_teximage; - functions->DeleteTexture = intelDeleteTexture; - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; -} diff --git a/i915/intel_texmem.c b/i915/intel_texmem.c deleted file mode 100644 index 09beec9..0000000 --- a/i915/intel_texmem.c +++ /dev/null @@ -1,72 +0,0 @@ -#include "texmem.h" -#include "simple_list.h" -#include "imports.h" -#include "macros.h" - -#include "intel_tex.h" - -static GLuint -driLog2( GLuint n ) -{ - GLuint log2; - - for ( log2 = 1 ; n > 1 ; log2++ ) { - n >>= 1; - } - - return log2; -} - -static void calculate_heap_size( driTexHeap * heap, unsigned size, - unsigned nr_regions, unsigned alignmentShift ) -{ - unsigned l; - - l = driLog2( (size - 1) / nr_regions ); - if ( l < alignmentShift ) - { - l = alignmentShift; - } - - heap->logGranularity = l; - heap->size = size & ~((1L << l) - 1); -} - - -GLboolean -intel_driReinitTextureHeap( driTexHeap *heap, - unsigned size ) -{ - driTextureObject *t, *tmp; - - /* Kick out everything: - */ - foreach_s ( t, tmp, & heap->texture_objects ) { - if ( t->tObj != NULL ) { - driSwapOutTextureObject( t ); - } - else { - driDestroyTextureObject( t ); - } - } - - /* Destroy the memory manager: - */ - mmDestroy( heap->memory_heap ); - - /* Recreate the memory manager: - */ - calculate_heap_size(heap, size, heap->nrRegions, heap->alignmentShift); - heap->memory_heap = mmInit( 0, heap->size ); - if ( heap->memory_heap == NULL ) { - fprintf(stderr, "driReinitTextureHeap: couldn't recreate memory heap\n"); - FREE( heap ); - return GL_FALSE; - } - - make_empty_list( & heap->texture_objects ); - - return GL_TRUE; -} - - diff --git a/i915/intel_tris.c b/i915/intel_tris.c index b2787ee..bbb4e0f 100644 --- a/i915/intel_tris.c +++ b/i915/intel_tris.c @@ -29,6 +29,8 @@ #include "context.h" #include "macros.h" #include "enums.h" +#include "texobj.h" +#include "state.h" #include "dd.h" #include "swrast/swrast.h" @@ -38,19 +40,117 @@ #include "tnl/t_vertex.h" #include "intel_screen.h" +#include "intel_context.h" #include "intel_tris.h" #include "intel_batchbuffer.h" +#include "intel_buffers.h" #include "intel_reg.h" #include "intel_span.h" +#include "intel_tex.h" -/* XXX we shouldn't include these headers in this file, but we need them - * for fallbackStrings, below. +static void intelRenderPrimitive(GLcontext * ctx, GLenum prim); +static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim, + GLuint hwprim); + +/* + */ +static void +intel_flush_inline_primitive(struct intel_context *intel) +{ + GLuint used = intel->batch->ptr - intel->prim.start_ptr; + + assert(intel->prim.primitive != ~0); + +/* _mesa_printf("/\n"); */ + + if (used < 8) + goto do_discard; + + *(int *) intel->prim.start_ptr = (_3DPRIMITIVE | + intel->prim.primitive | (used / 4 - 2)); + + goto finished; + + do_discard: + intel->batch->ptr -= used; + + finished: + intel->prim.primitive = ~0; + intel->prim.start_ptr = 0; + intel->prim.flush = 0; +} + + +/* Emit a primitive referencing vertices in a vertex buffer. */ -#include "i830_context.h" -#include "i915_context.h" +void +intelStartInlinePrimitive(struct intel_context *intel, + GLuint prim, GLuint batch_flags) +{ + BATCH_LOCALS; + + intel_wait_flips(intel); + + intel->vtbl.emit_state(intel); + + intel->no_batch_wrap = GL_TRUE; + +/* _mesa_printf("%s *", __progname); */ + + /* Emit a slot which will be filled with the inline primitive + * command later. + */ + BEGIN_BATCH(2, batch_flags); + OUT_BATCH(0); + + assert((intel->batch->dirty_state & (1<<1)) == 0); + + intel->prim.start_ptr = intel->batch->ptr; + intel->prim.primitive = prim; + intel->prim.flush = intel_flush_inline_primitive; + + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel->no_batch_wrap = GL_FALSE; + +/* _mesa_printf(">"); */ +} + + +void +intelWrapInlinePrimitive(struct intel_context *intel) +{ + GLuint prim = intel->prim.primitive; + enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode; + + intel_flush_inline_primitive(intel); + intel_batchbuffer_flush(intel->batch); + intelStartInlinePrimitive(intel, prim, cliprect_mode); /* ??? */ +} + +GLuint * +intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords) +{ + GLuint sz = dwords * sizeof(GLuint); + GLuint *ptr; + + assert(intel->prim.flush == intel_flush_inline_primitive); + + if (intel_batchbuffer_space(intel->batch) < sz) + intelWrapInlinePrimitive(intel); + +/* _mesa_printf("."); */ + + intel->vtbl.assert_not_dirty(intel); + + ptr = (GLuint *) intel->batch->ptr; + intel->batch->ptr += sz; + + return ptr; +} + -static void intelRenderPrimitive( GLcontext *ctx, GLenum prim ); -static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim ); /*********************************************************************** * Emit primitives as inline vertices * @@ -69,75 +169,81 @@ do { \ #else #define COPY_DWORDS( j, vb, vertsize, v ) \ do { \ - if (0) fprintf(stderr, "\n"); \ for ( j = 0 ; j < vertsize ; j++ ) { \ - if (0) fprintf(stderr, " -- v(%d): %x/%f\n",j, \ - ((GLuint *)v)[j], \ - ((GLfloat *)v)[j]); \ vb[j] = ((GLuint *)v)[j]; \ } \ vb += vertsize; \ } while (0) #endif -static void __inline__ intel_draw_quad( intelContextPtr intel, - intelVertexPtr v0, - intelVertexPtr v1, - intelVertexPtr v2, - intelVertexPtr v3 ) +static void +intel_draw_quad(struct intel_context *intel, + intelVertexPtr v0, + intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive( intel, 6 * vertsize ); + GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize); int j; - COPY_DWORDS( j, vb, vertsize, v0 ); - COPY_DWORDS( j, vb, vertsize, v1 ); - COPY_DWORDS( j, vb, vertsize, v3 ); - COPY_DWORDS( j, vb, vertsize, v1 ); - COPY_DWORDS( j, vb, vertsize, v2 ); - COPY_DWORDS( j, vb, vertsize, v3 ); + COPY_DWORDS(j, vb, vertsize, v0); + COPY_DWORDS(j, vb, vertsize, v1); + + /* If smooth shading, draw like a trifan which gives better + * rasterization. Otherwise draw as two triangles with provoking + * vertex in third position as required for flat shading. + */ + if (intel->ctx.Light.ShadeModel == GL_FLAT) { + COPY_DWORDS(j, vb, vertsize, v3); + COPY_DWORDS(j, vb, vertsize, v1); + } + else { + COPY_DWORDS(j, vb, vertsize, v2); + COPY_DWORDS(j, vb, vertsize, v0); + } + + COPY_DWORDS(j, vb, vertsize, v2); + COPY_DWORDS(j, vb, vertsize, v3); } -static void __inline__ intel_draw_triangle( intelContextPtr intel, - intelVertexPtr v0, - intelVertexPtr v1, - intelVertexPtr v2 ) +static void +intel_draw_triangle(struct intel_context *intel, + intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive( intel, 3 * vertsize ); + GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize); int j; - - COPY_DWORDS( j, vb, vertsize, v0 ); - COPY_DWORDS( j, vb, vertsize, v1 ); - COPY_DWORDS( j, vb, vertsize, v2 ); + + COPY_DWORDS(j, vb, vertsize, v0); + COPY_DWORDS(j, vb, vertsize, v1); + COPY_DWORDS(j, vb, vertsize, v2); } -static __inline__ void intel_draw_line( intelContextPtr intel, - intelVertexPtr v0, - intelVertexPtr v1 ) +static void +intel_draw_line(struct intel_context *intel, + intelVertexPtr v0, intelVertexPtr v1) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive( intel, 2 * vertsize ); + GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize); int j; - COPY_DWORDS( j, vb, vertsize, v0 ); - COPY_DWORDS( j, vb, vertsize, v1 ); + COPY_DWORDS(j, vb, vertsize, v0); + COPY_DWORDS(j, vb, vertsize, v1); } -static __inline__ void intel_draw_point( intelContextPtr intel, - intelVertexPtr v0 ) +static void +intel_draw_point(struct intel_context *intel, intelVertexPtr v0) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive( intel, vertsize ); + GLuint *vb = intelExtendInlinePrimitive(intel, vertsize); int j; /* Adjust for sub pixel position -- still required for conform. */ - *(float *)&vb[0] = v0->v.x - 0.125; - *(float *)&vb[1] = v0->v.y - 0.125; - for (j = 2 ; j < vertsize ; j++) - vb[j] = v0->ui[j]; + *(float *) &vb[0] = v0->v.x; + *(float *) &vb[1] = v0->v.y; + for (j = 2; j < vertsize; j++) + vb[j] = v0->ui[j]; } @@ -146,13 +252,17 @@ static __inline__ void intel_draw_point( intelContextPtr intel, * Fixup for ARB_point_parameters * ***********************************************************************/ -static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 ) +/* Currently not working - VERT_ATTRIB_POINTSIZE isn't correctly + * represented in the fragment program InputsRead field. + */ +static void +intel_atten_point(struct intel_context *intel, intelVertexPtr v0) { GLcontext *ctx = &intel->ctx; GLfloat psz[4], col[4], restore_psz, restore_alpha; - _tnl_get_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz ); - _tnl_get_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col ); + _tnl_get_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz); + _tnl_get_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col); restore_psz = psz[0]; restore_alpha = col[3]; @@ -170,19 +280,19 @@ static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 ) psz[0] = 1.0; if (restore_psz != psz[0] || restore_alpha != col[3]) { - _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz); - _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col); - - intel_draw_point( intel, v0 ); + _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz); + _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col); + + intel_draw_point(intel, v0); psz[0] = restore_psz; col[3] = restore_alpha; - _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz); - _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col); + _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz); + _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col); } else - intel_draw_point( intel, v0 ); + intel_draw_point(intel, v0); } @@ -195,45 +305,59 @@ static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 ) -static void intel_wpos_triangle( intelContextPtr intel, - intelVertexPtr v0, - intelVertexPtr v1, - intelVertexPtr v2 ) +static void +intel_wpos_triangle(struct intel_context *intel, + intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; - - __memcpy( ((char *)v0) + offset, v0, size ); - __memcpy( ((char *)v1) + offset, v1, size ); - __memcpy( ((char *)v2) + offset, v2, size ); + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); + GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); + GLfloat *v2_wpos = (GLfloat *)((char *)v2 + offset); + + __memcpy(v0_wpos, v0, size); + __memcpy(v1_wpos, v1, size); + __memcpy(v2_wpos, v2, size); + + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; + v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h; + v2_wpos[1] = -v2_wpos[1] + intel->driDrawable->h; + - intel_draw_triangle( intel, v0, v1, v2 ); + intel_draw_triangle(intel, v0, v1, v2); } -static void intel_wpos_line( intelContextPtr intel, - intelVertexPtr v0, - intelVertexPtr v1 ) +static void +intel_wpos_line(struct intel_context *intel, + intelVertexPtr v0, intelVertexPtr v1) { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); + GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); + + __memcpy(v0_wpos, v0, size); + __memcpy(v1_wpos, v1, size); - __memcpy( ((char *)v0) + offset, v0, size ); - __memcpy( ((char *)v1) + offset, v1, size ); + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; + v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h; - intel_draw_line( intel, v0, v1 ); + intel_draw_line(intel, v0, v1); } -static void intel_wpos_point( intelContextPtr intel, - intelVertexPtr v0 ) +static void +intel_wpos_point(struct intel_context *intel, intelVertexPtr v0) { GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; + GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); - __memcpy( ((char *)v0) + offset, v0, size ); + __memcpy(v0_wpos, v0, size); + v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h; - intel_draw_point( intel, v0 ); + intel_draw_point(intel, v0); } @@ -290,11 +414,12 @@ do { \ #define INTEL_MAX_TRIFUNC 0x10 -static struct { - tnl_points_func points; - tnl_line_func line; - tnl_triangle_func triangle; - tnl_quad_func quad; +static struct +{ + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; } rast_tab[INTEL_MAX_TRIFUNC]; @@ -355,10 +480,10 @@ do { \ #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] #define LOCAL_VARS(n) \ - intelContextPtr intel = INTEL_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ - GLuint coloroffset = intel->coloroffset; \ - GLboolean specoffset = intel->specoffset; \ + struct intel_context *intel = intel_context(ctx); \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ + GLuint coloroffset = intel->coloroffset; \ + GLboolean specoffset = intel->specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; @@ -366,7 +491,7 @@ do { \ * Helpers for rendering unfilled primitives * ***********************************************************************/ -static const GLuint hw_prim[GL_POLYGON+1] = { +static const GLuint hw_prim[GL_POLYGON + 1] = { PRIM3D_POINTLIST, PRIM3D_LINELIST, PRIM3D_LINELIST, @@ -456,7 +581,8 @@ static const GLuint hw_prim[GL_POLYGON+1] = { #include "tnl_dd/t_dd_tritmp.h" -static void init_rast_tab( void ) +static void +init_rast_tab(void) { init(); init_offset(); @@ -487,10 +613,8 @@ static void init_rast_tab( void ) * primitives. */ static void -intel_fallback_tri( intelContextPtr intel, - intelVertex *v0, - intelVertex *v1, - intelVertex *v2 ) +intel_fallback_tri(struct intel_context *intel, + intelVertex * v0, intelVertex * v1, intelVertex * v2) { GLcontext *ctx = &intel->ctx; SWvertex v[3]; @@ -498,19 +622,20 @@ intel_fallback_tri( intelContextPtr intel, if (0) fprintf(stderr, "\n%s\n", __FUNCTION__); - _swsetup_Translate( ctx, v0, &v[0] ); - _swsetup_Translate( ctx, v1, &v[1] ); - _swsetup_Translate( ctx, v2, &v[2] ); - intelSpanRenderStart( ctx ); - _swrast_Triangle( ctx, &v[0], &v[1], &v[2] ); - intelSpanRenderFinish( ctx ); + INTEL_FIREVERTICES(intel); + + _swsetup_Translate(ctx, v0, &v[0]); + _swsetup_Translate(ctx, v1, &v[1]); + _swsetup_Translate(ctx, v2, &v[2]); + intelSpanRenderStart(ctx); + _swrast_Triangle(ctx, &v[0], &v[1], &v[2]); + intelSpanRenderFinish(ctx); } static void -intel_fallback_line( intelContextPtr intel, - intelVertex *v0, - intelVertex *v1 ) +intel_fallback_line(struct intel_context *intel, + intelVertex * v0, intelVertex * v1) { GLcontext *ctx = &intel->ctx; SWvertex v[2]; @@ -518,17 +643,18 @@ intel_fallback_line( intelContextPtr intel, if (0) fprintf(stderr, "\n%s\n", __FUNCTION__); - _swsetup_Translate( ctx, v0, &v[0] ); - _swsetup_Translate( ctx, v1, &v[1] ); - intelSpanRenderStart( ctx ); - _swrast_Line( ctx, &v[0], &v[1] ); - intelSpanRenderFinish( ctx ); -} + INTEL_FIREVERTICES(intel); + _swsetup_Translate(ctx, v0, &v[0]); + _swsetup_Translate(ctx, v1, &v[1]); + intelSpanRenderStart(ctx); + _swrast_Line(ctx, &v[0], &v[1]); + intelSpanRenderFinish(ctx); +} static void -intel_fallback_point( intelContextPtr intel, - intelVertex *v0 ) +intel_fallback_point(struct intel_context *intel, + intelVertex * v0) { GLcontext *ctx = &intel->ctx; SWvertex v[1]; @@ -536,12 +662,13 @@ intel_fallback_point( intelContextPtr intel, if (0) fprintf(stderr, "\n%s\n", __FUNCTION__); - _swsetup_Translate( ctx, v0, &v[0] ); - intelSpanRenderStart( ctx ); - _swrast_Point( ctx, &v[0] ); - intelSpanRenderFinish( ctx ); -} + INTEL_FIREVERTICES(intel); + _swsetup_Translate(ctx, v0, &v[0]); + intelSpanRenderStart(ctx); + _swrast_Point(ctx, &v[0]); + intelSpanRenderFinish(ctx); +} /**********************************************************************/ @@ -558,7 +685,7 @@ intel_fallback_point( intelContextPtr intel, #define INIT(x) intelRenderPrimitive( ctx, x ) #undef LOCAL_VARS #define LOCAL_VARS \ - intelContextPtr intel = INTEL_CONTEXT(ctx); \ + struct intel_context *intel = intel_context(ctx); \ GLubyte *vertptr = (GLubyte *)intel->verts; \ const GLuint vertsize = intel->vertex_size; \ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ @@ -581,10 +708,10 @@ intel_fallback_point( intelContextPtr intel, -static void intelRenderClippedPoly( GLcontext *ctx, const GLuint *elts, - GLuint n ) +static void +intelRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLuint prim = intel->render_primitive; @@ -593,39 +720,40 @@ static void intelRenderClippedPoly( GLcontext *ctx, const GLuint *elts, */ { GLuint *tmp = VB->Elts; - VB->Elts = (GLuint *)elts; - tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, - PRIM_BEGIN|PRIM_END ); + VB->Elts = (GLuint *) elts; + tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n, + PRIM_BEGIN | PRIM_END); VB->Elts = tmp; } /* Restore the render primitive */ if (prim != GL_POLYGON) - tnl->Driver.Render.PrimitiveNotify( ctx, prim ); + tnl->Driver.Render.PrimitiveNotify(ctx, prim); } -static void intelRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj ) +static void +intelRenderClippedLine(GLcontext * ctx, GLuint ii, GLuint jj) { TNLcontext *tnl = TNL_CONTEXT(ctx); - tnl->Driver.Render.Line( ctx, ii, jj ); + tnl->Driver.Render.Line(ctx, ii, jj); } -static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, - GLuint n ) +static void +intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n) { - intelContextPtr intel = INTEL_CONTEXT( ctx ); + struct intel_context *intel = intel_context(ctx); const GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive( intel, (n-2) * 3 * vertsize ); - GLubyte *vertptr = (GLubyte *)intel->verts; - const GLuint *start = (const GLuint *)V(elts[0]); - int i,j; - - for (i = 2 ; i < n ; i++) { - COPY_DWORDS( j, vb, vertsize, V(elts[i-1]) ); - COPY_DWORDS( j, vb, vertsize, V(elts[i]) ); - COPY_DWORDS( j, vb, vertsize, start ); + GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize); + GLubyte *vertptr = (GLubyte *) intel->verts; + const GLuint *start = (const GLuint *) V(elts[0]); + int i, j; + + for (i = 2; i < n; i++) { + COPY_DWORDS(j, vb, vertsize, V(elts[i - 1])); + COPY_DWORDS(j, vb, vertsize, V(elts[i])); + COPY_DWORDS(j, vb, vertsize, start); } } @@ -636,68 +764,75 @@ static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, -#define POINT_FALLBACK (0) -#define LINE_FALLBACK (DD_LINE_STIPPLE) -#define TRI_FALLBACK (0) -#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|\ - DD_TRI_STIPPLE|DD_POINT_ATTEN) -#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED) +#define ANY_FALLBACK_FLAGS (DD_LINE_STIPPLE | DD_TRI_STIPPLE | DD_POINT_ATTEN | DD_POINT_SMOOTH | DD_TRI_SMOOTH) +#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE | DD_TRI_OFFSET | DD_TRI_UNFILLED) -void intelChooseRenderState(GLcontext *ctx) +void +intelChooseRenderState(GLcontext * ctx) { TNLcontext *tnl = TNL_CONTEXT(ctx); - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); GLuint flags = ctx->_TriangleCaps; const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; GLboolean have_wpos = (fprog && (fprog->Base.InputsRead & FRAG_BIT_WPOS)); GLuint index = 0; if (INTEL_DEBUG & DEBUG_STATE) - fprintf(stderr,"\n%s\n",__FUNCTION__); + fprintf(stderr, "\n%s\n", __FUNCTION__); - if ((flags & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS)) || have_wpos) { + if ((flags & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) || have_wpos) { if (flags & ANY_RASTER_FLAGS) { - if (flags & DD_TRI_LIGHT_TWOSIDE) index |= INTEL_TWOSIDE_BIT; - if (flags & DD_TRI_OFFSET) index |= INTEL_OFFSET_BIT; - if (flags & DD_TRI_UNFILLED) index |= INTEL_UNFILLED_BIT; + if (flags & DD_TRI_LIGHT_TWOSIDE) + index |= INTEL_TWOSIDE_BIT; + if (flags & DD_TRI_OFFSET) + index |= INTEL_OFFSET_BIT; + if (flags & DD_TRI_UNFILLED) + index |= INTEL_UNFILLED_BIT; } if (have_wpos) { - intel->draw_point = intel_wpos_point; - intel->draw_line = intel_wpos_line; - intel->draw_tri = intel_wpos_triangle; + intel->draw_point = intel_wpos_point; + intel->draw_line = intel_wpos_line; + intel->draw_tri = intel_wpos_triangle; - /* Make sure these get called: - */ - index |= INTEL_FALLBACK_BIT; + /* Make sure these get called: + */ + index |= INTEL_FALLBACK_BIT; } else { - intel->draw_point = intel_draw_point; - intel->draw_line = intel_draw_line; - intel->draw_tri = intel_draw_triangle; + intel->draw_point = intel_draw_point; + intel->draw_line = intel_draw_line; + intel->draw_tri = intel_draw_triangle; } /* Hook in fallbacks for specific primitives. */ - if (flags & ANY_FALLBACK_FLAGS) - { - if (flags & POINT_FALLBACK) - intel->draw_point = intel_fallback_point; - - if (flags & LINE_FALLBACK) - intel->draw_line = intel_fallback_line; - - if (flags & TRI_FALLBACK) - intel->draw_tri = intel_fallback_tri; - - if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple) - intel->draw_tri = intel_fallback_tri; - - if (flags & DD_POINT_ATTEN) - intel->draw_point = intel_atten_point; - - index |= INTEL_FALLBACK_BIT; + if (flags & ANY_FALLBACK_FLAGS) { + if (flags & DD_LINE_STIPPLE) + intel->draw_line = intel_fallback_line; + + if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple) + intel->draw_tri = intel_fallback_tri; + + if (flags & DD_TRI_SMOOTH) { + if (intel->strict_conformance) + intel->draw_tri = intel_fallback_tri; + } + + if (flags & DD_POINT_ATTEN) { + if (0) + intel->draw_point = intel_atten_point; + else + intel->draw_point = intel_fallback_point; + } + + if (flags & DD_POINT_SMOOTH) { + if (intel->strict_conformance) + intel->draw_point = intel_fallback_point; + } + + index |= INTEL_FALLBACK_BIT; } } @@ -710,20 +845,21 @@ void intelChooseRenderState(GLcontext *ctx) tnl->Driver.Render.Quad = rast_tab[index].quad; if (index == 0) { - tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts; - tnl->Driver.Render.PrimTabElts = intel_render_tab_elts; - tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */ - tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly; - } else { - tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; - tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; - tnl->Driver.Render.ClippedLine = intelRenderClippedLine; - tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly; + tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts; + tnl->Driver.Render.PrimTabElts = intel_render_tab_elts; + tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */ + tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly; + } + else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedLine = intelRenderClippedLine; + tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly; } } } -static const GLenum reduced_prim[GL_POLYGON+1] = { +static const GLenum reduced_prim[GL_POLYGON + 1] = { GL_POINTS, GL_LINES, GL_LINES, @@ -744,35 +880,52 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { -static void intelRunPipeline( GLcontext *ctx ) +static void +intelRunPipeline(GLcontext * ctx) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); + + _mesa_lock_context_textures(ctx); + + if (ctx->NewState) + _mesa_update_state_locked(ctx); if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { - intel->vtbl.update_texture_state( intel ); + intel->vtbl.update_texture_state(intel); } if (!intel->Fallback) { - if (intel->NewGLState & _INTEL_NEW_RENDERSTATE) - intelChooseRenderState( ctx ); + if (intel->NewGLState & _INTEL_NEW_RENDERSTATE) + intelChooseRenderState(ctx); } intel->NewGLState = 0; } - _tnl_run_pipeline( ctx ); + _tnl_run_pipeline(ctx); + + _mesa_unlock_context_textures(ctx); } -static void intelRenderStart( GLcontext *ctx ) +static void +intelRenderStart(GLcontext * ctx) { - INTEL_CONTEXT(ctx)->vtbl.render_start( INTEL_CONTEXT(ctx) ); + struct intel_context *intel = intel_context(ctx); + + intel->vtbl.render_start(intel_context(ctx)); + intel->vtbl.emit_state(intel); } -static void intelRenderFinish( GLcontext *ctx ) +static void +intelRenderFinish(GLcontext * ctx) { - if (INTEL_CONTEXT(ctx)->RenderIndex & INTEL_FALLBACK_BIT) - _swrast_flush( ctx ); + struct intel_context *intel = intel_context(ctx); + + if (intel->RenderIndex & INTEL_FALLBACK_BIT) + _swrast_flush(ctx); + + INTEL_FIREVERTICES(intel); } @@ -781,28 +934,33 @@ static void intelRenderFinish( GLcontext *ctx ) /* System to flush dma and emit state changes based on the rasterized * primitive. */ -static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim ) +static void +intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); if (0) - fprintf(stderr, "%s %s %x\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(rprim), hwprim); + fprintf(stderr, "%s %s %x\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(rprim), hwprim); + + intel->vtbl.reduced_primitive_state(intel, rprim); - intel->vtbl.reduced_primitive_state( intel, rprim ); - /* Start a new primitive. Arrange to have it flushed later on. */ - if (hwprim != intel->prim.primitive) - intelStartInlinePrimitive( intel, hwprim ); + if (hwprim != intel->prim.primitive) { + INTEL_FIREVERTICES(intel); + + intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS); + } } -/* - */ -static void intelRenderPrimitive( GLcontext *ctx, GLenum prim ) + /* + */ +static void +intelRenderPrimitive(GLcontext * ctx, GLenum prim) { - intelContextPtr intel = INTEL_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); if (0) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); @@ -817,63 +975,54 @@ static void intelRenderPrimitive( GLcontext *ctx, GLenum prim ) * lower level functions in that case, potentially pingponging the * state: */ - if (reduced_prim[prim] == GL_TRIANGLES && + if (reduced_prim[prim] == GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) return; /* Set some primitive-dependent state and Start? a new primitive. */ - intelRasterPrimitive( ctx, reduced_prim[prim], hw_prim[prim] ); + intelRasterPrimitive(ctx, reduced_prim[prim], hw_prim[prim]); } -/**********************************************************************/ -/* Transition to/from hardware rasterization. */ -/**********************************************************************/ - -static struct { - GLuint bit; - const char *str; -} fallbackStrings[] = { - { INTEL_FALLBACK_DRAW_BUFFER, "Draw buffer" }, - { INTEL_FALLBACK_READ_BUFFER, "Read buffer" }, - { INTEL_FALLBACK_USER, "User" }, - { INTEL_FALLBACK_NO_BATCHBUFFER, "No Batchbuffer" }, - { INTEL_FALLBACK_NO_TEXMEM, "No Texmem" }, - { INTEL_FALLBACK_RENDERMODE, "Rendermode" }, - - { I830_FALLBACK_TEXTURE, "i830 texture" }, - { I830_FALLBACK_COLORMASK, "i830 colormask" }, - { I830_FALLBACK_STENCIL, "i830 stencil" }, - { I830_FALLBACK_STIPPLE, "i830 stipple" }, - { I830_FALLBACK_LOGICOP, "i830 logicop" }, - - { I915_FALLBACK_TEXTURE, "i915 texture" }, - { I915_FALLBACK_COLORMASK, "i915 colormask" }, - { I915_FALLBACK_STENCIL, "i915 stencil" }, - { I915_FALLBACK_STIPPLE, "i915 stipple" }, - { I915_FALLBACK_PROGRAM, "i915 program" }, - { I915_FALLBACK_LOGICOP, "i915 logicop" }, - { I915_FALLBACK_POLYGON_SMOOTH, "i915 polygon smooth" }, - { I915_FALLBACK_POINT_SMOOTH, "i915 point smooth" }, - - { 0, NULL } + /**********************************************************************/ + /* Transition to/from hardware rasterization. */ + /**********************************************************************/ + +static char *fallbackStrings[] = { + [0] = "Draw buffer", + [1] = "Read buffer", + [2] = "Depth buffer", + [3] = "Stencil buffer", + [4] = "User disable", + [5] = "Render mode", + + [12] = "Texture", + [13] = "Color mask", + [14] = "Stencil", + [15] = "Stipple", + [16] = "Program", + [17] = "Logic op", + [18] = "Smooth polygon", + [19] = "Smooth point", }; -static const char * +static char * getFallbackString(GLuint bit) { - int i; - for (i = 0; fallbackStrings[i].bit; i++) { - if (fallbackStrings[i].bit == bit) - return fallbackStrings[i].str; + int i = 0; + while (bit > 1) { + i++; + bit >>= 1; } - return "unknown fallback bit"; + return fallbackStrings[i]; } -void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode ) + +void +intelFallback(struct intel_context *intel, GLuint bit, GLboolean mode) { GLcontext *ctx = &intel->ctx; TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -883,20 +1032,19 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode ) intel->Fallback |= bit; if (oldfallback == 0) { intelFlush(ctx); - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "ENTER FALLBACK 0x%x: %s\n", + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "ENTER FALLBACK %x: %s\n", bit, getFallbackString(bit)); - _swsetup_Wakeup( ctx ); + _swsetup_Wakeup(ctx); intel->RenderIndex = ~0; } } else { intel->Fallback &= ~bit; if (oldfallback == bit) { - _swrast_flush( ctx ); - if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "LEAVE FALLBACK 0x%x: %s\n", - bit, getFallbackString(bit)); + _swrast_flush(ctx); + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); tnl->Driver.Render.Start = intelRenderStart; tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive; tnl->Driver.Render.Finish = intelRenderFinish; @@ -904,18 +1052,94 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode ) tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - _tnl_invalidate_vertex_state( ctx, ~0 ); - _tnl_invalidate_vertices( ctx, ~0 ); - _tnl_install_attrs( ctx, - intel->vertex_attrs, - intel->vertex_attr_count, - intel->ViewportMatrix.m, 0 ); + _tnl_invalidate_vertex_state(ctx, ~0); + _tnl_invalidate_vertices(ctx, ~0); + _tnl_install_attrs(ctx, + intel->vertex_attrs, + intel->vertex_attr_count, + intel->ViewportMatrix.m, 0); intel->NewGLState |= _INTEL_NEW_RENDERSTATE; } } } +union fi +{ + GLfloat f; + GLint i; +}; + + +/**********************************************************************/ +/* Used only with the metaops callbacks. */ +/**********************************************************************/ +static void +intel_meta_draw_poly(struct intel_context *intel, + GLuint n, + GLfloat xy[][2], + GLfloat z, GLuint color, GLfloat tex[][2]) +{ + union fi *vb; + GLint i; + GLboolean was_locked = intel->locked; + + if (!was_locked) + LOCK_HARDWARE(intel); + + /* All 3d primitives should be emitted with LOOP_CLIPRECTS, + * otherwise the drawing origin (DR4) might not be set correctly. + */ + intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS); + vb = (union fi *) intelExtendInlinePrimitive(intel, n * 6); + + for (i = 0; i < n; i++) { + vb[0].f = xy[i][0]; + vb[1].f = xy[i][1]; + vb[2].f = z; + vb[3].i = color; + vb[4].f = tex[i][0]; + vb[5].f = tex[i][1]; + vb += 6; + } + + INTEL_FIREVERTICES(intel); + + if (!was_locked) + UNLOCK_HARDWARE(intel); +} + +static void +intel_meta_draw_quad(struct intel_context *intel, + GLfloat x0, GLfloat x1, + GLfloat y0, GLfloat y1, + GLfloat z, + GLuint color, + GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1) +{ + GLfloat xy[4][2]; + GLfloat tex[4][2]; + + xy[0][0] = x0; + xy[0][1] = y0; + xy[1][0] = x1; + xy[1][1] = y0; + xy[2][0] = x1; + xy[2][1] = y1; + xy[3][0] = x0; + xy[3][1] = y1; + + tex[0][0] = s0; + tex[0][1] = t0; + tex[1][0] = s1; + tex[1][1] = t0; + tex[2][0] = s1; + tex[2][1] = t1; + tex[3][0] = s0; + tex[3][1] = t1; + + intel_meta_draw_poly(intel, 4, xy, z, color, tex); +} @@ -924,8 +1148,10 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode ) /**********************************************************************/ -void intelInitTriFuncs( GLcontext *ctx ) +void +intelInitTriFuncs(GLcontext * ctx) { + struct intel_context *intel = intel_context(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); static int firsttime = 1; @@ -942,4 +1168,6 @@ void intelInitTriFuncs( GLcontext *ctx ) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; + + intel->vtbl.meta_draw_quad = intel_meta_draw_quad; } diff --git a/i915/intel_tris.h b/i915/intel_tris.h index d7e382f..021e5c6 100644 --- a/i915/intel_tris.h +++ b/i915/intel_tris.h @@ -30,6 +30,8 @@ #include "mtypes.h" + + #define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE | \ _DD_NEW_TRI_UNFILLED | \ _DD_NEW_TRI_LIGHT_TWOSIDE | \ @@ -38,9 +40,15 @@ _NEW_PROGRAM | \ _NEW_POLYGONSTIPPLE) -extern void intelInitTriFuncs( GLcontext *ctx ); +extern void intelInitTriFuncs(GLcontext * ctx); + +extern void intelChooseRenderState(GLcontext * ctx); + +extern void intelStartInlinePrimitive(struct intel_context *intel, + GLuint prim, GLuint flags); +extern void intelWrapInlinePrimitive(struct intel_context *intel); -extern void intelPrintRenderState( const char *msg, GLuint state ); -extern void intelChooseRenderState( GLcontext *ctx ); +GLuint *intelExtendInlinePrimitive(struct intel_context *intel, + GLuint dwords); #endif diff --git a/i915/server/i830_common.h b/i915/server/i830_common.h deleted file mode 100644 index fb6ceaa..0000000 --- a/i915/server/i830_common.h +++ /dev/null @@ -1,212 +0,0 @@ -/************************************************************************** - -Copyright 2001 VA Linux Systems Inc., Fremont, California. -Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */ - -#ifndef _I830_COMMON_H_ -#define _I830_COMMON_H_ - - -#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ -#define I830_LOG_MIN_TEX_REGION_SIZE 14 - - -/* Driver specific DRM command indices - * NOTE: these are not OS specific, but they are driver specific - */ -#define DRM_I830_INIT 0x00 -#define DRM_I830_FLUSH 0x01 -#define DRM_I830_FLIP 0x02 -#define DRM_I830_BATCHBUFFER 0x03 -#define DRM_I830_IRQ_EMIT 0x04 -#define DRM_I830_IRQ_WAIT 0x05 -#define DRM_I830_GETPARAM 0x06 -#define DRM_I830_SETPARAM 0x07 -#define DRM_I830_ALLOC 0x08 -#define DRM_I830_FREE 0x09 -#define DRM_I830_INIT_HEAP 0x0a -#define DRM_I830_CMDBUFFER 0x0b -#define DRM_I830_DESTROY_HEAP 0x0c - -typedef struct { - enum { - I830_INIT_DMA = 0x01, - I830_CLEANUP_DMA = 0x02, - I830_RESUME_DMA = 0x03 - } func; - unsigned int mmio_offset; - int sarea_priv_offset; - unsigned int ring_start; - unsigned int ring_end; - unsigned int ring_size; - unsigned int front_offset; - unsigned int back_offset; - unsigned int depth_offset; - unsigned int w; - unsigned int h; - unsigned int pitch; - unsigned int pitch_bits; - unsigned int back_pitch; - unsigned int depth_pitch; - unsigned int cpp; - unsigned int chipset; -} drmI830Init; - -typedef struct { - drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; - int last_upload; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - int last_dispatch; /* age of the most recently dispatched buffer */ - int ctxOwner; /* last context to upload state */ - int texAge; - int pf_enabled; /* is pageflipping allowed? */ - int pf_active; - int pf_current_page; /* which buffer is being displayed? */ - int perf_boxes; /* performance boxes to be displayed */ - int width, height; /* screen size in pixels */ - - drm_handle_t front_handle; - int front_offset; - int front_size; - - drm_handle_t back_handle; - int back_offset; - int back_size; - - drm_handle_t depth_handle; - int depth_offset; - int depth_size; - - drm_handle_t tex_handle; - int tex_offset; - int tex_size; - int log_tex_granularity; - int pitch; - int rotation; /* 0, 90, 180 or 270 */ - int rotated_offset; - int rotated_size; - int rotated_pitch; - int virtualX, virtualY; - - unsigned int front_tiled; - unsigned int back_tiled; - unsigned int depth_tiled; - unsigned int rotated_tiled; - unsigned int rotated2_tiled; - - int pipeA_x; - int pipeA_y; - int pipeA_w; - int pipeA_h; - int pipeB_x; - int pipeB_y; - int pipeB_w; - int pipeB_h; -} drmI830Sarea; - -/* Flags for perf_boxes - */ -#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ -#define I830_BOX_FLIP 0x2 /* populated by kernel */ -#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ -#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ -#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ - - -typedef struct { - int start; /* agp offset */ - int used; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830BatchBuffer; - -typedef struct { - char *buf; /* agp offset */ - int sz; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830CmdBuffer; - -typedef struct { - int *irq_seq; -} drmI830IrqEmit; - -typedef struct { - int irq_seq; -} drmI830IrqWait; - -typedef struct { - int param; - int *value; -} drmI830GetParam; - -#define I830_PARAM_IRQ_ACTIVE 1 -#define I830_PARAM_ALLOW_BATCHBUFFER 2 - -typedef struct { - int param; - int value; -} drmI830SetParam; - -#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 -#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 -#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 - - -/* A memory manager for regions of shared memory: - */ -#define I830_MEM_REGION_AGP 1 - -typedef struct { - int region; - int alignment; - int size; - int *region_offset; /* offset from start of fb or agp */ -} drmI830MemAlloc; - -typedef struct { - int region; - int region_offset; -} drmI830MemFree; - -typedef struct { - int region; - int size; - int start; -} drmI830MemInitHeap; - -typedef struct { - int region; -} drmI830MemDestroyHeap; - - -#endif /* _I830_DRM_H_ */ diff --git a/i915/server/i830_dri.h b/i915/server/i830_dri.h deleted file mode 100644 index 6c9a709..0000000 --- a/i915/server/i830_dri.h +++ /dev/null @@ -1,73 +0,0 @@ -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */ - -#ifndef _I830_DRI_H -#define _I830_DRI_H - -#include "xf86drm.h" -#include "i830_common.h" - -#define I830_MAX_DRAWABLES 256 - -#define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 3 -#define I830_PATCHLEVEL 0 - -#define I830_REG_SIZE 0x80000 - -typedef struct _I830DRIRec { - drm_handle_t regs; - drmSize regsSize; - - drmSize backbufferSize; - drm_handle_t backbuffer; - - drmSize depthbufferSize; - drm_handle_t depthbuffer; - - drmSize rotatedSize; - drm_handle_t rotatedbuffer; - - drm_handle_t textures; - int textureSize; - - drm_handle_t agp_buffers; - drmSize agp_buf_size; - - int deviceID; - int width; - int height; - int mem; - int cpp; - int bitsPerPixel; - - int fbOffset; - int fbStride; - - int backOffset; - int backPitch; - - int depthOffset; - int depthPitch; - - int rotatedOffset; - int rotatedPitch; - - int logTextureGranularity; - int textureOffset; - - int irq; - int sarea_priv_offset; -} I830DRIRec, *I830DRIPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830ConfigPrivRec, *I830ConfigPrivPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830DRIContextRec, *I830DRIContextPtr; - - -#endif diff --git a/i965/Makefile.am b/i965/Makefile.am index 163ad0f..b1b816c 100644 --- a/i965/Makefile.am +++ b/i965/Makefile.am @@ -1,30 +1,38 @@ AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING +I965_CFLAGS = -I../shared -I../shared/server + i965_dri_la_LTLIBRARIES = i965_dri.la -i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver -I../shared +i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I965_CFLAGS) i965_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl\ $(DRM_LIBS) $(DRI_LIBS) i965_dri_ladir = @libdir@/dri i965_dri_la_SOURCES = \ - bufmgr_fake.c \ - intel_batchbuffer.c \ - intel_blit.c \ - intel_buffer_objects.c \ - intel_buffers.c \ - intel_context.c \ - intel_ioctl.c \ - intel_mipmap_tree.c \ - intel_regions.c \ - intel_screen.c \ - intel_span.c \ - intel_pixel_copy.c \ - intel_pixel_bitmap.c \ + ../shared/intel_batchbuffer.c \ + ../shared/intel_blit.c \ + ../shared/intel_buffer_objects.c \ + ../shared/intel_buffers.c \ + ../shared/intel_bufmgr_ttm.c \ + ../shared/intel_context.c \ + ../shared/intel_decode.c \ + ../shared/intel_depthstencil.c \ + ../shared/intel_fbo.c \ + ../shared/intel_ioctl.c \ + ../shared/intel_mipmap_tree.c \ + ../shared/intel_regions.c \ + ../shared/intel_screen.c \ + ../shared/intel_span.c \ + ../shared/intel_pixel.c \ + ../shared/intel_pixel_copy.c \ + ../shared/intel_pixel_bitmap.c \ intel_state.c \ - intel_tex.c \ + ../shared/intel_tex.c \ + ../shared/intel_tex_copy.c \ + ../shared/intel_tex_format.c \ + ../shared/intel_tex_image.c \ ../shared/intel_tex_layout.c \ - intel_tex_validate.c \ - brw_aub.c \ - brw_aub_playback.c \ + ../shared/intel_tex_subimage.c \ + ../shared/intel_tex_validate.c \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -45,7 +53,6 @@ i965_dri_la_SOURCES = \ brw_gs.c \ brw_gs_emit.c \ brw_gs_state.c \ - brw_hal.c \ brw_metaops.c \ brw_misc_state.c \ brw_program.c \ @@ -54,7 +61,7 @@ i965_dri_la_SOURCES = \ brw_sf_state.c \ brw_state_batch.c \ brw_state_cache.c \ - brw_state_pool.c \ + brw_state_dump.c \ brw_state_upload.c \ brw_tex.c \ brw_tex_layout.c \ @@ -71,9 +78,10 @@ i965_dri_la_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ + brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c + brw_wm_surface_state.c diff --git a/i965/brw_aub.c b/i965/brw_aub.c deleted file mode 100644 index c549f7a..0000000 --- a/i965/brw_aub.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_context.h" -#include "brw_aub.h" -#include "intel_regions.h" -#include <stdio.h> - -extern char *__progname; - - -/* Registers to control page table - */ -#define PGETBL_CTL 0x2020 -#define PGETBL_ENABLED 0x1 - -#define NR_GTT_ENTRIES 65536 /* 256 mb */ - -#define FAIL \ -do { \ - fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \ - exit(1); \ -} while (0) - - -/* Emit the headers at the top of each aubfile. Initialize the GTT. - */ -static void init_aubfile( FILE *aub_file ) -{ - struct aub_file_header fh; - struct aub_block_header bh; - unsigned int data; - - static int nr; - - nr++; - - /* Emit the aub header: - */ - memset(&fh, 0, sizeof(fh)); - - fh.instruction_type = AUB_FILE_HEADER; - fh.minor = 0x0; - fh.major = 0x7; - memcpy(fh.application, __progname, sizeof(fh.application)); - fh.day = (nr>>24) & 0xff; - fh.month = 0x0; - fh.year = 0x0; - fh.timezone = 0x0; - fh.second = nr & 0xff; - fh.minute = (nr>>8) & 0xff; - fh.hour = (nr>>16) & 0xff; - fh.comment_length = 0x0; - - if (fwrite(&fh, sizeof(fh), 1, aub_file) < 1) - FAIL; - - /* Setup the GTT starting at main memory address zero (!): - */ - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_MMI0_WRITE32; - bh.type = 0x0; - bh.address_space = ADDR_GTT; /* ??? */ - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = PGETBL_CTL; - bh.length = 0x4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) - FAIL; - - data = 0x0 | PGETBL_ENABLED; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 1) - FAIL; -} - - -static void init_aub_gtt( struct brw_context *brw, - GLuint start_offset, - GLuint size ) -{ - FILE *aub_file = brw->intel.aub_file; - struct aub_block_header bh; - unsigned int i; - - assert(start_offset + size < NR_GTT_ENTRIES * 4096); - - - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = 0x0; - bh.address_space = ADDR_MAIN; - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = start_offset / 4096 * 4; - bh.length = size / 4096 * 4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) - FAIL; - - for (i = 0; i < size / 4096; i++) { - GLuint data = brw->next_free_page | 1; - - brw->next_free_page += 4096; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 1) - FAIL; - } - -} - -static void write_block_header( FILE *aub_file, - struct aub_block_header *bh, - const GLuint *data, - GLuint sz ) -{ - sz = (sz + 3) & ~3; - - if (fwrite(bh, sizeof(*bh), 1, aub_file) < 1) - FAIL; - - if (fwrite(data, sz, 1, aub_file) < 1) - FAIL; - - fflush(aub_file); -} - - -static void write_dump_bmp( FILE *aub_file, - struct aub_dump_bmp *db ) -{ - if (fwrite(db, sizeof(*db), 1, aub_file) < 1) - FAIL; - - fflush(aub_file); -} - - - -static void brw_aub_gtt_data( struct intel_context *intel, - GLuint offset, - const void *data, - GLuint sz, - GLuint type, - GLuint state_type ) -{ - struct aub_block_header bh; - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - - if (type == DW_GENERAL_STATE) { - bh.general_state_type = state_type; - bh.surface_state_type = 0; - } - else { - bh.general_state_type = 0; - bh.surface_state_type = state_type; - } - - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(intel->aub_file, &bh, data, sz); -} - - - -static void brw_aub_gtt_cmds( struct intel_context *intel, - GLuint offset, - const void *data, - GLuint sz ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - struct aub_block_header bh; - GLuint type = CW_PRIMARY_RING_A; - - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_COMMAND_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - bh.general_state_type = 0; - bh.surface_state_type = 0; - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(brw->intel.aub_file, &bh, data, sz); -} - -static void brw_aub_dump_bmp( struct intel_context *intel, - GLuint buffer ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - intelScreenPrivate *intelScreen = brw->intel.intelScreen; - struct aub_dump_bmp db; - GLuint format; - - if (intelScreen->cpp == 4) - format = 0x7; - else - format = 0x3; - - - if (buffer == 0) { - db.instruction_type = AUB_DUMP_BMP; - db.xmin = 0; - db.ymin = 0; - db.format = format; - db.bpp = intelScreen->cpp * 8; - db.pitch = intelScreen->front.pitch / intelScreen->cpp; - db.xsize = intelScreen->width; - db.ysize = intelScreen->height; - db.addr = intelScreen->front.offset; - db.unknown = 0x0; /* 4: xmajor tiled, 0: not tiled */ - - write_dump_bmp(brw->intel.aub_file, &db); - } - else { - db.instruction_type = AUB_DUMP_BMP; - db.xmin = 0; - db.ymin = 0; - db.format = format; - db.bpp = intel->back_region->cpp * 8; - db.pitch = intel->back_region->pitch; - db.xsize = intel->back_region->pitch; - db.ysize = intel->back_region->height; - db.addr = intelScreen->back.offset; - db.unknown = intel->back_region->tiled ? 0x4 : 0x0; - - write_dump_bmp(brw->intel.aub_file, &db); - } -} - -/* Attempt to prevent monster aubfiles by closing and reopening when - * the state pools wrap. - */ -static void brw_aub_wrap( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - if (intel->aub_file) { - brw_aub_destroy(brw); - brw_aub_init(brw); - } - brw->wrap = 1; /* ??? */ -} - - -int brw_aub_init( struct brw_context *brw ) -{ - struct intel_context *intel = &brw->intel; - intelScreenPrivate *intelScreen = intel->intelScreen; - char filename[80]; - int val; - static int i = 0; - - i++; - - if (_mesa_getenv("INTEL_REPLAY")) - return 0; - - if (_mesa_getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4); - _mesa_printf("--> Aub file: %s\n", filename); - brw->intel.aub_file = fopen(filename, "w"); - } - else if (_mesa_getenv("INTEL_AUB")) { - val = snprintf(filename, sizeof(filename), "%s.aub", __progname); - if (val < 0 || val > sizeof(filename)) - strcpy(filename, "default.aub"); - - _mesa_printf("--> Aub file: %s\n", filename); - brw->intel.aub_file = fopen(filename, "w"); - } - else { - return 0; - } - - if (!brw->intel.aub_file) { - _mesa_printf("couldn't open aubfile\n"); - exit(1); - } - - brw->intel.vtbl.aub_commands = brw_aub_gtt_cmds; - brw->intel.vtbl.aub_dump_bmp = brw_aub_dump_bmp; - brw->intel.vtbl.aub_gtt_data = brw_aub_gtt_data; - brw->intel.vtbl.aub_wrap = brw_aub_wrap; - - init_aubfile(brw->intel.aub_file); - - /* The GTT is located starting address zero in main memory. Pages - * to populate the gtt start after this point. - */ - brw->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095; - - /* More or less correspond with all the agp regions mapped by the - * driver: - */ - init_aub_gtt(brw, 0, 4096*4); /* so new fulsim doesn't crash */ - init_aub_gtt(brw, intelScreen->front.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->back.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->depth.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->tex.offset, intelScreen->tex.size); - - return 0; -} - -void brw_aub_destroy( struct brw_context *brw ) -{ - if (brw->intel.aub_file) { - fclose(brw->intel.aub_file); - brw->intel.aub_file = NULL; - } -} diff --git a/i965/brw_aub.h b/i965/brw_aub.h deleted file mode 100644 index 198e36d..0000000 --- a/i965/brw_aub.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef BRW_AUB_H -#define BRW_AUB_H - -struct aub_file_header { - unsigned int instruction_type; - unsigned int pad0:16; - unsigned int minor:8; - unsigned int major:8; - unsigned char application[8*4]; - unsigned int day:8; - unsigned int month:8; - unsigned int year:16; - unsigned int timezone:8; - unsigned int second:8; - unsigned int minute:8; - unsigned int hour:8; - unsigned int comment_length:16; - unsigned int pad1:16; -}; - -struct aub_block_header { - unsigned int instruction_type; - unsigned int operation:8; - unsigned int type:8; - unsigned int address_space:8; - unsigned int pad0:8; - unsigned int general_state_type:8; - unsigned int surface_state_type:8; - unsigned int pad1:16; - unsigned int address; - unsigned int length; -}; - -struct aub_dump_bmp { - unsigned int instruction_type; - unsigned int xmin:16; - unsigned int ymin:16; - unsigned int pitch:16; - unsigned int bpp:8; - unsigned int format:8; - unsigned int xsize:16; - unsigned int ysize:16; - unsigned int addr; - unsigned int unknown; -}; - -enum bh_operation { - BH_COMMENT, - BH_DATA_WRITE, - BH_COMMAND_WRITE, - BH_MMI0_WRITE32, - BH_END_SCENE, - BH_CONFIG_MEMORY_MAP, - BH_MAX_OPERATION -}; - -enum command_write_type { - CW_HWB_RING = 1, - CW_PRIMARY_RING_A, - CW_PRIMARY_RING_B, /* XXX - disagreement with listaub! */ - CW_PRIMARY_RING_C, - CW_MAX_TYPE -}; - -enum data_write_type { - DW_NOTYPE, - DW_BATCH_BUFFER, - DW_BIN_BUFFER, - DW_BIN_POINTER_LIST, - DW_SLOW_STATE_BUFFER, - DW_VERTEX_BUFFER, - DW_2D_MAP, - DW_CUBE_MAP, - DW_INDIRECT_STATE_BUFFER, - DW_VOLUME_MAP, - DW_1D_MAP, - DW_CONSTANT_BUFFER, - DW_CONSTANT_URB_ENTRY, - DW_INDEX_BUFFER, - DW_GENERAL_STATE, - DW_SURFACE_STATE, - DW_MEDIA_OBJECT_INDIRECT_DATA, - DW_MAX_TYPE -}; - -enum data_write_general_state_type { - DWGS_NOTYPE, - DWGS_VERTEX_SHADER_STATE, - DWGS_GEOMETRY_SHADER_STATE , - DWGS_CLIPPER_STATE, - DWGS_STRIPS_FANS_STATE, - DWGS_WINDOWER_IZ_STATE, - DWGS_COLOR_CALC_STATE, - DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */ - DWGS_STRIPS_FANS_VIEWPORT_STATE, - DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */ - DWGS_SAMPLER_STATE, - DWGS_KERNEL_INSTRUCTIONS, - DWGS_SCRATCH_SPACE, - DWGS_SAMPLER_DEFAULT_COLOR, - DWGS_INTERFACE_DESCRIPTOR, - DWGS_VLD_STATE, - DWGS_VFE_STATE, - DWGS_MAX_TYPE -}; - -enum data_write_surface_state_type { - DWSS_NOTYPE, - DWSS_BINDING_TABLE_STATE, - DWSS_SURFACE_STATE, - DWSS_MAX_TYPE -}; - -enum memory_map_type { - MM_DEFAULT, - MM_DYNAMIC, - MM_MAX_TYPE -}; - -enum address_space { - ADDR_GTT, - ADDR_LOCAL, - ADDR_MAIN, - ADDR_MAX -}; - - -#define AUB_FILE_HEADER 0xe085000b -#define AUB_BLOCK_HEADER 0xe0c10003 -#define AUB_DUMP_BMP 0xe09e0004 - -struct brw_context; -struct intel_context; - -int brw_aub_init( struct brw_context *brw ); -void brw_aub_destroy( struct brw_context *brw ); - -int brw_playback_aubfile(struct brw_context *brw, - const char *filename); - -#endif diff --git a/i965/brw_aub_playback.c b/i965/brw_aub_playback.c deleted file mode 100644 index 99d9475..0000000 --- a/i965/brw_aub_playback.c +++ /dev/null @@ -1,446 +0,0 @@ - -#include <stdio.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> - -#include "brw_aub.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "intel_ioctl.h" -#include "bufmgr.h" - -struct aub_state { - struct intel_context *intel; - const char *map; - unsigned int csr; - unsigned int sz; -}; - - -static int gobble( struct aub_state *s, int size ) -{ - if (s->csr + size > s->sz) { - _mesa_printf("EOF in %s\n", __FUNCTION__); - return 1; - } - - s->csr += size; - return 0; -} - -static void flush_and_fence( struct aub_state *s ) -{ - struct intel_context *intel = s->intel; - GLuint buf[2]; - - buf[0] = intel->vtbl.flush_cmd(); - buf[1] = 0; - - intel_cmd_ioctl(intel, (char *)&buf, sizeof(buf)); - - intelWaitIrq( intel, intelEmitIrqLocked( intel )); -} - -static void flush_cmds( struct aub_state *s, - const void *data, - int len ) -{ - DBG("%s %d\n", __FUNCTION__, len); - - if (len & 0x4) { - unsigned int *tmp = malloc(len + 4); - DBG("padding to octword\n"); - memcpy(tmp, data, len); - tmp[len/4] = MI_NOOP; - flush_cmds(s, tmp, len+4); - free(tmp); - return; - } - - /* For ring data, just send off immediately via an ioctl. - * This differs slightly from how the stream was executed - * initially as this would have been a batchbuffer. - */ - intel_cmd_ioctl(s->intel, (void *)data, len); - - if (1) - flush_and_fence(s); -} - -static const char *pstrings[] = { - "none", - "POINTLIST", - "LINELIST", - "LINESTRIP", - "TRILIST", - "TRISTRIP", - "TRIFAN", - "QUADLIST", - "QUADSTRIP", - "LINELIST_ADJ", - "LINESTRIP_ADJ", - "TRILIST_ADJ", - "TRISTRIP_ADJ", - "TRISTRIP_REVERSE", - "POLYGON", - "RECTLIST", - "LINELOOP", - "POINTLIST_BF", - "LINESTRIP_CONT", - "LINESTRIP_BF", - "LINESTRIP_CONT_BF", - "TRIFAN_NOSTIPPLE", -}; - -static void do_3d_prim( struct aub_state *s, - const void *data, - int len ) -{ - struct brw_3d_primitive prim; - const struct brw_3d_primitive *orig = data; - int i; - - assert(len == sizeof(prim)); - memcpy(&prim, data, sizeof(prim)); - -#define START 0 -#define BLOCK (12*28) - - if (orig->verts_per_instance < BLOCK) - flush_cmds(s, &prim, sizeof(prim)); - else { - for (i = START; i + BLOCK < orig->verts_per_instance; i += BLOCK/2) { - prim.start_vert_location = i; - prim.verts_per_instance = BLOCK; - _mesa_printf("%sprim %d/%s verts %d..%d (of %d)\n", - prim.header.indexed ? "INDEXED " : "", - prim.header.topology, pstrings[prim.header.topology%16], - prim.start_vert_location, - prim.start_vert_location + prim.verts_per_instance, - orig->verts_per_instance); - flush_cmds(s, &prim, sizeof(prim)); - } - } -} - - - -static struct { - int cmd; - const char *name; - int has_length; -} cmd_info[] = { - { 0, "NOOP", 0 }, - { 0x5410, "XY_COLOR_BLT_RGB", 1 }, - { 0x5430, "XY_COLOR_BLT_RGBA", 1 }, - { 0x54d0, "XY_SRC_COPY_BLT_RGB", 1 }, - { 0x54f0, "XY_SRC_COPY_BLT_RGBA", 1 }, - { CMD_URB_FENCE, "URB_FENCE", 1 }, - { CMD_CONST_BUFFER_STATE, "CONST_BUFFER_STATE", 1 }, - { CMD_CONST_BUFFER, "CONST_BUFFER", 1 }, - { CMD_STATE_BASE_ADDRESS, "STATE_BASE_ADDRESS", 1 }, - { CMD_STATE_INSN_POINTER, "STATE_INSN_POINTER", 1 }, - { CMD_PIPELINE_SELECT_965, "PIPELINE_SELECT", 0, }, - { CMD_PIPELINE_SELECT_IGD, "PIPELINE_SELECT", 0,}, - { CMD_PIPELINED_STATE_POINTERS, "PIPELINED_STATE_POINTERS", 1 }, - { CMD_BINDING_TABLE_PTRS, "BINDING_TABLE_PTRS", 1 }, - { CMD_VERTEX_BUFFER, "VERTEX_BUFFER", 1 }, - { CMD_VERTEX_ELEMENT, "VERTEX_ELEMENT", 1 }, - { CMD_INDEX_BUFFER, "INDEX_BUFFER", 1 }, - { CMD_VF_STATISTICS_965, "VF_STATISTICS", 0 }, - { CMD_VF_STATISTICS_IGD, "VF_STATISTICS", 0 }, - { CMD_DRAW_RECT, "DRAW_RECT", 1 }, - { CMD_BLEND_CONSTANT_COLOR, "BLEND_CONSTANT_COLOR", 1 }, - { CMD_CHROMA_KEY, "CHROMA_KEY", 1 }, - { CMD_DEPTH_BUFFER, "DEPTH_BUFFER", 1 }, - { CMD_POLY_STIPPLE_OFFSET, "POLY_STIPPLE_OFFSET", 1 }, - { CMD_POLY_STIPPLE_PATTERN, "POLY_STIPPLE_PATTERN", 1 }, - { CMD_LINE_STIPPLE_PATTERN, "LINE_STIPPLE_PATTERN", 1 }, - { CMD_AA_LINE_PARAMETERS, "AA_LINE_PARAMETERS", 1}, - { CMD_GLOBAL_DEPTH_OFFSET_CLAMP, "GLOBAL_DEPTH_OFFSET_CLAMP", 1 }, - { CMD_PIPE_CONTROL, "PIPE_CONTROL", 1 }, - { CMD_MI_FLUSH, "MI_FLUSH", 0 }, - { CMD_3D_PRIM, "3D_PRIM", 1 }, -}; - -#define NR_CMDS (sizeof(cmd_info)/sizeof(cmd_info[0])) - - -static int find_command( unsigned int cmd ) -{ - int i; - - for (i = 0; i < NR_CMDS; i++) - if (cmd == cmd_info[i].cmd) - return i; - - return -1; -} - - - -static int parse_commands( struct aub_state *s, - const unsigned int *data, - int len ) -{ - while (len) { - int cmd = data[0] >> 16; - int dwords; - int i; - - i = find_command(cmd); - - if (i < 0) { - _mesa_printf("couldn't find info for cmd %x\n", cmd); - return 1; - } - - if (cmd_info[i].has_length) - dwords = (data[0] & 0xff) + 2; - else - dwords = 1; - - _mesa_printf("%s (%d dwords) 0x%x\n", cmd_info[i].name, dwords, data[0]); - - if (len < dwords * 4) { - _mesa_printf("EOF in %s (%d bytes)\n", __FUNCTION__, len); - return 1; - } - - - if (0 && cmd == CMD_3D_PRIM) - do_3d_prim(s, data, dwords * 4); - else - flush_cmds(s, data, dwords * 4); - - data += dwords; - len -= dwords * 4; - } - - return 0; -} - - - -static void parse_data_write( struct aub_state *s, - const struct aub_block_header *bh, - void *dest, - const unsigned int *data, - int len ) -{ - switch (bh->type) { - case DW_GENERAL_STATE: - switch (bh->general_state_type) { - case DWGS_VERTEX_SHADER_STATE: { - struct brw_vs_unit_state vs; - assert(len == sizeof(vs)); - - _mesa_printf("DWGS_VERTEX_SHADER_STATE\n"); - memcpy(&vs, data, sizeof(vs)); - -/* vs.vs6.vert_cache_disable = 1; */ -/* vs.thread4.max_threads = 4; */ - - memcpy(dest, &vs, sizeof(vs)); - return; - } - case DWGS_CLIPPER_STATE: { - struct brw_clip_unit_state clip; - assert(len == sizeof(clip)); - - _mesa_printf("DWGS_CLIPPER_STATE\n"); - memcpy(&clip, data, sizeof(clip)); - -/* clip.thread4.max_threads = 0; */ -/* clip.clip5.clip_mode = BRW_CLIPMODE_REJECT_ALL; */ - - memcpy(dest, &clip, sizeof(clip)); - return; - } - - case DWGS_NOTYPE: - case DWGS_GEOMETRY_SHADER_STATE: - case DWGS_STRIPS_FANS_STATE: - break; - - case DWGS_WINDOWER_IZ_STATE: { - struct brw_wm_unit_state wm; - assert(len == sizeof(wm)); - - _mesa_printf("DWGS_WINDOWER_IZ_STATE\n"); - memcpy(&wm, data, sizeof(wm)); - -/* wm.wm5.max_threads = 10; */ - - memcpy(dest, &wm, sizeof(wm)); - return; - } - - case DWGS_COLOR_CALC_STATE: - case DWGS_CLIPPER_VIEWPORT_STATE: - case DWGS_STRIPS_FANS_VIEWPORT_STATE: - case DWGS_COLOR_CALC_VIEWPORT_STATE: - case DWGS_SAMPLER_STATE: - case DWGS_KERNEL_INSTRUCTIONS: - case DWGS_SCRATCH_SPACE: - case DWGS_SAMPLER_DEFAULT_COLOR: - case DWGS_INTERFACE_DESCRIPTOR: - case DWGS_VLD_STATE: - case DWGS_VFE_STATE: - default: - break; - } - break; - case DW_SURFACE_STATE: - break; - case DW_1D_MAP: - case DW_2D_MAP: - case DW_CUBE_MAP: - case DW_VOLUME_MAP: - case DW_CONSTANT_BUFFER: - case DW_CONSTANT_URB_ENTRY: - case DW_VERTEX_BUFFER: - case DW_INDEX_BUFFER: - default: - break; - } - - memcpy(dest, data, len); -} - - -/* In order to work, the memory layout has to be the same as the X - * server which created the aubfile. - */ -static int parse_block_header( struct aub_state *s ) -{ - struct aub_block_header *bh = (struct aub_block_header *)(s->map + s->csr); - void *data = (void *)(bh + 1); - unsigned int len = (bh->length + 3) & ~3; - - _mesa_printf("block header at 0x%x\n", s->csr); - - if (s->csr + len + sizeof(*bh) > s->sz) { - _mesa_printf("EOF in data in %s\n", __FUNCTION__); - return 1; - } - - if (bh->address_space == ADDR_GTT) { - - switch (bh->operation) - { - case BH_DATA_WRITE: { - void *dest = bmFindVirtual( s->intel, bh->address, len ); - if (dest == NULL) { - _mesa_printf("Couldn't find virtual address for offset %x\n", bh->address); - return 1; - } - -#if 1 - parse_data_write(s, bh, dest, data, len); -#else - memcpy(dest, data, len); -#endif - break; - } - case BH_COMMAND_WRITE: -#if 0 - intel_cmd_ioctl(s->intel, (void *)data, len); -#else - if (parse_commands(s, data, len) != 0) - _mesa_printf("parse_commands failed\n"); -#endif - break; - default: - break; - } - } - - s->csr += sizeof(*bh) + len; - return 0; -} - - -#define AUB_FILE_HEADER 0xe085000b -#define AUB_BLOCK_HEADER 0xe0c10003 -#define AUB_DUMP_BMP 0xe09e0004 - -int brw_playback_aubfile(struct brw_context *brw, - const char *filename) -{ - struct intel_context *intel = &brw->intel; - struct aub_state state; - struct stat sb; - int fd; - int retval = 0; - - state.intel = intel; - - fd = open(filename, O_RDONLY, 0); - if (fd < 0) { - _mesa_printf("couldn't open aubfile: %s\n", filename); - return 1; - } - - if (fstat(fd, &sb) != 0) { - _mesa_printf("couldn't open %s\n", filename); - return 1; - } - - state.csr = 0; - state.sz = sb.st_size; - state.map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (state.map == NULL) { - _mesa_printf("couldn't mmap %s\n", filename); - return 1; - } - - LOCK_HARDWARE(intel); - { - /* Make sure we don't confuse anything that might happen to be - * going on with the hardware: - */ -/* bmEvictAll(intel); */ -/* intel->vtbl.lost_hardware(intel); */ - - - /* Replay the aubfile item by item: - */ - while (retval == 0 && - state.csr != state.sz) { - unsigned int insn = *(unsigned int *)(state.map + state.csr); - - switch (insn) { - case AUB_FILE_HEADER: - retval = gobble(&state, sizeof(struct aub_file_header)); - break; - - case AUB_BLOCK_HEADER: - retval = parse_block_header(&state); - break; - - case AUB_DUMP_BMP: - retval = gobble(&state, sizeof(struct aub_dump_bmp)); - break; - - default: - _mesa_printf("unknown instruction %x\n", insn); - retval = 1; - break; - } - } - } - UNLOCK_HARDWARE(intel); - return retval; -} - - - - - - - diff --git a/i965/brw_cc.c b/i965/brw_cc.c index 8a1d152..9d8984f 100644 --- a/i965/brw_cc.c +++ b/i965/brw_cc.c @@ -37,7 +37,7 @@ #include "macros.h" #include "enums.h" -static void upload_cc_vp( struct brw_context *brw ) +static int upload_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; @@ -46,7 +46,9 @@ static void upload_cc_vp( struct brw_context *brw ) ccv.min_depth = 0.0; ccv.max_depth = 1.0; - brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); + dri_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + return dri_bufmgr_check_aperture_space(brw->cc.vp_bo); } const struct brw_tracked_state brw_cc_vp = { @@ -55,57 +57,148 @@ const struct brw_tracked_state brw_cc_vp = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_cc_vp + .prepare = upload_cc_vp }; +struct brw_cc_unit_key { + GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; -static void upload_cc_unit( struct brw_context *brw ) + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; + GLclampf alpha_ref; + + GLboolean dither; + + GLboolean depth_test, depth_write; + GLenum depth_func; +}; + +static void +cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +{ + struct gl_stencil_attrib *stencil = brw->attribs.Stencil; + + memset(key, 0, sizeof(*key)); + + key->stencil = stencil->Enabled; + key->stencil_two_side = stencil->_TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = stencil->Function[0]; + key->stencil_fail_op[0] = stencil->FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = stencil->ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = stencil->ZPassFunc[0]; + key->stencil_ref[0] = stencil->Ref[0]; + key->stencil_write_mask[0] = stencil->WriteMask[0]; + key->stencil_test_mask[0] = stencil->ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = stencil->Function[1]; + key->stencil_fail_op[1] = stencil->FailFunc[1]; + key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1]; + key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1]; + key->stencil_ref[1] = stencil->Ref[1]; + key->stencil_write_mask[1] = stencil->WriteMask[1]; + key->stencil_test_mask[1] = stencil->ValueMask[1]; + } + + if (brw->attribs.Color->_LogicOpEnabled) + key->logic_op = brw->attribs.Color->LogicOp; + else + key->logic_op = GL_COPY; + + key->color_blend = brw->attribs.Color->BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = brw->attribs.Color->BlendEquationRGB; + key->blend_eq_a = brw->attribs.Color->BlendEquationA; + key->blend_src_rgb = brw->attribs.Color->BlendSrcRGB; + key->blend_dst_rgb = brw->attribs.Color->BlendDstRGB; + key->blend_src_a = brw->attribs.Color->BlendSrcA; + key->blend_dst_a = brw->attribs.Color->BlendDstA; + } + + key->alpha_enabled = brw->attribs.Color->AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = brw->attribs.Color->AlphaFunc; + key->alpha_ref = brw->attribs.Color->AlphaRef; + } + + key->dither = brw->attribs.Color->DitherFlag; + + key->depth_test = brw->attribs.Depth->Test; + if (key->depth_test) { + key->depth_func = brw->attribs.Depth->Func; + key->depth_write = brw->attribs.Depth->Mask; + } +} + +/** + * Creates the state cache entry for the given CC unit key. + */ +static dri_bo * +cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) { struct brw_cc_unit_state cc; - + dri_bo *bo; + memset(&cc, 0, sizeof(cc)); /* _NEW_STENCIL */ - if (brw->attribs.Stencil->Enabled) { - cc.cc0.stencil_enable = brw->attribs.Stencil->Enabled; - cc.cc0.stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[0]); - cc.cc0.stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[0]); - cc.cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[0]); - cc.cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[0]); - cc.cc1.stencil_ref = brw->attribs.Stencil->Ref[0]; - cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0]; - cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0]; - - if (brw->attribs.Stencil->TestTwoSide) { - cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide; - cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]); - cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]); - cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[1]); - cc.cc1.bf_stencil_ref = brw->attribs.Stencil->Ref[1]; - cc.cc2.bf_stencil_write_mask = brw->attribs.Stencil->WriteMask[1]; - cc.cc2.bf_stencil_test_mask = brw->attribs.Stencil->ValueMask[1]; + if (key->stencil) { + cc.cc0.stencil_enable = 1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; } /* Not really sure about this: */ - if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) cc.cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ - if (brw->attribs.Color->_LogicOpEnabled) { + if (key->logic_op != GL_COPY) { cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op( brw->attribs.Color->LogicOp ); - } - else if (brw->attribs.Color->BlendEnabled) { - GLenum eqRGB = brw->attribs.Color->BlendEquationRGB; - GLenum eqA = brw->attribs.Color->BlendEquationA; - GLenum srcRGB = brw->attribs.Color->BlendSrcRGB; - GLenum dstRGB = brw->attribs.Color->BlendDstRGB; - GLenum srcA = brw->attribs.Color->BlendSrcA; - GLenum dstA = brw->attribs.Color->BlendDstA; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -115,49 +208,78 @@ static void upload_cc_unit( struct brw_context *brw ) srcA = dstA = GL_ONE; } - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || eqA != eqRGB); } - if (brw->attribs.Color->AlphaEnabled) { + if (key->alpha_enabled) { cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(brw->attribs.Color->AlphaFunc); - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.Color->AlphaRef); - + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); } - if (brw->attribs.Color->DitherFlag) { + if (key->dither) { cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ - if (brw->attribs.Depth->Test) { - cc.cc2.depth_test = brw->attribs.Depth->Test; - cc.cc2.depth_test_function = intel_translate_compare_func(brw->attribs.Depth->Func); - cc.cc2.depth_write_enable = brw->attribs.Depth->Mask; + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; } - + /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; - + cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; + cc.cc5.statistics_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, + key, sizeof(*key), + &brw->cc.vp_bo, 1, + &cc, sizeof(cc), + NULL, NULL); + + /* Emit CC viewport relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + + return bo; +} + +static int prepare_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_key key; + + cc_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, + &key, sizeof(key), + &brw->cc.vp_bo, 1, + NULL); - brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = cc_unit_create_from_key(brw, &key); + return dri_bufmgr_check_aperture_space(brw->cc.state_bo); } const struct brw_tracked_state brw_cc_unit = { @@ -166,7 +288,7 @@ const struct brw_tracked_state brw_cc_unit = { .brw = 0, .cache = CACHE_NEW_CC_VP }, - .update = upload_cc_unit + .prepare = prepare_cc_unit, }; diff --git a/i965/brw_clip.c b/i965/brw_clip.c index 8f907be..540108e 100644 --- a/i965/brw_clip.c +++ b/i965/brw_clip.c @@ -119,31 +119,19 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ - brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->clip.prog_data ); + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_CLIP_PROG], - key, sizeof(*key), - &brw->clip.prog_data, - &brw->clip.prog_gs_offset); -} - - - - /* Calculate interpolants for triangle and line rasterization. */ -static void upload_clip_prog( struct brw_context *brw ) +static int upload_clip_prog( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; @@ -180,12 +168,10 @@ static void upload_clip_prog( struct brw_context *brw ) offset_front = 0; break; case GL_LINE: - key.do_unfilled = 1; fill_front = CLIP_LINE; offset_front = brw->attribs.Polygon->OffsetLine; break; case GL_POINT: - key.do_unfilled = 1; fill_front = CLIP_POINT; offset_front = brw->attribs.Polygon->OffsetPoint; break; @@ -200,26 +186,23 @@ static void upload_clip_prog( struct brw_context *brw ) offset_back = 0; break; case GL_LINE: - key.do_unfilled = 1; fill_back = CLIP_LINE; offset_back = brw->attribs.Polygon->OffsetLine; break; case GL_POINT: - key.do_unfilled = 1; fill_back = CLIP_POINT; offset_back = brw->attribs.Polygon->OffsetPoint; break; } } - if (brw->attribs.Polygon->BackMode != GL_FILL || - brw->attribs.Polygon->FrontMode != GL_FILL) - key.do_unfilled = 1; + if (brw->attribs.Polygon->BackMode != GL_FILL || + brw->attribs.Polygon->FrontMode != GL_FILL) { + key.do_unfilled = 1; - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - if (key.do_unfilled) { + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; if (offset_back || offset_front) { @@ -252,8 +235,15 @@ static void upload_clip_prog( struct brw_context *brw ) } } - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data); + if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); + + return dri_bufmgr_check_aperture_space(brw->clip.prog_bo); } @@ -266,5 +256,5 @@ const struct brw_tracked_state brw_clip_prog = { .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_clip_prog + .prepare = upload_clip_prog }; diff --git a/i965/brw_clip.h b/i965/brw_clip.h index 49b2770..e067478 100644 --- a/i965/brw_clip.h +++ b/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:16; + GLuint attrs:32; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,7 +51,7 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:1; + GLuint pad0:17; GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; @@ -167,4 +167,9 @@ void brw_clip_copy_colors( struct brw_clip_compile *c, void brw_clip_init_clipmask( struct brw_clip_compile *c ); +struct brw_reg get_tmp( struct brw_clip_compile *c ); + +void brw_clip_project_position(struct brw_clip_compile *c, + struct brw_reg pos ); + #endif diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c index 8318227..0930e6a 100644 --- a/i965/brw_clip_line.c +++ b/i965/brw_clip_line.c @@ -130,6 +130,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); @@ -146,6 +147,16 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_planes(c); brw_clip_init_clipmask(c); + /* -ve rhw workaround */ + if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + } + + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) @@ -183,13 +194,20 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* Coming back in. We know that both cannot be negative * because the line would have been culled in that case. */ - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); } brw_ENDIF(p, is_negative); } diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c index 37a25a9..2d0b24c 100644 --- a/i965/brw_clip_state.c +++ b/i965/brw_clip_state.c @@ -34,46 +34,75 @@ #include "brw_defines.h" #include "macros.h" +struct brw_clip_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int clip_mode; + unsigned int curbe_offset; -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_state clip; + unsigned int nr_urb_entries, urb_size; +}; - memset(&clip, 0, sizeof(clip)); +static void +clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) +{ + memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16; - clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; - clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; - clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; - clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + key->total_grf = brw->clip.prog_data->total_grf; + key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; + key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; + key->clip_mode = brw->clip.prog_data->clip_mode; /* BRW_NEW_CURBE_OFFSETS */ - clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + key->curbe_offset = brw->curbe.clip_start; /* BRW_NEW_URB_FENCE */ - clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; - clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */ + key->nr_urb_entries = brw->urb.nr_clip_entries; + key->urb_size = brw->urb.vsize; +} - if (INTEL_DEBUG & DEBUG_STATS) - clip.thread4.stats_enable = 1; +static dri_bo * +clip_unit_create_from_key(struct brw_context *brw, + struct brw_clip_unit_key *key) +{ + struct brw_clip_unit_state clip; + dri_bo *bo; + + memset(&clip, 0, sizeof(clip)); + + clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + /* reloc */ + clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; - /* CONSTANT */ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip.thread1.single_program_flow = 1; + + clip.thread3.urb_entry_read_length = key->urb_entry_read_length; + clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; clip.thread3.dispatch_grf_start_reg = 1; clip.thread3.urb_entry_read_offset = 0; + + clip.thread4.nr_urb_entries = key->nr_urb_entries; + clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (INTEL_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_IGD(brw)) + if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; @@ -82,9 +111,42 @@ static void upload_clip_unit( struct brw_context *brw ) clip.viewport_ymin = -1; clip.viewport_ymax = 1; - brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); + bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + &brw->clip.prog_bo, 1, + &clip, sizeof(clip), + NULL, NULL); + + /* Emit clip program relocation */ + assert(brw->clip.prog_bo); + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + + return bo; } +static int upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_key key; + int ret = 0; + + clip_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->clip.state_bo); + brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, + &key, sizeof(key), + &brw->clip.prog_bo, 1, + NULL); + if (brw->clip.state_bo == NULL) { + brw->clip.state_bo = clip_unit_create_from_key(brw, &key); + } + + ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo); + return ret; +} const struct brw_tracked_state brw_clip_unit = { .dirty = { @@ -93,5 +155,5 @@ const struct brw_tracked_state brw_clip_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG }, - .update = upload_clip_unit + .prepare = upload_clip_unit, }; diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c index 0fc7306..0003901 100644 --- a/i965/brw_clip_tri.c +++ b/i965/brw_clip_tri.c @@ -42,6 +42,10 @@ #include "brw_util.h" #include "brw_clip.h" +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, @@ -435,15 +439,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c ) brw_ENDIF(p, do_clip); } - +static void brw_clip_test( struct brw_clip_compile *c ) +{ + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +} void brw_emit_tri_clip( struct brw_clip_compile *c ) { + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + /* if -ve rhw workaround bit is set, + do cliptest */ + if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + } /* Can't push into do_clip_tri because with polygon (or quad) * flatshading, need to apply the flatshade here because we don't * respect the PV when converting to trifan for emit: @@ -462,6 +554,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) */ brw_clip_kill_thread(c); } - - - diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c index 918e000..6f20d79 100644 --- a/i965/brw_clip_unfilled.c +++ b/i965/brw_clip_unfilled.c @@ -58,10 +58,30 @@ static void compute_tri_direction( struct brw_clip_compile *c ) struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. + * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + /* Calculate the vectors of two edges of the triangle: */ - brw_ADD(p, e, v0, negate(v2)); - brw_ADD(p, f, v1, negate(v2)); + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); /* Take their crossproduct: */ @@ -220,8 +240,8 @@ static void apply_one_offset( struct brw_clip_compile *c, struct brw_indirect vert ) { struct brw_compile *p = &c->func; - struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); - struct brw_reg z = get_element(pos, 2); + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); brw_ADD(p, z, z, vec1(c->reg.offset)); } diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c index 41d9b75..c32bd4e 100644 --- a/i965/brw_clip_util.c +++ b/i965/brw_clip_util.c @@ -46,8 +46,7 @@ - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) +struct brw_reg get_tmp( struct brw_clip_compile *c ) { struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); @@ -90,7 +89,7 @@ void brw_clip_init_planes( struct brw_clip_compile *c ) /* Project 'pos' to screen space (or back again), overwrite with results: */ -static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) { struct brw_compile *p = &c->func; @@ -272,6 +271,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) + struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) { return brw_address(c->reg.fixed_planes); @@ -327,8 +327,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) /* Shift so that lowest outcode bit is rightmost: */ - brw_MOV(p, c->reg.planemask, incoming); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26)); + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); if (c->key.nr_userclip) { struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); @@ -342,15 +341,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) release_tmp(c, tmp); } - - if (!BRW_IS_IGD(p->brw)) { - /* Test for -ve rhw workaround - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); - } - - brw_set_predicate_control(p, BRW_PREDICATE_NONE); } diff --git a/i965/brw_context.c b/i965/brw_context.c index 397a9bd..1c7ad5c 100644 --- a/i965/brw_context.c +++ b/i965/brw_context.c @@ -31,7 +31,6 @@ #include "brw_context.h" -#include "brw_aub.h" #include "brw_defines.h" #include "brw_draw.h" #include "brw_vs.h" @@ -39,33 +38,35 @@ #include "intel_tex.h" #include "intel_blit.h" #include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_span.h" +#include "tnl/t_pipeline.h" #include "utils.h" #include "api_noop.h" #include "vtxfmt.h" +#include "shader/shader_api.h" + /*************************************** * Mesa's Driver Functions ***************************************/ -static const struct dri_extension brw_extensions[] = +static void brwUseProgram(GLcontext *ctx, GLuint program) { - { "GL_ARB_depth_texture", NULL }, - { "GL_ARB_fragment_program", NULL }, - { "GL_ARB_shadow", NULL }, - { "GL_EXT_shadow_funcs", NULL }, - /* ARB extn won't work if not enabled */ - { "GL_SGIX_depth_texture", NULL }, - { "GL_ARB_texture_env_crossbar", NULL }, - { NULL, NULL } -}; - + _mesa_use_program(ctx, program); +} +static void brwInitProgFuncs( struct dd_function_table *functions ) +{ + functions->UseProgram = brwUseProgram; +} static void brwInitDriverFunctions( struct dd_function_table *functions ) { intelInitDriverFunctions( functions ); - brwInitTextureFuncs( functions ); + brwInitFragProgFuncs( functions ); + brwInitProgFuncs( functions ); } @@ -116,10 +117,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, return GL_FALSE; } + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs(ctx); + + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT; - + ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: @@ -132,11 +138,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ - - driInitExtensions( ctx, brw_extensions, GL_FALSE ); - - brw_aub_init( brw ); - brw_init_attribs( brw ); brw_init_metaops( brw ); brw_init_state( brw ); @@ -144,8 +145,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; - memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); - brw->emit_state_always = 0; ctx->FragmentProgram._MaintainTexEnvProgram = 1; @@ -154,16 +153,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw_ProgramCacheInit( ctx ); - brw_FrameBufferTexInit( brw ); - - { - const char *filename = getenv("INTEL_REPLAY"); - if (filename) { - brw_playback_aubfile(brw, filename); - exit(0); - } - } - return GL_TRUE; } diff --git a/i965/brw_context.h b/i965/brw_context.h index 08fdc54..32e0554 100644 --- a/i965/brw_context.h +++ b/i965/brw_context.h @@ -135,13 +135,23 @@ struct brw_context; #define BRW_NEW_METAOPS 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_LOCK 0x4000 - - +/** + * Used for any batch entry with a relocated pointer that will be used + * by any 3D rendering. + */ +#define BRW_NEW_BATCH 0x8000 +/** brw->depth_region updated */ +#define BRW_NEW_DEPTH_BUFFER 0x10000 struct brw_state_flags { + /** State update flags signalled by mesa internals */ GLuint mesa; - GLuint cache; + /** + * State update flags signalled as the result of brw_tracked_state updates + */ GLuint brw; + /** State update flags signalled by brw_state_cache.c searches */ + GLuint cache; }; struct brw_vertex_program { @@ -230,32 +240,46 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - GLuint surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS -struct brw_mem_pool { - struct buffer *buffer; - - GLuint size; - GLuint offset; /* offset of first free byte */ +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, - struct brw_context *brw; + BRW_MAX_CACHE }; struct brw_cache_item { + /** + * Effectively part of the key, cache_id identifies what kind of state + * buffer is involved, and also which brw->state.dirty.cache flag should + * be set when this cache item is chosen. + */ + enum brw_cache_id cache_id; + /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; + dri_bo **reloc_bufs; + GLuint nr_reloc_bufs; - GLuint offset; /* offset within pool's buffer */ + dri_bo *bo; GLuint data_size; struct brw_cache_item *next; @@ -264,23 +288,19 @@ struct brw_cache_item { struct brw_cache { - GLuint id; - - const char *name; - struct brw_context *brw; - struct brw_mem_pool *pool; struct brw_cache_item **items; GLuint size, n_items; - - GLuint key_size; /* for fixed-size keys */ - GLuint aux_size; - GLuint aub_type; - GLuint aub_sub_type; - - GLuint last_addr; /* offset of active item */ + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ + GLuint aux_size[BRW_MAX_CACHE]; + char *name[BRW_MAX_CACHE]; + + /* Record of the last BOs chosen for each cache_id. Used to set + * brw->state.dirty.cache when a new cache item is chosen. + */ + dri_bo *last_bo[BRW_MAX_CACHE]; }; @@ -312,34 +332,8 @@ struct brw_state_pointers { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - - /* These two are in the SS pool: - */ - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, - - BRW_MAX_CACHE + int (*prepare)( struct brw_context *brw ); + void (*emit)( struct brw_context *brw ); }; /* Flags for brw->state.cache. @@ -363,16 +357,6 @@ enum brw_cache_id { #define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) #define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - struct brw_cached_batch_item { struct header *header; GLuint sz; @@ -389,12 +373,16 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; - struct brw_vertex_element_state *vep; - - GLuint index; + /** Size of a complete element */ GLuint element_size; + /** Number of uploaded elements for this input. */ GLuint count; - GLuint vbo_rebase_offset; + /** Byte stride between elements in the uploaded array */ + GLuint stride; + /** Offset of the first element within the buffer object */ + unsigned int offset; + /** Buffer object containing the uploaded vertex data */ + dri_bo *bo; }; @@ -431,40 +419,31 @@ struct brw_context GLboolean emit_state_always; GLboolean wrap; GLboolean tmp_fallback; + GLboolean no_batch_wrap; struct { struct brw_state_flags dirty; struct brw_tracked_state **atoms; GLuint nr_atoms; - - struct intel_region *draw_region; + GLuint nr_draw_regions; + struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; } state; struct brw_state_pointers attribs; - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cache cache; struct brw_cached_batch_item *cached_batch_items; struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - struct gl_client_array vbo_array[VERT_ATTRIB_MAX]; - struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) struct { - struct gl_buffer_object *vbo[BRW_NR_UPLOAD_BUFS]; - GLuint buf; + dri_bo *bo; GLuint offset; - GLuint size; - GLuint wrap; } upload; /* Summary of size and varying of active arrays, so we can check @@ -483,9 +462,12 @@ struct brw_context struct gl_buffer_object *vbo; struct intel_region *saved_draw_region; + GLuint saved_nr_draw_regions; struct intel_region *saved_depth_region; - GLuint restore_draw_mask; + GLuint restore_draw_buffers[MAX_DRAW_BUFFERS]; + GLuint restore_num_draw_buffers; + struct gl_fragment_program *restore_fp; GLboolean active; @@ -552,7 +534,11 @@ struct brw_context */ struct brw_tracked_state tracked_state; - GLuint gs_offset; + dri_bo *curbe_bo; + /** Offset within curbe_bo of space for current curbe entry */ + GLuint curbe_offset; + /** Offset within curbe_bo of space for next curbe entry */ + GLuint curbe_next_offset; GLfloat *last_buf; GLuint last_bufsz; @@ -561,33 +547,33 @@ struct brw_context struct { struct brw_vs_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } vs; struct { struct brw_gs_prog_data *prog_data; GLboolean prog_active; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } clip; struct { struct brw_sf_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } sf; struct { @@ -598,33 +584,31 @@ struct brw_context */ GLuint input_size_masks[4]; - - /* State structs - */ - struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT]; - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + /** Array of surface default colors (texture border color) */ + dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; GLuint render_surf; GLuint nr_surfaces; GLuint max_threads; - struct buffer *scratch_buffer; - GLuint scratch_buffer_size; + dri_bo *scratch_buffer; GLuint sampler_count; - GLuint sampler_gs_offset; + dri_bo *sampler_bo; - struct brw_surface_binding_table bind; - GLuint bind_ss_offset; + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_WM_MAX_SURF]; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } wm; struct { - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } cc; @@ -657,19 +641,24 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_state.c */ -void brw_validate_state( struct brw_context *brw ); +int brw_validate_state( struct brw_context *brw ); void brw_init_state( struct brw_context *brw ); void brw_destroy_state( struct brw_context *brw ); +/*====================================================================== + * brw_state_dump.c + */ +void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ void brwUpdateTextureState( struct intel_context *intel ); -void brwInitTextureFuncs( struct dd_function_table *functions ); -void brw_FrameBufferTexInit( struct brw_context *brw ); +void brw_FrameBufferTexInit( struct brw_context *brw, + struct intel_region *region ); void brw_FrameBufferTexDestroy( struct brw_context *brw ); +void brw_validate_textures( struct brw_context *brw ); /*====================================================================== * brw_metaops.c @@ -696,11 +685,13 @@ void brw_upload_constant_buffer_state(struct brw_context *brw); * Inline conversion functions. These are better-typed than the * macros used previously: */ -static inline struct brw_context * +static INLINE struct brw_context * brw_context( GLcontext *ctx ) { return (struct brw_context *)ctx; } +#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) + #endif diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c index 5bf0ed5..5ff4e29 100644 --- a/i965/brw_curbe.c +++ b/i965/brw_curbe.c @@ -42,12 +42,11 @@ #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" -#include "brw_aub.h" /* Partition the CURBE between the various users of constant values: */ -static void calculate_curbe_offsets( struct brw_context *brw ) +static int calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -118,6 +117,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } + return 0; } @@ -127,7 +127,7 @@ const struct brw_tracked_state brw_curbe_offsets = { .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, - .update = calculate_curbe_offsets + .prepare = calculate_curbe_offsets }; @@ -183,12 +183,11 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static void upload_constant_buffer(struct brw_context *brw) +static int prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -202,13 +201,6 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { - struct brw_constant_buffer cb; - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 0; - cb.bits0.buffer_length = 0; - cb.bits0.buffer_address = 0; - BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -216,7 +208,7 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_bufsz = 0; } - return; + return 0; } buf = (GLfloat *)malloc(bufsz); @@ -290,11 +282,11 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } - if (brw->curbe.last_buf && + if (brw->curbe.curbe_bo != NULL && + brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); -/* return; */ } else { if (brw->curbe.last_buf) @@ -302,61 +294,69 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; - - if (!brw_pool_alloc(pool, - bufsz, - 6, - &brw->curbe.gs_offset)) { - _mesa_printf("out of GS memory for curbe\n"); - assert(0); - return; + if (brw->curbe.curbe_bo != NULL && + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) + { + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; } - + + if (brw->curbe.curbe_bo == NULL) { + /* Allocate a single page for CURBE entries for this batchbuffer. + * They're generally around 64b. + */ + brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", + 4096, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + brw->curbe.curbe_next_offset = 0; + } + + brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; + brw->curbe.curbe_next_offset += bufsz; + brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ - bmBufferSubDataAUB(&brw->intel, - pool->buffer, - brw->curbe.gs_offset, - bufsz, - buf, - DW_CONSTANT_BUFFER, - 0); + dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } - /* TODO: only emit the constant_buffer packet when necessary, ie: - - contents have changed - - offset has changed - - hw requirements due to other packets emitted. - */ - { - struct brw_constant_buffer cb; - - memset(&cb, 0, sizeof(cb)); - - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 1; - cb.bits0.buffer_length = sz - 1; - cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; - - /* Because this provokes an action (ie copy the constants into the - * URB), it shouldn't be shortcircuited if identical to the - * previous time - because eg. the urb destination may have - * changed, or the urb contents different to last time. - * - * Note that the data referred to is actually copied internally, - * not just used in place according to passed pointer. - * - * It appears that the CS unit takes care of using each available - * URB entry (Const URB Entry == CURBE) in turn, and issuing - * flushes as necessary when doublebuffering of CURBEs isn't - * possible. - */ -/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ - BRW_BATCH_STRUCT(brw, &cb); -/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + + /* check aperture space for this bo */ + return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo); +} + + +static void emit_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLuint sz = brw->curbe.total_size; + + BEGIN_BATCH(2, IGNORE_CLIPRECTS); + if (sz == 0) { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); + OUT_BATCH(0); + } else { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); + OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + (sz - 1) + brw->curbe.curbe_offset); } + ADVANCE_BATCH(); } /* This tracked state is unique in that the state it monitors varies @@ -372,9 +372,11 @@ const struct brw_tracked_state brw_constant_buffer = { BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ - BRW_NEW_CURBE_OFFSETS), + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_BATCH), .cache = (CACHE_NEW_WM_PROG) }, - .update = upload_constant_buffer + .prepare = prepare_constant_buffer, + .emit = emit_constant_buffer, }; diff --git a/i965/brw_defines.h b/i965/brw_defines.h index 101828b..92c058a 100644 --- a/i965/brw_defines.h +++ b/i965/brw_defines.h @@ -240,6 +240,8 @@ #define BRW_FRONTWINDING_CW 0 #define BRW_FRONTWINDING_CCW 1 +#define BRW_SPRITE_POINT_ENABLE 16 + #define BRW_INDEX_BYTE 0 #define BRW_INDEX_WORD 1 #define BRW_INDEX_DWORD 2 @@ -485,20 +487,6 @@ #define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 #define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 -#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 -#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 - -#define BRW_VFCOMPONENT_NOSTORE 0 -#define BRW_VFCOMPONENT_STORE_SRC 1 -#define BRW_VFCOMPONENT_STORE_0 2 -#define BRW_VFCOMPONENT_STORE_1_FLT 3 -#define BRW_VFCOMPONENT_STORE_1_INT 4 -#define BRW_VFCOMPONENT_STORE_VID 5 -#define BRW_VFCOMPONENT_STORE_IID 6 -#define BRW_VFCOMPONENT_STORE_PID 7 - - - /* Execution Unit (EU) defines */ @@ -816,15 +804,39 @@ #define CMD_STATE_BASE_ADDRESS 0x6101 #define CMD_STATE_INSN_POINTER 0x6102 #define CMD_PIPELINE_SELECT_965 0x6104 -#define CMD_PIPELINE_SELECT_IGD 0x6904 +#define CMD_PIPELINE_SELECT_GM45 0x6904 #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 + #define CMD_VERTEX_BUFFER 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define BRW_VB0_PITCH_SHIFT 0 + #define CMD_VERTEX_ELEMENT 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b -#define CMD_VF_STATISTICS_IGD 0x680b +#define CMD_VF_STATISTICS_GM45 0x680b #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -848,9 +860,12 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 -#define BRW_IS_IGD(brw) ((brw)->intel.intelScreen->deviceID == PCI_CHIP_IGD_GM) -#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_IGD(brw)) ? CMD_PIPELINE_SELECT_IGD : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) ((BRW_IS_IGD(brw)) ? CMD_VF_STATISTICS_IGD : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) ((BRW_IS_IGD(brw)) ? 384 : 256) /* 512 bit unit */ +#include "intel_chipset.h" + +#define BRW_IS_GM45(brw) (IS_GM45_GM((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) +#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? 384 : 256) /* 512 bit unit */ #endif diff --git a/i965/brw_draw.c b/i965/brw_draw.c index f796472..f90c5f7 100644 --- a/i965/brw_draw.c +++ b/i965/brw_draw.c @@ -36,7 +36,6 @@ #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" -#include "brw_aub.h" #include "brw_state.h" #include "brw_fallback.h" @@ -47,8 +46,9 @@ #include "tnl/tnl.h" #include "vbo/vbo_context.h" #include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" - +#define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint hw_prim[GL_POLYGON+1] = { _3DPRIM_POINTLIST, @@ -83,8 +83,9 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) +static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush) { + int ret; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); @@ -105,7 +106,9 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } - brw_validate_state(brw); + ret = brw_validate_state(brw); + if (ret) + *need_flush = GL_TRUE; } return hw_prim[prim]; @@ -123,30 +126,12 @@ static GLuint trim(GLenum prim, GLuint length) } -static void brw_emit_cliprect( struct brw_context *brw, - const drm_clip_rect_t *rect ) -{ - struct brw_drawrect bdr; - - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = rect->x1; - bdr.xmax = rect->x2 - 1; - bdr.ymin = rect->y1; - bdr.ymax = rect->y2 - 1; - bdr.xorg = brw->intel.drawX; - bdr.yorg = brw->intel.drawY; - - intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr), - INTEL_BATCH_NO_CLIPRECTS); -} - - static void brw_emit_prim( struct brw_context *brw, const struct _mesa_prim *prim ) { struct brw_3d_primitive prim_packet; + GLboolean need_flush = GL_FALSE; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -155,7 +140,7 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; prim_packet.header.pad = 0; - prim_packet.header.topology = brw_set_prim(brw, prim->mode); + prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush); prim_packet.header.indexed = prim->indexed; prim_packet.verts_per_instance = trim(prim->mode, prim->count); @@ -165,9 +150,11 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.base_vert_location = 0; if (prim_packet.verts_per_instance) { - intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), - INTEL_BATCH_NO_CLIPRECTS); + intel_batchbuffer_data( brw->intel.batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); } + + assert(need_flush == GL_FALSE); } static void brw_merge_inputs( struct brw_context *brw, @@ -270,11 +257,17 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; - GLuint i, j; + GLuint i; + GLuint ib_offset; + dri_bo *ib_bo; + GLboolean force_flush = GL_FALSE; + int ret; if (ctx->NewState) _mesa_update_state( ctx ); + brw_validate_textures( brw ); + /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -289,19 +282,41 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, LOCK_HARDWARE(intel); if (brw->intel.numClipRects == 0) { - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); UNLOCK_HARDWARE(intel); return GL_TRUE; } { + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. This fraction is just a guess (minimal full state plus + * a primitive is around 512 bytes), and would be better if we had + * an upper bound of how much we might emit in a single + * brw_try_draw_prims(). + */ + flush: + if (force_flush) + brw->no_batch_wrap = GL_FALSE; + + if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 + /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ + || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE)) + intel_batchbuffer_flush(intel->batch); + + force_flush = GL_FALSE; + brw->no_batch_wrap = GL_TRUE; + /* Set the first primitive early, ahead of validate_state: */ - brw_set_prim(brw, prim[0].mode); + brw_set_prim(brw, prim[0].mode, &force_flush); /* XXX: Need to separate validate and upload of state. */ - brw_validate_state( brw ); + ret = brw_validate_state( brw ); + if (ret) { + force_flush = GL_TRUE; + goto flush; + } /* Various fallback checks: */ @@ -310,76 +325,42 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; + + /* need to account for index buffer and vertex buffer */ + if (ib) { + ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset); + if (ret) { + force_flush = GL_TRUE; + goto flush; + } + } + + ret = brw_prepare_vertices( brw, min_index, max_index); + if (ret < 0) + goto out; + + if (ret > 0) { + force_flush = GL_TRUE; + goto flush; + } /* Upload index, vertex data: */ if (ib) - brw_upload_indices( brw, ib ); + brw_emit_indices( brw, ib, ib_bo, ib_offset); - if (!brw_upload_vertices( brw, min_index, max_index)) { - goto out; - } + brw_emit_vertices( brw, min_index, max_index); - /* For single cliprect, state is already emitted: - */ - if (brw->intel.numClipRects == 1) { - for (i = 0; i < nr_prims; i++) { - brw_emit_prim(brw, &prim[i]); - } - } - else { - /* Otherwise, explicitly do the cliprects at this point: - */ - GLuint nprims = 0; - for (j = 0; j < brw->intel.numClipRects; j++) { - brw_emit_cliprect(brw, &brw->intel.pClipRects[j]); - - /* Emit prims to batchbuffer: - */ - for (i = 0; i < nr_prims; i++) { - brw_emit_prim(brw, &prim[i]); - - if (++nprims == VBO_MAX_PRIM) { - intel_batchbuffer_flush(brw->intel.batch); - nprims = 0; - } - } - } + for (i = 0; i < nr_prims; i++) { + brw_emit_prim(brw, &prim[i]); } - - intel->need_flush = GL_TRUE; + retval = GL_TRUE; } out: - /* Currently have to do this to synchronize with the map/unmap of - * the vertex buffer in brw_exec_api.c. Not sure if there is any - * way around this, as not every flush is due to a buffer filling - * up. - */ - if (!intel_batchbuffer_flush( brw->intel.batch )) { - DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__); - retval = GL_FALSE; - } - - if (retval && intel->thrashing) { - bmSetFence(intel); - } - - /* Free any old data so it doesn't clog up texture memory - we - * won't be referencing it again. - */ - while (brw->vb.upload.wrap != brw->vb.upload.buf) { - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - BRW_UPLOAD_INIT_SIZE, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[brw->vb.upload.wrap]); - brw->vb.upload.wrap++; - brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS; - } + brw->no_batch_wrap = GL_FALSE; UNLOCK_HARDWARE(intel); @@ -425,7 +406,6 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - struct intel_context *intel = intel_context(ctx); GLboolean retval; /* Decide if we want to rebase. If so we end up recursing once @@ -445,20 +425,6 @@ void brw_draw_prims( GLcontext *ctx, */ retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - - /* This looks like out-of-memory but potentially we have - * situation where there is enough memory but it has become - * fragmented. Clear out all heaps and start from scratch by - * faking a contended lock event: (done elsewhere) - */ - if (!retval && !intel->Fallback && bmError(intel)) { - DBG("retrying\n"); - /* Then try a second time only to upload textures and draw the - * primitives: - */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - } - /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. @@ -467,57 +433,22 @@ void brw_draw_prims( GLcontext *ctx, _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } - - if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) { - intelFinish( &intel->ctx ); - intel->aub_wrap = 1; - } -} - - -static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr ) -{ - /* nothing to do, we don't rely on the contents being preserved */ } - void brw_draw_init( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct vbo_context *vbo = vbo_context(ctx); - GLuint i; - + /* Register our drawing function: */ vbo->draw_prims = brw_draw_prims; - - brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE; - - for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) { - brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); - - /* NOTE: These are set to no-backing-store. - */ - bmBufferSetInvalidateCB(&brw->intel, - intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])), - brw_invalidate_vbo_cb, - &brw->intel, - GL_TRUE); - } - - ctx->Driver.BufferData( ctx, - GL_ARRAY_BUFFER_ARB, - BRW_UPLOAD_INIT_SIZE, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[0] ); } void brw_draw_destroy( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; - GLuint i; - - for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) - ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]); + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + } } diff --git a/i965/brw_draw.h b/i965/brw_draw.h index 0f7b738..b354740 100644 --- a/i965/brw_draw.h +++ b/i965/brw_draw.h @@ -31,6 +31,7 @@ #include "mtypes.h" /* for GLcontext... */ #include "vbo/vbo.h" +#include "dri_bufmgr.h" struct brw_context; @@ -53,10 +54,21 @@ void brw_init_current_values(GLcontext *ctx, /* brw_draw_upload.c */ -void brw_upload_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer); +int brw_prepare_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo **bo_return, + GLuint *offset_return); -GLboolean brw_upload_vertices( struct brw_context *brw, +void brw_emit_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo *bo, + GLuint offset); + +int brw_prepare_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ); + +void brw_emit_vertices( struct brw_context *brw, GLuint min_index, GLuint max_index ); diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c index 6150cac..7946ffd 100644 --- a/i965/brw_draw_upload.c +++ b/i965/brw_draw_upload.c @@ -36,43 +36,13 @@ #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" -#include "brw_aub.h" #include "brw_state.h" #include "brw_fallback.h" #include "intel_ioctl.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" - - -struct brw_array_state { - union header_union header; - - struct { - union { - struct { - GLuint pitch:11; - GLuint pad:15; - GLuint access_type:1; - GLuint vb_index:5; - } bits; - GLuint dword; - } vb0; - - struct buffer *buffer; - GLuint offset; - - GLuint max_index; - GLuint instance_data_step_rate; - - } vb[BRW_VBP_MAX]; -}; - - -static struct buffer *array_buffer( const struct gl_client_array *array ) -{ - return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj)); -} +#include "intel_tex.h" static GLuint double_types[5] = { 0, @@ -247,194 +217,169 @@ static GLuint get_index_type(GLenum type) } } -static void copy_strided_array( GLubyte *dest, - const GLubyte *src, - GLuint size, - GLuint stride, - GLuint count ) -{ - if (size == stride) - do_memcpy(dest, src, count * size); - else { - GLuint i,j; - - for (i = 0; i < count; i++) { - for (j = 0; j < size; j++) - *dest++ = *src++; - src += (stride - size); - } - } -} - static void wrap_buffers( struct brw_context *brw, GLuint size ) { - GLcontext *ctx = &brw->intel.ctx; - if (size < BRW_UPLOAD_INIT_SIZE) size = BRW_UPLOAD_INIT_SIZE; - brw->vb.upload.buf++; - brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS; brw->vb.upload.offset = 0; - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - size, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[brw->vb.upload.buf]); + if (brw->vb.upload.bo != NULL) + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", + size, 1, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + + /* Set the internal VBO\ to no-backing-store. We only use them as a + * temporary within a brw_try_draw_prims while the lock is held. + */ + /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH + FAKE TO PUSH THIS STUFF */ +// if (!brw->intel.ttm) +// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, GLuint size, - struct gl_buffer_object **vbo_return, + dri_bo **bo_return, GLuint *offset_return ) { - size = (size + 63) & ~63; - - if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) + size = ALIGN(size, 64); + + if (brw->vb.upload.bo == NULL || + brw->vb.upload.offset + size > brw->vb.upload.bo->size) { wrap_buffers(brw, size); + } - *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf]; + dri_bo_reference(brw->vb.upload.bo); + *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; brw->vb.upload.offset += size; } - - -static struct gl_client_array * +static void copy_array_to_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *array, - GLuint element_size, - GLuint count) + struct brw_vertex_element *element, + GLuint dst_stride) { - GLcontext *ctx = &brw->intel.ctx; - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - GLuint size = count * element_size; - struct gl_buffer_object *vbo; - GLuint offset; - GLuint new_stride; + GLuint size = element->count * dst_stride; - get_space(brw, size, &vbo, &offset); + get_space(brw, size, &element->bo, &element->offset); - if (array->StrideB == 0) { - assert(count == 1); - new_stride = 0; + if (element->glarray->StrideB == 0) { + assert(element->count == 1); + element->stride = 0; + } else { + element->stride = dst_stride; } - else - new_stride = element_size; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = new_stride; - vbo_array->StrideB = new_stride; - vbo_array->Ptr = (const void *)offset; - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; /* ? */ - vbo_array->BufferObj = vbo; - - { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - vbo); - - map += offset; - copy_strided_array( map, - array->Ptr, - element_size, - array->StrideB, - count); + if (dst_stride == element->glarray->StrideB) { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } else { + void *data; + char *dest; + const char *src = element->glarray->Ptr; + int i; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj); + dri_bo_subdata(element->bo, + element->offset, + size, + data); + _mesa_free(data); } - - return vbo_array; } - - -static struct gl_client_array * -interleaved_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *uploaded_array, - const struct gl_client_array *array, - const char *ptr) -{ - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = array->Stride; - vbo_array->StrideB = array->StrideB; - vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr + - ((const char *)array->Ptr - ptr)); - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; - vbo_array->BufferObj = uploaded_array->BufferObj; - - return vbo_array; -} - - -GLboolean brw_upload_vertices( struct brw_context *brw, +int brw_prepare_vertices( struct brw_context *brw, GLuint min_index, GLuint max_index ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element_packet vep; - struct brw_array_state vbp; GLuint i; - const void *ptr = NULL; + const unsigned char *ptr = NULL; GLuint interleave = 0; + int ret = 0; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled = 0; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; - - - memset(&vbp, 0, sizeof(vbp)); - memset(&vep, 0, sizeof(vep)); /* First build an array of pointers to ve's in vb.inputs_read */ if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - + + /* Accumulate the list of enabled arrays. */ while (tmp) { GLuint i = _mesa_ffsll(tmp)-1; struct brw_vertex_element *input = &brw->vb.inputs[i]; tmp &= ~(1<<i); enabled[nr_enabled++] = input; + } + + /* XXX: In the rare cases where this happens we fallback all + * the way to software rasterization, although a tnl fallback + * would be sufficient. I don't know of *any* real world + * cases with > 17 vertex attributes enabled, so it probably + * isn't an issue at this point. + */ + if (nr_enabled >= BRW_VEP_MAX) + return -1; + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; - input->index = i; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (!input->glarray->BufferObj->Name) { + if (input->glarray->BufferObj->Name != 0) { + struct intel_buffer_object *intel_buffer = + intel_buffer_object(input->glarray->BufferObj); + + /* Named buffer object: Just reference its contents directly. */ + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + input->offset = (unsigned long)input->glarray->Ptr; + input->stride = input->glarray->StrideB; + + ret |= dri_bufmgr_check_aperture_space(input->bo); + } else { + /* Queue the buffer object up to be uploaded in the next pass, + * when we've decided if we're doing interleaved or not. + */ if (i == 0) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) - return GL_FALSE; + return -1; interleave = input->glarray->StrideB; ptr = input->glarray->Ptr; } else if (interleave != input->glarray->StrideB || - (const char *)input->glarray->Ptr - (const char *)ptr < 0 || - (const char *)input->glarray->Ptr - (const char *)ptr > interleave) { + (const unsigned char *)input->glarray->Ptr - ptr < 0 || + (const unsigned char *)input->glarray->Ptr - ptr > interleave) + { interleave = 0; } @@ -451,131 +396,137 @@ GLboolean brw_upload_vertices( struct brw_context *brw, } } - /* Upload interleaved arrays if all uploads are interleaved - */ - if (nr_uploads > 1 && - interleave && - interleave <= 256) { - struct brw_vertex_element *input0 = upload[0]; - - input0->glarray = copy_array_to_vbo_array(brw, 0, - input0->glarray, - interleave, - input0->count); + /* Handle any arrays to be uploaded. */ + if (nr_uploads > 1 && interleave && interleave <= 256) { + /* All uploads are interleaved, so upload the arrays together as + * interleaved. First, upload the contents and set up upload[0]. + */ + copy_array_to_vbo_array(brw, upload[0], interleave); + ret |= dri_bufmgr_check_aperture_space(upload[0]->bo); for (i = 1; i < nr_uploads; i++) { - upload[i]->glarray = interleaved_vbo_array(brw, - i, - input0->glarray, - upload[i]->glarray, - ptr); + /* Then, just point upload[i] at upload[0]'s buffer. */ + upload[i]->stride = interleave; + upload[i]->offset = upload[0]->offset + + ((const unsigned char *)upload[i]->glarray->Ptr - ptr); + upload[i]->bo = upload[0]->bo; + dri_bo_reference(upload[i]->bo); } } else { + /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { - struct brw_vertex_element *input = upload[i]; - - input->glarray = copy_array_to_vbo_array(brw, i, - input->glarray, - input->element_size, - input->count); - + copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + if (upload[i]->bo) { + ret |= dri_bufmgr_check_aperture_space(upload[i]->bo); + } } } - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (nr_enabled >= BRW_VEP_MAX) - return GL_FALSE; - /* This still defines a hardware VB for each input, even if they - * are interleaved or from the same VBO. TBD if this makes a - * performance difference. - */ - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + if (ret) + return 1; - input->vep = &vep.ve[i]; - input->vep->ve0.src_format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Normalized); - input->vep->ve0.valid = 1; - input->vep->ve1.dst_offset = (i) * 4; - input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - switch (input->glarray->Size) { - case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0; - case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } + return 0; +} - input->vep->ve0.vertex_buffer_index = i; - input->vep->ve0.src_offset = 0; +void brw_emit_vertices( struct brw_context *brw, + GLuint min_index, + GLuint max_index ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLuint tmp = brw->vs.prog_data->inputs_read; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint i; + GLuint nr_enabled = 0; - vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB; - vbp.vb[i].vb0.bits.pad = 0; - vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; - vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = (GLuint)input->glarray->Ptr; - vbp.vb[i].buffer = array_buffer(input->glarray); - vbp.vb[i].max_index = max_index; - } + /* Accumulate the list of enabled arrays. */ + while (tmp) { + i = _mesa_ffsll(tmp)-1; + struct brw_vertex_element *input = &brw->vb.inputs[i]; + tmp &= ~(1<<i); + enabled[nr_enabled++] = input; + } - /* Now emit VB and VEP state packets: + /* Now emit VB and VEP state packets. + * + * This still defines a hardware VB for each input, even if they + * are interleaved or from the same VBO. TBD if this makes a + * performance difference. */ - vbp.header.bits.length = (1 + nr_enabled * 4) - 2; - vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + nr_enabled * 4) - 2)); - BEGIN_BATCH(vbp.header.bits.length+2, 0); - OUT_BATCH( vbp.header.dword ); - for (i = 0; i < nr_enabled; i++) { - OUT_BATCH( vbp.vb[i].vb0.dword ); - OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset); - OUT_BATCH( vbp.vb[i].max_index ); - OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + struct brw_vertex_element *input = enabled[i]; + + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (input->stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(input->bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + input->offset); + OUT_BATCH(max_index); + OUT_BATCH(0); /* Instance data step rate */ + + /* Unreference the buffer so it can get freed, now that we won't + * touch it any more. + */ + dri_bo_unreference(input->bo); + input->bo = NULL; } ADVANCE_BATCH(); - vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; - vep.header.opcode = CMD_VERTEX_ELEMENT; - brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); - - return GL_TRUE; -} + BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + uint32_t format = get_surface_type(input->glarray->Type, + input->glarray->Size, + input->glarray->Normalized); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + switch (input->glarray->Size) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } -static GLuint element_size( GLenum type ) -{ - switch(type) { - case GL_UNSIGNED_INT: return 4; - case GL_UNSIGNED_SHORT: return 2; - case GL_UNSIGNED_BYTE: return 1; - default: assert(0); return 0; + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } + ADVANCE_BATCH(); } - - -void brw_upload_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer ) +int brw_prepare_indices( struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo **bo_return, + GLuint *offset_return) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; + dri_bo *bo; struct gl_buffer_object *bufferobj = index_buffer->obj; GLuint offset = (GLuint)index_buffer->ptr; + int ret; /* Turn into a proper VBO: */ @@ -583,23 +534,51 @@ void brw_upload_indices( struct brw_context *brw, /* Get new bufferobj, offset: */ - get_space(brw, ib_size, &bufferobj, &offset); + get_space(brw, ib_size, &bo, &offset); /* Straight upload */ - ctx->Driver.BufferSubData( ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - offset, - ib_size, - index_buffer->ptr, - bufferobj); + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { + /* If the index buffer isn't aligned to its element size, we have to + * rebase it into a temporary. + */ + if ((get_size(index_buffer->type) - 1) & offset) { + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + map += offset; + + get_space(brw, ib_size, &bo, &offset); + + dri_bo_subdata(bo, offset, ib_size, map); + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + } else { + bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), + INTEL_READ); + dri_bo_reference(bo); + } } + *bo_return = bo; + *offset_return = offset; + ret = dri_bufmgr_check_aperture_space(bo); + return ret; +} + +void brw_emit_indices(struct brw_context *brw, + const struct _mesa_index_buffer *index_buffer, + dri_bo *bo, + GLuint offset) +{ + struct intel_context *intel = &brw->intel; + GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; - struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); memset(&ib, 0, sizeof(ib)); @@ -609,11 +588,15 @@ void brw_upload_indices( struct brw_context *brw, ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, 0); + BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size ); + OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset); + OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); + + dri_bo_unreference(bo); } } + diff --git a/i965/brw_eu.h b/i965/brw_eu.h index 9d46aac..207b8b7 100644 --- a/i965/brw_eu.h +++ b/i965/brw_eu.h @@ -110,7 +110,7 @@ struct brw_compile { -static __inline int type_sz( GLuint type ) +static INLINE int type_sz( GLuint type ) { switch( type ) { case BRW_REGISTER_TYPE_UD: @@ -129,7 +129,7 @@ static __inline int type_sz( GLuint type ) } } -static __inline struct brw_reg brw_reg( GLuint file, +static INLINE struct brw_reg brw_reg( GLuint file, GLuint nr, GLuint subnr, GLuint type, @@ -166,7 +166,7 @@ static __inline struct brw_reg brw_reg( GLuint file, return reg; } -static __inline struct brw_reg brw_vec16_reg( GLuint file, +static INLINE struct brw_reg brw_vec16_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -181,7 +181,7 @@ static __inline struct brw_reg brw_vec16_reg( GLuint file, WRITEMASK_XYZW); } -static __inline struct brw_reg brw_vec8_reg( GLuint file, +static INLINE struct brw_reg brw_vec8_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -197,7 +197,7 @@ static __inline struct brw_reg brw_vec8_reg( GLuint file, } -static __inline struct brw_reg brw_vec4_reg( GLuint file, +static INLINE struct brw_reg brw_vec4_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -213,7 +213,7 @@ static __inline struct brw_reg brw_vec4_reg( GLuint file, } -static __inline struct brw_reg brw_vec2_reg( GLuint file, +static INLINE struct brw_reg brw_vec2_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -228,7 +228,7 @@ static __inline struct brw_reg brw_vec2_reg( GLuint file, WRITEMASK_XY); } -static __inline struct brw_reg brw_vec1_reg( GLuint file, +static INLINE struct brw_reg brw_vec1_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -244,14 +244,14 @@ static __inline struct brw_reg brw_vec1_reg( GLuint file, } -static __inline struct brw_reg retype( struct brw_reg reg, +static INLINE struct brw_reg retype( struct brw_reg reg, GLuint type ) { reg.type = type; return reg; } -static __inline struct brw_reg suboffset( struct brw_reg reg, +static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta ) { reg.subnr += delta * type_sz(reg.type); @@ -259,7 +259,7 @@ static __inline struct brw_reg suboffset( struct brw_reg reg, } -static __inline struct brw_reg offset( struct brw_reg reg, +static INLINE struct brw_reg offset( struct brw_reg reg, GLuint delta ) { reg.nr += delta; @@ -267,7 +267,7 @@ static __inline struct brw_reg offset( struct brw_reg reg, } -static __inline struct brw_reg byte_offset( struct brw_reg reg, +static INLINE struct brw_reg byte_offset( struct brw_reg reg, GLuint bytes ) { GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; @@ -277,28 +277,28 @@ static __inline struct brw_reg byte_offset( struct brw_reg reg, } -static __inline struct brw_reg brw_uw16_reg( GLuint file, +static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_uw8_reg( GLuint file, +static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_uw1_reg( GLuint file, +static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_imm_reg( GLuint type ) +static INLINE struct brw_reg brw_imm_reg( GLuint type ) { return brw_reg( BRW_IMMEDIATE_VALUE, 0, @@ -311,38 +311,38 @@ static __inline struct brw_reg brw_imm_reg( GLuint type ) 0); } -static __inline struct brw_reg brw_imm_f( GLfloat f ) +static INLINE struct brw_reg brw_imm_f( GLfloat f ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); imm.dw1.f = f; return imm; } -static __inline struct brw_reg brw_imm_d( GLint d ) +static INLINE struct brw_reg brw_imm_d( GLint d ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); imm.dw1.d = d; return imm; } -static __inline struct brw_reg brw_imm_ud( GLuint ud ) +static INLINE struct brw_reg brw_imm_ud( GLuint ud ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); imm.dw1.ud = ud; return imm; } -static __inline struct brw_reg brw_imm_uw( GLushort uw ) +static INLINE struct brw_reg brw_imm_uw( GLushort uw ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); - imm.dw1.ud = uw; + imm.dw1.ud = uw | (uw << 16); return imm; } -static __inline struct brw_reg brw_imm_w( GLshort w ) +static INLINE struct brw_reg brw_imm_w( GLshort w ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); - imm.dw1.d = w; + imm.dw1.d = w | (w << 16); return imm; } @@ -352,7 +352,7 @@ static __inline struct brw_reg brw_imm_w( GLshort w ) /* Vector of eight signed half-byte values: */ -static __inline struct brw_reg brw_imm_v( GLuint v ) +static INLINE struct brw_reg brw_imm_v( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -364,7 +364,7 @@ static __inline struct brw_reg brw_imm_v( GLuint v ) /* Vector of four 8-bit float values: */ -static __inline struct brw_reg brw_imm_vf( GLuint v ) +static INLINE struct brw_reg brw_imm_vf( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -378,7 +378,7 @@ static __inline struct brw_reg brw_imm_vf( GLuint v ) #define VF_ONE 0x30 #define VF_NEG (1<<7) -static __inline struct brw_reg brw_imm_vf4( GLuint v0, +static INLINE struct brw_reg brw_imm_vf4( GLuint v0, GLuint v1, GLuint v2, GLuint v3) @@ -395,51 +395,51 @@ static __inline struct brw_reg brw_imm_vf4( GLuint v0, } -static __inline struct brw_reg brw_address( struct brw_reg reg ) +static INLINE struct brw_reg brw_address( struct brw_reg reg ) { return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); } -static __inline struct brw_reg brw_vec1_grf( GLuint nr, +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) { return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec8_grf( GLuint nr, +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) { return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec4_grf( GLuint nr, +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) { return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec2_grf( GLuint nr, +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) { return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_uw8_grf( GLuint nr, +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) { return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_null_reg( void ) +static INLINE struct brw_reg brw_null_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); } -static __inline struct brw_reg brw_address_reg( GLuint subnr ) +static INLINE struct brw_reg brw_address_reg( GLuint subnr ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, @@ -450,7 +450,7 @@ static __inline struct brw_reg brw_address_reg( GLuint subnr ) * aren't xyzw. This goes against the convention for other scalar * regs: */ -static __inline struct brw_reg brw_ip_reg( void ) +static INLINE struct brw_reg brw_ip_reg( void ) { return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_IP, @@ -463,7 +463,7 @@ static __inline struct brw_reg brw_ip_reg( void ) WRITEMASK_XYZW); /* NOTE! */ } -static __inline struct brw_reg brw_acc_reg( void ) +static INLINE struct brw_reg brw_acc_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, @@ -471,7 +471,7 @@ static __inline struct brw_reg brw_acc_reg( void ) } -static __inline struct brw_reg brw_flag_reg( void ) +static INLINE struct brw_reg brw_flag_reg( void ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_FLAG, @@ -479,14 +479,14 @@ static __inline struct brw_reg brw_flag_reg( void ) } -static __inline struct brw_reg brw_mask_reg( GLuint subnr ) +static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); } -static __inline struct brw_reg brw_message_reg( GLuint nr ) +static INLINE struct brw_reg brw_message_reg( GLuint nr ) { return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, @@ -499,7 +499,7 @@ static __inline struct brw_reg brw_message_reg( GLuint nr ) /* This is almost always called with a numeric constant argument, so * make things easy to evaluate at compile time: */ -static __inline GLuint cvt( GLuint val ) +static INLINE GLuint cvt( GLuint val ) { switch (val) { case 0: return 0; @@ -513,7 +513,7 @@ static __inline GLuint cvt( GLuint val ) return 0; } -static __inline struct brw_reg stride( struct brw_reg reg, +static INLINE struct brw_reg stride( struct brw_reg reg, GLuint vstride, GLuint width, GLuint hstride ) @@ -525,43 +525,43 @@ static __inline struct brw_reg stride( struct brw_reg reg, return reg; } -static __inline struct brw_reg vec16( struct brw_reg reg ) +static INLINE struct brw_reg vec16( struct brw_reg reg ) { return stride(reg, 16,16,1); } -static __inline struct brw_reg vec8( struct brw_reg reg ) +static INLINE struct brw_reg vec8( struct brw_reg reg ) { return stride(reg, 8,8,1); } -static __inline struct brw_reg vec4( struct brw_reg reg ) +static INLINE struct brw_reg vec4( struct brw_reg reg ) { return stride(reg, 4,4,1); } -static __inline struct brw_reg vec2( struct brw_reg reg ) +static INLINE struct brw_reg vec2( struct brw_reg reg ) { return stride(reg, 2,2,1); } -static __inline struct brw_reg vec1( struct brw_reg reg ) +static INLINE struct brw_reg vec1( struct brw_reg reg ) { return stride(reg, 0,1,0); } -static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) { return vec1(suboffset(reg, elt)); } -static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) { return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); } -static __inline struct brw_reg brw_swizzle( struct brw_reg reg, +static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, GLuint x, GLuint y, GLuint z, @@ -575,33 +575,33 @@ static __inline struct brw_reg brw_swizzle( struct brw_reg reg, } -static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, +static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, GLuint x ) { return brw_swizzle(reg, x, x, x, x); } -static __inline struct brw_reg brw_writemask( struct brw_reg reg, +static INLINE struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask ) { reg.dw1.bits.writemask &= mask; return reg; } -static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, +static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask ) { reg.dw1.bits.writemask = mask; return reg; } -static __inline struct brw_reg negate( struct brw_reg reg ) +static INLINE struct brw_reg negate( struct brw_reg reg ) { reg.negate ^= 1; return reg; } -static __inline struct brw_reg brw_abs( struct brw_reg reg ) +static INLINE struct brw_reg brw_abs( struct brw_reg reg ) { reg.abs = 1; return reg; @@ -609,7 +609,7 @@ static __inline struct brw_reg brw_abs( struct brw_reg reg ) /*********************************************************************** */ -static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, +static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset ) { struct brw_reg reg = brw_vec4_grf(0, 0); @@ -619,7 +619,7 @@ static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, return reg; } -static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, +static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset ) { struct brw_reg reg = brw_vec1_grf(0, 0); @@ -629,38 +629,48 @@ static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, return reg; } -static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) { return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) { return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) { return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); } -static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) { return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); } -static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) { return brw_address_reg(ptr.addr_subnr); } -static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) { ptr.addr_offset += offset; return ptr; } -static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) { struct brw_indirect ptr; ptr.addr_subnr = addr_subnr; @@ -669,7 +679,10 @@ static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offse return ptr; } - +static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} void brw_pop_insn_state( struct brw_compile *p ); void brw_push_insn_state( struct brw_compile *p ); @@ -809,9 +822,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size); -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, @@ -861,5 +876,6 @@ void brw_math_invert( struct brw_compile *p, struct brw_reg dst, struct brw_reg src); - +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); #endif diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c index 1c717e4..6b97f8b 100644 --- a/i965/brw_eu_emit.c +++ b/i965/brw_eu_emit.c @@ -164,7 +164,7 @@ static void brw_set_src0( struct brw_instruction *insn, } -static void brw_set_src1( struct brw_instruction *insn, +void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); @@ -186,7 +186,7 @@ static void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - assert (reg.file == BRW_GENERAL_REGISTER_FILE); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; @@ -319,7 +319,7 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, + struct brw_instruction *insn, GLuint binding_table_index, GLuint sampler, GLuint msg_type, @@ -329,14 +329,14 @@ static void brw_set_sampler_message(struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_IGD(brw)) { - insn->bits3.sampler_igd.binding_table_index = binding_table_index; - insn->bits3.sampler_igd.sampler = sampler; - insn->bits3.sampler_igd.msg_type = msg_type; - insn->bits3.sampler_igd.response_length = response_length; - insn->bits3.sampler_igd.msg_length = msg_length; - insn->bits3.sampler_igd.end_of_thread = eot; - insn->bits3.sampler_igd.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) { + insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_gm45_g4x.sampler = sampler; + insn->bits3.sampler_gm45_g4x.msg_type = msg_type; + insn->bits3.sampler_gm45_g4x.response_length = response_length; + insn->bits3.sampler_gm45_g4x.msg_length = msg_length; + insn->bits3.sampler_gm45_g4x.end_of_thread = eot; + insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; } else { insn->bits3.sampler.binding_table_index = binding_table_index; insn->bits3.sampler.sampler = sampler; @@ -608,6 +608,34 @@ void brw_ENDIF(struct brw_compile *p, } } +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + /* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) @@ -619,13 +647,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; return insn; } @@ -633,7 +663,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -657,14 +687,16 @@ void brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.jump_count = do_insn - insn + 1; insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; } diff --git a/i965/brw_exec_generic.c b/i965/brw_exec_generic.c deleted file mode 100644 index 11d1ef7..0000000 --- a/i965/brw_exec_generic.c +++ /dev/null @@ -1,530 +0,0 @@ -/************************************************************************** - -Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "glheader.h" -#include "context.h" -#include "macros.h" -#include "vtxfmt.h" -#include "dlist.h" -#include "state.h" -#include "light.h" -#include "api_arrayelt.h" -#include "api_noop.h" - -#include "brw_exec.h" - - -/* Versions of all the entrypoints for situations where codegen isn't - * available. - * - * Note: Only one size for each attribute may be active at once. - * Eg. if Color3f is installed/active, then Color4f may not be, even - * if the vertex actually contains 4 color coordinates. This is - * because the 3f version won't otherwise set color[3] to 1.0 -- this - * is the job of the chooser function when switching between Color4f - * and Color3f. - */ -#define ATTRFV( ATTR, N ) \ -static void attrib_##ATTR##_##N( const GLfloat *v ) \ -{ \ - GET_CURRENT_CONTEXT( ctx ); \ - struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; \ - \ - if ((ATTR) == 0) { \ - GLuint i; \ - \ - if (N>0) exec->vtx.vbptr[0] = v[0]; \ - if (N>1) exec->vtx.vbptr[1] = v[1]; \ - if (N>2) exec->vtx.vbptr[2] = v[2]; \ - if (N>3) exec->vtx.vbptr[3] = v[3]; \ - \ - for (i = N; i < exec->vtx.vertex_size; i++) \ - exec->vtx.vbptr[i] = exec->vtx.vertex[i]; \ - \ - exec->vtx.vbptr += exec->vtx.vertex_size; \ - exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; \ - \ - if (++exec->vtx.vert_count >= exec->vtx.max_vert) \ - brw_exec_vtx_wrap( exec ); \ - } \ - else { \ - GLfloat *dest = exec->vtx.attrptr[ATTR]; \ - if (N>0) dest[0] = v[0]; \ - if (N>1) dest[1] = v[1]; \ - if (N>2) dest[2] = v[2]; \ - if (N>3) dest[3] = v[3]; \ - } \ -} - -#define INIT(TAB, ATTR) \ - TAB[ATTR][0] = attrib_##ATTR##_1; \ - TAB[ATTR][1] = attrib_##ATTR##_2; \ - TAB[ATTR][2] = attrib_##ATTR##_3; \ - TAB[ATTR][3] = attrib_##ATTR##_4; - - -#define ATTRS( ATTRIB ) \ - ATTRFV( ATTRIB, 1 ) \ - ATTRFV( ATTRIB, 2 ) \ - ATTRFV( ATTRIB, 3 ) \ - ATTRFV( ATTRIB, 4 ) - -ATTRS( 0 ) -ATTRS( 1 ) -ATTRS( 2 ) -ATTRS( 3 ) -ATTRS( 4 ) -ATTRS( 5 ) -ATTRS( 6 ) -ATTRS( 7 ) -ATTRS( 8 ) -ATTRS( 9 ) -ATTRS( 10 ) -ATTRS( 11 ) -ATTRS( 12 ) -ATTRS( 13 ) -ATTRS( 14 ) -ATTRS( 15 ) - -void brw_exec_generic_attr_table_init( brw_attrfv_func (*tab)[4] ) -{ - INIT( tab, 0 ); - INIT( tab, 1 ); - INIT( tab, 2 ); - INIT( tab, 3 ); - INIT( tab, 4 ); - INIT( tab, 5 ); - INIT( tab, 6 ); - INIT( tab, 7 ); - INIT( tab, 8 ); - INIT( tab, 9 ); - INIT( tab, 10 ); - INIT( tab, 11 ); - INIT( tab, 12 ); - INIT( tab, 13 ); - INIT( tab, 14 ); - INIT( tab, 15 ); -} - -/* These can be made efficient with codegen. Further, by adding more - * logic to do_choose(), the double-dispatch for legacy entrypoints - * like glVertex3f() can be removed. - */ -#define DISPATCH_ATTRFV( ATTR, COUNT, P ) \ -do { \ - GET_CURRENT_CONTEXT( ctx ); \ - struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; \ - exec->vtx.tabfv[ATTR][COUNT-1]( P ); \ -} while (0) - -#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V ) -#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V ) -#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V ) -#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V ) - -#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) - -#define DISPATCH_ATTR2F( ATTR, S,T ) \ -do { \ - GLfloat v[2]; \ - v[0] = S; v[1] = T; \ - DISPATCH_ATTR2FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR3F( ATTR, S,T,R ) \ -do { \ - GLfloat v[3]; \ - v[0] = S; v[1] = T; v[2] = R; \ - DISPATCH_ATTR3FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) \ -do { \ - GLfloat v[4]; \ - v[0] = S; v[1] = T; v[2] = R; v[3] = Q; \ - DISPATCH_ATTR4FV( ATTR, v ); \ -} while (0) - - -static void GLAPIENTRY brw_Vertex2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( BRW_ATTRIB_POS, x, y ); -} - -static void GLAPIENTRY brw_Vertex2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_Vertex3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_POS, x, y, z ); -} - -static void GLAPIENTRY brw_Vertex3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_Vertex4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_POS, x, y, z, w ); -} - -static void GLAPIENTRY brw_Vertex4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_TexCoord1f( GLfloat x ) -{ - DISPATCH_ATTR1F( BRW_ATTRIB_TEX0, x ); -} - -static void GLAPIENTRY brw_TexCoord1fv( const GLfloat *v ) -{ - DISPATCH_ATTR1FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( BRW_ATTRIB_TEX0, x, y ); -} - -static void GLAPIENTRY brw_TexCoord2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_TEX0, x, y, z ); -} - -static void GLAPIENTRY brw_TexCoord3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_TEX0, x, y, z, w ); -} - -static void GLAPIENTRY brw_TexCoord4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_Normal3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_NORMAL, x, y, z ); -} - -static void GLAPIENTRY brw_Normal3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_NORMAL, v ); -} - -static void GLAPIENTRY brw_FogCoordfEXT( GLfloat x ) -{ - DISPATCH_ATTR1F( BRW_ATTRIB_FOG, x ); -} - -static void GLAPIENTRY brw_FogCoordfvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR1FV( BRW_ATTRIB_FOG, v ); -} - -static void GLAPIENTRY brw_Color3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_COLOR0, x, y, z ); -} - -static void GLAPIENTRY brw_Color3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY brw_Color4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_COLOR0, x, y, z, w ); -} - -static void GLAPIENTRY brw_Color4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY brw_SecondaryColor3fEXT( GLfloat x, GLfloat y, - GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_COLOR1, x, y, z ); -} - -static void GLAPIENTRY brw_SecondaryColor3fvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR1, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord1f( GLenum target, GLfloat x ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR1F( attr, x ); -} - -static void GLAPIENTRY brw_MultiTexCoord1fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR1FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord2f( GLenum target, GLfloat x, - GLfloat y ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR2F( attr, x, y ); -} - -static void GLAPIENTRY brw_MultiTexCoord2fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR2FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord3f( GLenum target, GLfloat x, - GLfloat y, GLfloat z) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR3F( attr, x, y, z ); -} - -static void GLAPIENTRY brw_MultiTexCoord3fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR3FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord4f( GLenum target, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR4F( attr, x, y, z, w ); -} - -static void GLAPIENTRY brw_MultiTexCoord4fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR4FV( attr, v ); -} - - -static void GLAPIENTRY brw_VertexAttrib1fNV( GLuint index, GLfloat x ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1F( index, x ); -} - -static void GLAPIENTRY brw_VertexAttrib1fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib2fNV( GLuint index, GLfloat x, - GLfloat y ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2F( index, x, y ); -} - -static void GLAPIENTRY brw_VertexAttrib2fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib3fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3F( index, x, y, z ); -} - -static void GLAPIENTRY brw_VertexAttrib3fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib4fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4F( index, x, y, z, w ); -} - -static void GLAPIENTRY brw_VertexAttrib4fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4FV( index, v ); -} - - -/* - * XXX adjust index - */ - -static void GLAPIENTRY brw_VertexAttrib1fARB( GLuint index, GLfloat x ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1F( index, x ); -} - -static void GLAPIENTRY brw_VertexAttrib1fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib2fARB( GLuint index, GLfloat x, - GLfloat y ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2F( index, x, y ); -} - -static void GLAPIENTRY brw_VertexAttrib2fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib3fARB( GLuint index, GLfloat x, - GLfloat y, GLfloat z ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3F( index, x, y, z ); -} - -static void GLAPIENTRY brw_VertexAttrib3fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib4fARB( GLuint index, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4F( index, x, y, z, w ); -} - -static void GLAPIENTRY brw_VertexAttrib4fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4FV( index, v ); -} - - -/* Install the generic versions of the 2nd level dispatch - * functions. Some of these have a codegen alternative. - */ -void brw_exec_vtx_generic_init( struct brw_exec_context *exec ) -{ - GLvertexformat *vfmt = &exec->vtxfmt; - - vfmt->Color3f = brw_Color3f; - vfmt->Color3fv = brw_Color3fv; - vfmt->Color4f = brw_Color4f; - vfmt->Color4fv = brw_Color4fv; - vfmt->FogCoordfEXT = brw_FogCoordfEXT; - vfmt->FogCoordfvEXT = brw_FogCoordfvEXT; - vfmt->MultiTexCoord1fARB = brw_MultiTexCoord1f; - vfmt->MultiTexCoord1fvARB = brw_MultiTexCoord1fv; - vfmt->MultiTexCoord2fARB = brw_MultiTexCoord2f; - vfmt->MultiTexCoord2fvARB = brw_MultiTexCoord2fv; - vfmt->MultiTexCoord3fARB = brw_MultiTexCoord3f; - vfmt->MultiTexCoord3fvARB = brw_MultiTexCoord3fv; - vfmt->MultiTexCoord4fARB = brw_MultiTexCoord4f; - vfmt->MultiTexCoord4fvARB = brw_MultiTexCoord4fv; - vfmt->Normal3f = brw_Normal3f; - vfmt->Normal3fv = brw_Normal3fv; - vfmt->SecondaryColor3fEXT = brw_SecondaryColor3fEXT; - vfmt->SecondaryColor3fvEXT = brw_SecondaryColor3fvEXT; - vfmt->TexCoord1f = brw_TexCoord1f; - vfmt->TexCoord1fv = brw_TexCoord1fv; - vfmt->TexCoord2f = brw_TexCoord2f; - vfmt->TexCoord2fv = brw_TexCoord2fv; - vfmt->TexCoord3f = brw_TexCoord3f; - vfmt->TexCoord3fv = brw_TexCoord3fv; - vfmt->TexCoord4f = brw_TexCoord4f; - vfmt->TexCoord4fv = brw_TexCoord4fv; - vfmt->Vertex2f = brw_Vertex2f; - vfmt->Vertex2fv = brw_Vertex2fv; - vfmt->Vertex3f = brw_Vertex3f; - vfmt->Vertex3fv = brw_Vertex3fv; - vfmt->Vertex4f = brw_Vertex4f; - vfmt->Vertex4fv = brw_Vertex4fv; - vfmt->VertexAttrib1fNV = brw_VertexAttrib1fNV; - vfmt->VertexAttrib1fvNV = brw_VertexAttrib1fvNV; - vfmt->VertexAttrib2fNV = brw_VertexAttrib2fNV; - vfmt->VertexAttrib2fvNV = brw_VertexAttrib2fvNV; - vfmt->VertexAttrib3fNV = brw_VertexAttrib3fNV; - vfmt->VertexAttrib3fvNV = brw_VertexAttrib3fvNV; - vfmt->VertexAttrib4fNV = brw_VertexAttrib4fNV; - vfmt->VertexAttrib4fvNV = brw_VertexAttrib4fvNV; - vfmt->VertexAttrib1fARB = brw_VertexAttrib1fARB; - vfmt->VertexAttrib1fvARB = brw_VertexAttrib1fvARB; - vfmt->VertexAttrib2fARB = brw_VertexAttrib2fARB; - vfmt->VertexAttrib2fvARB = brw_VertexAttrib2fvARB; - vfmt->VertexAttrib3fARB = brw_VertexAttrib3fARB; - vfmt->VertexAttrib3fvARB = brw_VertexAttrib3fvARB; - vfmt->VertexAttrib4fARB = brw_VertexAttrib4fARB; - vfmt->VertexAttrib4fvARB = brw_VertexAttrib4fvARB; -} diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c index 86464b2..8a8fb50 100644 --- a/i965/brw_fallback.c +++ b/i965/brw_fallback.c @@ -39,38 +39,32 @@ #include "macros.h" #include "mtypes.h" - - - - - +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS static GLboolean do_check_fallback(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; GLuint i; - + /* BRW_NEW_METAOPS */ if (brw->metaops.active) return GL_FALSE; - if (brw->intel.no_rast) - return GL_TRUE; - - /* _NEW_BUFFERS - */ - if (ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT && - ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_BACK_LEFT) + if (brw->intel.no_rast) { + DBG("FALLBACK: rasterization disabled\n"); return GL_TRUE; + } /* _NEW_RENDERMODE * * XXX: need to save/restore RenderMode in metaops state, or * somehow move to a new attribs pointer: */ - if (ctx->RenderMode != GL_RENDER) + if (ctx->RenderMode != GL_RENDER) { + DBG("FALLBACK: render mode\n"); return GL_TRUE; + } /* _NEW_TEXTURE: */ @@ -79,8 +73,13 @@ static GLboolean do_check_fallback(struct brw_context *brw) if (texUnit->_ReallyEnabled) { struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; - if (texImage->Border) + if (texImage->Border || + ((texImage->_BaseFormat == GL_DEPTH_COMPONENT) && + ((texImage->TexObject->WrapS == GL_CLAMP_TO_BORDER) || + (texImage->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) { + DBG("FALLBACK: texture border\n"); return GL_TRUE; + } } } @@ -88,6 +87,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) */ if (brw->attribs.Stencil->Enabled && !brw->intel.hw_stencil) { + DBG("FALLBACK: stencil\n"); return GL_TRUE; } @@ -95,9 +95,10 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_FALSE; } -static void check_fallback(struct brw_context *brw) +static int check_fallback(struct brw_context *brw) { brw->intel.Fallback = do_check_fallback(brw); + return 0; } const struct brw_tracked_state brw_check_fallback = { @@ -106,7 +107,7 @@ const struct brw_tracked_state brw_check_fallback = { .brw = BRW_NEW_METAOPS, .cache = 0 }, - .update = check_fallback + .prepare = check_fallback }; diff --git a/i965/brw_gs.c b/i965/brw_gs.c index 119d07d..9419315 100644 --- a/i965/brw_gs.c +++ b/i965/brw_gs.c @@ -119,26 +119,15 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ - brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->gs.prog_data ); + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_GS_PROG], - key, sizeof(*key), - &brw->gs.prog_data, - &brw->gs.prog_gs_offset); -} - - static const GLenum gs_prim[GL_POLYGON+1] = { GL_POINTS, GL_LINES, @@ -173,10 +162,10 @@ static void populate_key( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_gs_prog( struct brw_context *brw ) +static int prepare_gs_prog( struct brw_context *brw ) { struct brw_gs_prog_key key; - + int ret = 0; /* Populate the key: */ populate_key(brw, &key); @@ -187,9 +176,18 @@ static void upload_gs_prog( struct brw_context *brw ) } if (brw->gs.prog_active) { - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data); + if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); + + ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo); } + + return ret; } @@ -199,5 +197,5 @@ const struct brw_tracked_state brw_gs_prog = { .brw = BRW_NEW_PRIMITIVE, .cache = CACHE_NEW_VS_PROG }, - .update = upload_gs_prog + .prepare = prepare_gs_prog }; diff --git a/i965/brw_gs.h b/i965/brw_gs.h index 29a4e80..18a4537 100644 --- a/i965/brw_gs.h +++ b/i965/brw_gs.h @@ -40,11 +40,11 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { + GLuint attrs:32; GLuint primitive:4; - GLuint attrs:16; GLuint hint_gs_always:1; GLuint need_gs_prog:1; - GLuint pad:10; + GLuint pad:26; }; struct brw_gs_compile { diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c index 5826c01..f1f9e01 100644 --- a/i965/brw_gs_state.c +++ b/i965/brw_gs_state.c @@ -36,47 +36,102 @@ #include "brw_defines.h" #include "macros.h" +struct brw_gs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curbe_offset; -static void upload_gs_unit( struct brw_context *brw ) -{ - struct brw_gs_unit_state gs; + unsigned int nr_urb_entries, urb_size; + GLboolean prog_active; +}; - memset(&gs, 0, sizeof(gs)); +static void +gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); /* CACHE_NEW_GS_PROG */ - if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16; - gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; - gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; - } - else { - gs.thread0.grf_reg_count = 0; - gs.thread0.kernel_start_pointer = 0; - gs.thread3.urb_entry_read_length = 1; + key->prog_active = brw->gs.prog_active; + if (key->prog_active) { + key->total_grf = brw->gs.prog_data->total_grf; + key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } else { + key->total_grf = 1; + key->urb_entry_read_length = 1; } + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + /* BRW_NEW_URB_FENCE */ - gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; - gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + key->nr_urb_entries = brw->urb.nr_gs_entries; + key->urb_size = brw->urb.vsize; +} - gs.thread4.max_threads = 0; /* Hardware requirement */ +static dri_bo * +gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + struct brw_gs_unit_state gs; + dri_bo *bo; - if (INTEL_DEBUG & DEBUG_STATS) - gs.thread4.stats_enable = 1; + memset(&gs, 0, sizeof(gs)); + + gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + if (key->prog_active) /* reloc */ + gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; - /* CONSTANT */ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; gs.thread3.const_urb_entry_read_offset = 0; gs.thread3.const_urb_entry_read_length = 0; gs.thread3.urb_entry_read_offset = 0; - + gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + + gs.thread4.nr_urb_entries = key->nr_urb_entries; + gs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (INTEL_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + &brw->gs.prog_bo, 1, + &gs, sizeof(gs), + NULL, NULL); - brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); + if (key->prog_active) { + /* Emit GS program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + return bo; } +static int prepare_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_key key; + + gs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->gs.state_bo); + brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + &brw->gs.prog_bo, 1, + NULL); + if (brw->gs.state_bo == NULL) { + brw->gs.state_bo = gs_unit_create_from_key(brw, &key); + } + return dri_bufmgr_check_aperture_space(brw->gs.state_bo); +} const struct brw_tracked_state brw_gs_unit = { .dirty = { @@ -85,5 +140,5 @@ const struct brw_tracked_state brw_gs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG }, - .update = upload_gs_unit + .prepare = prepare_gs_unit, }; diff --git a/i965/brw_hal.c b/i965/brw_hal.c deleted file mode 100644 index 3126102..0000000 --- a/i965/brw_hal.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - -#include "intel_batchbuffer.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_hal.h" -#include <dlfcn.h> - -static void *brw_hal_lib; -static GLboolean brw_hal_tried; - -void * -brw_hal_find_symbol (char *symbol) -{ - if (!brw_hal_tried) - { - char *brw_hal_name = getenv ("INTEL_HAL"); - - if (!brw_hal_name) - brw_hal_name = "/usr/lib/xorg/modules/drivers/intel_hal.so"; - - brw_hal_lib = dlopen (brw_hal_name, RTLD_LAZY|RTLD_LOCAL); - brw_hal_tried = 1; - } - if (!brw_hal_lib) - return NULL; - return dlsym (brw_hal_lib, symbol); -} diff --git a/i965/brw_hal.h b/i965/brw_hal.h deleted file mode 100644 index cd86e39..0000000 --- a/i965/brw_hal.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - -void * -brw_hal_find_symbol (char *symbol); diff --git a/i965/brw_metaops.c b/i965/brw_metaops.c index 6e030f1..252a899 100644 --- a/i965/brw_metaops.c +++ b/i965/brw_metaops.c @@ -41,6 +41,7 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_regions.h" +#include "intel_buffers.h" #include "brw_context.h" #include "brw_defines.h" @@ -156,7 +157,7 @@ static const char *fp_tex_prog = * FragmentProgram->_Current * VertexProgram->_Enabled * brw->vertex_program - * DrawBuffer->_ColorDrawBufferMask[0] + * DrawBuffer->_ColorDrawBufferIndexes[0] * * * More if drawpixels-through-texture is added. @@ -195,7 +196,7 @@ static void init_metaops_state( struct brw_context *brw ) vp_prog, strlen(vp_prog), brw->metaops.vp); - brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp; + brw->metaops.attribs.VertexProgram->_Current = brw->metaops.vp; brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE; brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; @@ -361,13 +362,21 @@ static void meta_draw_region( struct intel_context *intel, struct brw_context *brw = brw_context(&intel->ctx); if (!brw->metaops.saved_draw_region) { - brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_draw_region = brw->state.draw_regions[0]; + brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; brw->metaops.saved_depth_region = brw->state.depth_region; } - brw->state.draw_region = draw_region; + brw->state.draw_regions[0] = draw_region; + brw->state.nr_draw_regions = 1; brw->state.depth_region = depth_region; + if (intel->frame_buffer_texobj != NULL) + brw_FrameBufferTexDestroy(brw); + + if (draw_region) + brw_FrameBufferTexInit(brw, draw_region); + brw->state.dirty.mesa |= _NEW_BUFFERS; } @@ -376,8 +385,7 @@ static void meta_draw_quad(struct intel_context *intel, GLfloat x0, GLfloat x1, GLfloat y0, GLfloat y1, GLfloat z, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, + GLuint color, GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1) { @@ -388,7 +396,6 @@ static void meta_draw_quad(struct intel_context *intel, struct gl_client_array *attribs[VERT_ATTRIB_MAX]; struct _mesa_prim prim[1]; GLfloat pos[4][3]; - GLubyte color[4]; ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB, @@ -413,7 +420,6 @@ static void meta_draw_quad(struct intel_context *intel, pos[3][1] = y1; pos[3][2] = z; - ctx->Driver.BufferSubData(ctx, GL_ARRAY_BUFFER_ARB, 0, @@ -421,16 +427,15 @@ static void meta_draw_quad(struct intel_context *intel, pos, brw->metaops.vbo); - color[0] = red; - color[1] = green; - color[2] = blue; - color[3] = alpha; + /* Convert incoming ARGB to required RGBA */ + /* Note this color is stored as GL_UNSIGNED_BYTE */ + color = (color & 0xff00ff00) | (((color >> 16) | (color << 16)) & 0xff00ff); ctx->Driver.BufferSubData(ctx, GL_ARRAY_BUFFER_ARB, sizeof(pos), sizeof(color), - color, + &color, brw->metaops.vbo); /* Ignoring texture coords. @@ -486,6 +491,7 @@ static void install_meta_state( struct intel_context *intel ) { GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); + GLuint i; if (!brw->metaops.vbo) { init_metaops_state(brw); @@ -495,12 +501,18 @@ static void install_meta_state( struct intel_context *intel ) meta_no_texture(&brw->intel); meta_flat_shade(&brw->intel); - brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0]; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + brw->metaops.restore_draw_buffers[i] + = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; + } + brw->metaops.restore_num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; + brw->metaops.restore_fp = ctx->FragmentProgram.Current; /* This works without adjusting refcounts. Fix later? */ - brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_draw_region = brw->state.draw_regions[0]; + brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; brw->metaops.saved_depth_region = brw->state.depth_region; brw->metaops.active = 1; @@ -511,13 +523,20 @@ static void leave_meta_state( struct intel_context *intel ) { GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); + GLuint i; restore_attribs(brw); - ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + ctx->DrawBuffer->_ColorDrawBufferIndexes[i] + = brw->metaops.restore_draw_buffers[i]; + } + ctx->DrawBuffer->_NumColorDrawBuffers = brw->metaops.restore_num_draw_buffers; + ctx->FragmentProgram.Current = brw->metaops.restore_fp; - brw->state.draw_region = brw->metaops.saved_draw_region; + brw->state.draw_regions[0] = brw->metaops.saved_draw_region; + brw->state.nr_draw_regions = brw->metaops.saved_nr_draw_regions; brw->state.depth_region = brw->metaops.saved_depth_region; brw->metaops.saved_draw_region = NULL; brw->metaops.saved_depth_region = NULL; diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c index fe476c9..62df259 100644 --- a/i965/brw_misc_state.c +++ b/i965/brw_misc_state.c @@ -68,137 +68,75 @@ const struct brw_tracked_state brw_blend_constant_color = { .brw = 0, .cache = 0 }, - .update = upload_blend_constant_color + .emit = upload_blend_constant_color }; -/*********************************************************************** - * Drawing rectangle -- Need for AUB file only. +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. */ -static void upload_drawing_rect(struct brw_context *brw) +static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - struct brw_drawrect bdr; - int x1, y1; - int x2, y2; - /* If there is a single cliprect, set it here. Otherwise iterate - * over them in brw_draw_prim(). - */ - if (brw->intel.numClipRects > 1) - return; - - x1 = brw->intel.pClipRects[0].x1; - y1 = brw->intel.pClipRects[0].y1; - x2 = brw->intel.pClipRects[0].x2; - y2 = brw->intel.pClipRects[0].y2; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; - if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; - - memset(&bdr, 0, sizeof(bdr)); - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = x1; - bdr.ymin = y1; - bdr.xmax = x2; - bdr.ymax = y2; - bdr.xorg = dPriv->x; - bdr.yorg = dPriv->y; - - /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted - * uncached in brw_draw.c: - */ - BRW_BATCH_STRUCT(brw, &bdr); -} - -const struct brw_tracked_state brw_drawing_rect = { - .dirty = { - .mesa = _NEW_WINDOW_POS, - .brw = 0, - .cache = 0 - }, - .update = upload_drawing_rect -}; - -/*********************************************************************** - * Binding table pointers - */ - -static void upload_binding_table_pointers(struct brw_context *brw) -{ - struct brw_binding_table_pointers btp; - memset(&btp, 0, sizeof(btp)); - - /* The binding table has been emitted to the SS pool already, so we - * know what its offset is. When the batch buffer is fired, the - * binding table and surface structs will get fixed up to point to - * where the textures actually landed, but that won't change the - * value of the offsets here: - */ - btp.header.opcode = CMD_BINDING_TABLE_PTRS; - btp.header.length = sizeof(btp)/4 - 2; - btp.vs = 0; - btp.gs = 0; - btp.clp = 0; - btp.sf = 0; - btp.wm = brw->wm.bind_ss_offset; - - BRW_CACHED_BATCH_STRUCT(brw, &btp); + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = 0, - .cache = CACHE_NEW_SURF_BIND + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, }, - .update = upload_binding_table_pointers + .emit = upload_binding_table_pointers, }; -/*********************************************************************** - * Pipelined state pointers. This is the key state packet from which - * the hardware chases pointers to all the uploaded state in VRAM. +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ - static void upload_pipelined_state_pointers(struct brw_context *brw ) { - struct brw_pipelined_state_pointers psp; - memset(&psp, 0, sizeof(psp)); - - psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; - psp.header.length = sizeof(psp)/4 - 2; - - psp.vs.offset = brw->vs.state_gs_offset >> 5; - psp.sf.offset = brw->sf.state_gs_offset >> 5; - psp.wm.offset = brw->wm.state_gs_offset >> 5; - psp.cc.offset = brw->cc.state_gs_offset >> 5; - - /* GS gets turned on and off regularly. Need to re-emit URB fence - * after this occurs. - */ - if (brw->gs.prog_active) { - psp.gs.offset = brw->gs.state_gs_offset >> 5; - psp.gs.enable = 1; - } - - if (!brw->metaops.active) { - psp.clp.offset = brw->clip.state_gs_offset >> 5; - psp.clp.enable = 1; - } - + struct intel_context *intel = &brw->intel; - if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) - brw->state.dirty.brw |= BRW_NEW_PSP; + BEGIN_BATCH(7, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + if (brw->gs.prog_active) + OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + else + OUT_BATCH(0); + if (!brw->metaops.active) + OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + else + OUT_BATCH(0); + OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + ADVANCE_BATCH(); + + brw->state.dirty.brw |= BRW_NEW_PSP; } +#if 0 +/* Combined into brw_psp_urb_cbs */ const struct brw_tracked_state brw_pipelined_state_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_METAOPS, + .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH, .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -207,8 +145,9 @@ const struct brw_tracked_state brw_pipelined_state_pointers = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, - .update = upload_pipelined_state_pointers + .emit = upload_pipelined_state_pointers }; +#endif static void upload_psp_urb_cbs(struct brw_context *brw ) { @@ -221,7 +160,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS | BRW_NEW_BATCH, .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -230,74 +169,91 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, - .update = upload_psp_urb_cbs + .emit = upload_psp_urb_cbs, }; +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ +static int prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + if (!region || !region->buffer) + return 0; + return dri_bufmgr_check_aperture_space(region->buffer); +} -/*********************************************************************** - * Depthbuffer - currently constant, but rotation would change that. - */ - -static void upload_depthbuffer(struct brw_context *brw) +static void emit_depthbuffer(struct brw_context *brw) { - /* 0x79050003 Depth Buffer */ struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - struct brw_depthbuffer bd; - memset(&bd, 0, sizeof(bd)); - - bd.header.bits.opcode = CMD_DEPTH_BUFFER; - bd.header.bits.length = BRW_IS_IGD(brw) ? (sizeof(bd)/4-2) : (sizeof(bd)/4-3); - bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1; - - switch (region->cpp) { - case 2: - bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM; - break; - case 4: - if (intel->depth_buffer_is_float) - bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT; - else - bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; - break; - default: - assert(0); - return; + unsigned int len = (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? sizeof(struct brw_depthbuffer_gm45_g4x) / 4 : sizeof(struct brw_depthbuffer) / 4; + + if (region == NULL) { + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } else { + unsigned int format; + + switch (region->cpp) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (intel->depth_buffer_is_float) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(((region->pitch * region->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + (region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(region->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((region->pitch - 1) << 6) | + ((region->height - 1) << 19)); + OUT_BATCH(0); + + if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); } - - bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */ - - /* The depthbuffer can only use YMAJOR tiling... This is a bit of - * a shame as it clashes with the 2d blitter which only supports - * XMAJOR tiling... - */ - bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR; - bd.dword1.bits.tiled_surface = intel->depth_region->tiled; - bd.dword1.bits.surface_type = BRW_SURFACE_2D; - - /* BRW_NEW_LOCK */ - bd.dword2_base_addr = bmBufferOffset(intel, region->buffer); - - bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - bd.dword3.bits.lod = 0; - bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */ - bd.dword3.bits.height = region->height - 1; - - bd.dword4.bits.min_array_element = 0; - bd.dword4.bits.depth = 0; - - BRW_CACHED_BATCH_STRUCT(brw, &bd); } const struct brw_tracked_state brw_depthbuffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 + .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, + .cache = 0, }, - .update = upload_depthbuffer + .prepare = prepare_depthbuffer, + .emit = emit_depthbuffer, }; @@ -327,7 +283,7 @@ const struct brw_tracked_state brw_polygon_stipple = { .brw = 0, .cache = 0 }, - .update = upload_polygon_stipple + .emit = upload_polygon_stipple }; @@ -350,13 +306,15 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) BRW_CACHED_BATCH_STRUCT(brw, &bpso); } +#define _NEW_WINDOW_POS 0x40000000 + const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { .mesa = _NEW_WINDOW_POS, .brw = 0, .cache = 0 }, - .update = upload_polygon_stipple_offset + .emit = upload_polygon_stipple_offset }; /********************************************************************** @@ -366,7 +324,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - if (!BRW_IS_IGD(brw)) + if (!(BRW_IS_GM45(brw) || BRW_IS_G4X(brw))) return; /* use legacy aa line coverage computation */ @@ -383,7 +341,7 @@ const struct brw_tracked_state brw_aa_line_parameters = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_aa_line_parameters + .emit = upload_aa_line_parameters }; /*********************************************************************** @@ -418,7 +376,7 @@ const struct brw_tracked_state brw_line_stipple = { .brw = 0, .cache = 0 }, - .update = upload_line_stipple + .emit = upload_line_stipple }; @@ -449,10 +407,10 @@ static void upload_pipe_control(struct brw_context *brw) const struct brw_tracked_state brw_pipe_control = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_BATCH, .cache = 0 }, - .update = upload_pipe_control + .emit = upload_pipe_control }; @@ -518,43 +476,39 @@ const struct brw_tracked_state brw_invarient_state = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_invarient_state + .emit = upload_invarient_state }; - -/* State pool addresses: +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - struct brw_state_base_address sba; - - memset(&sba, 0, sizeof(sba)); - - sba.header.opcode = CMD_STATE_BASE_ADDRESS; - sba.header.length = 0x4; - /* BRW_NEW_LOCK */ - sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5; - sba.bits0.modify_enable = 1; - - /* BRW_NEW_LOCK */ - sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5; - sba.bits1.modify_enable = 1; - - sba.bits2.modify_enable = 1; - sba.bits3.modify_enable = 1; - sba.bits4.modify_enable = 1; - - BRW_CACHED_BATCH_STRUCT(brw, &sba); + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); } - const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 + .brw = BRW_NEW_CONTEXT, + .cache = 0, }, - .update = upload_state_base_address + .emit = upload_state_base_address }; diff --git a/i965/brw_program.c b/i965/brw_program.c index 752fe49..c38610b 100644 --- a/i965/brw_program.c +++ b/i965/brw_program.c @@ -29,15 +29,15 @@ * Keith Whitwell <keith@tungstengraphics.com> */ +#include "main/imports.h" +#include "main/enums.h" #include "shader/prog_parameter.h" -#include "brw_context.h" -#include "brw_aub.h" -#include "brw_util.h" -#include "program.h" -#include "imports.h" -#include "enums.h" +#include "shader/program.h" +#include "shader/programopt.h" #include "tnl/tnl.h" +#include "brw_context.h" +#include "brw_util.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -125,6 +125,9 @@ static void brwProgramStringNotify( GLcontext *ctx, struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; if (p == vp) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + if (p->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &p->program); + } p->id = brw->program_id++; p->param_state = p->program.Base.Parameters->StateFlags; diff --git a/i965/brw_sf.c b/i965/brw_sf.c index 6dcfa62..0b61748 100644 --- a/i965/brw_sf.c +++ b/i965/brw_sf.c @@ -43,8 +43,6 @@ #include "brw_sf.h" #include "brw_state.h" -#define DO_SETUP_BITS ((1<<FRAG_ATTRIB_MAX)-1) - static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { @@ -74,6 +72,11 @@ static void compile_sf_prog( struct brw_context *brw, if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; + if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { + c.point_attrs[i].CoordReplace = + brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; + } else + c.point_attrs[i].CoordReplace = GL_FALSE; idx++; } @@ -90,7 +93,10 @@ static void compile_sf_prog( struct brw_context *brw, break; case SF_POINTS: c.nr_verts = 1; - brw_emit_point_setup( &c, GL_TRUE ); + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; @@ -108,29 +114,18 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ - brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->sf.prog_data ); + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_SF_PROG], - key, sizeof(*key), - &brw->sf.prog_data, - &brw->sf.prog_gs_offset); -} - - /* Calculate interpolants for triangle and line rasterization. */ -static void upload_sf_prog( struct brw_context *brw ) +static int upload_sf_prog( struct brw_context *brw ) { struct brw_sf_prog_key key; @@ -162,7 +157,8 @@ static void upload_sf_prog( struct brw_context *brw ) break; } - + key.do_point_sprite = brw->attribs.Point->PointSprite; + key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; /* _NEW_LIGHT */ key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); @@ -171,18 +167,23 @@ static void upload_sf_prog( struct brw_context *brw ) if (key.do_twoside_color) key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); - - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data); + if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); + return dri_bufmgr_check_aperture_space(brw->sf.prog_bo); } const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON), + .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_sf_prog + .prepare = upload_sf_prog }; diff --git a/i965/brw_sf.h b/i965/brw_sf.h index b321cda..1c0fb70 100644 --- a/i965/brw_sf.h +++ b/i965/brw_sf.h @@ -34,9 +34,9 @@ #define BRW_SF_H +#include "shader/program.h" #include "brw_context.h" #include "brw_eu.h" -#include "program.h" #define SF_POINTS 0 @@ -45,14 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { + GLuint attrs:32; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; - GLuint attrs:16; GLuint frontface_ccw:1; - GLuint pad:11; + GLuint do_point_sprite:1; + GLuint pad:10; + GLenum SpriteOrigin; }; +struct brw_sf_point_tex { + GLboolean CoordReplace; +}; struct brw_sf_compile { struct brw_compile func; @@ -94,12 +99,14 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_anyprim_setup( struct brw_sf_compile *c ); #endif diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c index 94be815..6fba8c8 100644 --- a/i965/brw_sf_emit.c +++ b/i965/brw_sf_emit.c @@ -59,6 +59,35 @@ static GLboolean have_attr(struct brw_sf_compile *c, return (c->key.attrs & (1<<attr)) ? 1 : 0; } +/** + * Sets VERT_RESULT_FOGC.Y for gl_FrontFacing + * + * This is currently executed if the fragment program uses VERT_RESULT_FOGC + * at all, but this could be eliminated with a scan of the FP contents. + */ +static void +do_front_facing( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + int i; + + if (!have_attr(c, VERT_RESULT_FOGC)) + return; + + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), + c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L, + c->det, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + for (i = 0; i < 3; i++) { + struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC); + brw_MOV(p, get_element(fogc, 1), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, get_element(fogc, 1), brw_imm_f(1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_pop_insn_state(p); +} /*********************************************************************** @@ -355,6 +384,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) invert_det(c); copy_z_inv_w(c); + do_front_facing(c); if (c->key.do_twoside_color) do_twoside_color(c); @@ -503,6 +533,90 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) } } +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (!tex->CoordReplace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (tex->CoordReplace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (tex->CoordReplace) { + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + /* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) */ @@ -569,6 +683,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; struct brw_instruction *jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -623,6 +738,18 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) } brw_land_fwd_jump(p, jmp); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + } + brw_land_fwd_jump(p, jmp); + brw_emit_point_setup( c, GL_FALSE ); } diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c index 2fd75a0..24388b7 100644 --- a/i965/brw_sf_state.c +++ b/i965/brw_sf_state.c @@ -35,69 +35,71 @@ #include "brw_state.h" #include "brw_defines.h" #include "macros.h" +#include "intel_fbo.h" -static void upload_sf_vp(struct brw_context *brw) +static int upload_sf_vp(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; + struct intel_renderbuffer *irb = + intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + GLfloat y_scale, y_bias; memset(&sfv, 0, sizeof(sfv)); - - if (brw->intel.driDrawable) - { - /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ - - if (!brw->metaops.active) { - const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m; - - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = - v[MAT_SY]; - sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h; - sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; - } - else { - sfv.viewport.m00 = 1; - sfv.viewport.m11 = - 1; - sfv.viewport.m22 = 1; - sfv.viewport.m30 = 0; - sfv.viewport.m31 = brw->intel.driDrawable->h; - sfv.viewport.m32 = 0; + + if (ctx->DrawBuffer->Name) { + /* User-created FBO */ + if (irb && !irb->RenderToTexture) { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } else { + y_scale = 1.0; + y_bias = 0; } + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; } - /* XXX: what state for this? */ - if (brw->intel.driDrawable) - { - intelScreenPrivate *screen = brw->intel.intelScreen; - /* _NEW_SCISSOR */ - GLint x = brw->attribs.Scissor->X; - GLint y = brw->attribs.Scissor->Y; - GLuint w = brw->attribs.Scissor->Width; - GLuint h = brw->attribs.Scissor->Height; - - GLint x1 = x; - GLint y1 = brw->intel.driDrawable->h - (y + h); - GLint x2 = x + w - 1; - GLint y2 = y1 + h - 1; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 < 0) x2 = 0; - if (y2 < 0) y2 = 0; - - if (x2 >= screen->width) x2 = screen->width-1; - if (y2 >= screen->height) y2 = screen->height-1; - if (x1 >= screen->width) x1 = screen->width-1; - if (y1 >= screen->height) y1 = screen->height-1; - - sfv.scissor.xmin = x1; - sfv.scissor.xmax = x2; - sfv.scissor.ymin = y1; - sfv.scissor.ymax = y2; + /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ + + if (!brw->metaops.active) { + const GLfloat *v = ctx->Viewport._WindowMap.m; + + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + } else { + sfv.viewport.m00 = 1; + sfv.viewport.m11 = - 1; + sfv.viewport.m22 = 1; + sfv.viewport.m30 = 0; + sfv.viewport.m31 = ctx->DrawBuffer->Height; + sfv.viewport.m32 = 0; } - brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); + /* _NEW_SCISSOR */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive. + */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + + dri_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + + return dri_bufmgr_check_aperture_space(brw->sf.vp_bo); } const struct brw_tracked_state brw_sf_vp = { @@ -107,84 +109,129 @@ const struct brw_tracked_state brw_sf_vp = { .brw = BRW_NEW_METAOPS, .cache = 0 }, - .update = upload_sf_vp + .prepare = upload_sf_vp +}; + +struct brw_sf_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + + unsigned int nr_urb_entries, urb_size, sfsize; + + GLenum front_face, cull_face; + GLboolean scissor, line_smooth, point_sprite, point_attenuated; + float line_width; + float point_size; }; +static void +sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_SF_PROG */ + key->total_grf = brw->sf.prog_data->total_grf; + key->urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_sf_entries; + key->urb_size = brw->urb.vsize; + key->sfsize = brw->urb.sfsize; + key->scissor = brw->attribs.Scissor->Enabled; + key->front_face = brw->attribs.Polygon->FrontFace; + + if (brw->attribs.Polygon->CullFlag) + key->cull_face = brw->attribs.Polygon->CullFaceMode; + else + key->cull_face = GL_NONE; + + key->line_width = brw->attribs.Line->Width; + key->line_smooth = brw->attribs.Line->SmoothFlag; + + key->point_sprite = brw->attribs.Point->PointSprite; + key->point_size = brw->attribs.Point->Size; + key->point_attenuated = brw->attribs.Point->_Attenuated; +} -static void upload_sf_unit( struct brw_context *brw ) +static dri_bo * +sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, + dri_bo **reloc_bufs) { struct brw_sf_unit_state sf; + dri_bo *bo; + memset(&sf, 0, sizeof(sf)); - /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16; - sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; - sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; - /* BRW_NEW_URB_FENCE */ - sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; - sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; - sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + sf.thread4.nr_urb_entries = key->nr_urb_entries; + sf.thread4.urb_entry_allocation_size = key->sfsize - 1; + sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - sf.thread4.max_threads = 0; + sf.thread4.max_threads = 0; if (INTEL_DEBUG & DEBUG_STATS) - sf.thread4.stats_enable = 1; + sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - + sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ + sf.sf5.viewport_transform = 1; - + /* _NEW_SCISSOR */ - if (brw->attribs.Scissor->Enabled) - sf.sf6.scissor = 1; + if (key->scissor) + sf.sf6.scissor = 1; /* _NEW_POLYGON */ - if (brw->attribs.Polygon->FrontFace == GL_CCW) + if (key->front_face == GL_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - if (brw->attribs.Polygon->CullFlag) { - switch (brw->attribs.Polygon->CullFaceMode) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; - break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BOTH; - break; - default: - assert(0); - break; - } - } - else + switch (key->cull_face) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + case GL_NONE: sf.sf6.cull_mode = BRW_CULLMODE_NONE; - + break; + default: + assert(0); + break; + } /* _NEW_LINE */ - sf.sf6.line_width = brw->attribs.Line->_Width * (1<<1); + /* XXX use ctx->Const.Min/MaxLineWidth here */ + sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Line->SmoothFlag) + if (key->line_smooth) sf.sf6.aa_enable = 1; - else if (sf.sf6.line_width <= 0x2) - sf.sf6.line_width = 0; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; /* _NEW_POINT */ - sf.sf6.point_rast_rule = 1; /* opengl conventions */ - sf.sf7.point_size = brw->attribs.Point->_Size * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ + /* XXX clamp max depends on AA vs. non-AA */ + + sf.sf7.sprite_point = key->point_sprite; + sf.sf7.point_size = CLAMP(nearbyint(key->point_size), 1, 255) * (1<<3); + sf.sf7.use_point_size_state = !key->point_attenuated; sf.sf7.aa_line_distance_mode = 0; /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: @@ -199,9 +246,58 @@ static void upload_sf_unit( struct brw_context *brw ) sf.sf6.dest_org_vbias = 0x8; sf.sf6.dest_org_hbias = 0x8; - brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); + bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc_bufs, 2, + &sf, sizeof(sf), + NULL, NULL); + + /* Emit SF program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + + return bo; } +static int upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_key key; + dri_bo *reloc_bufs[2]; + int ret = 0; + + sf_unit_populate_key(brw, &key); + + reloc_bufs[0] = brw->sf.prog_bo; + reloc_bufs[1] = brw->sf.vp_bo; + + dri_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT, + &key, sizeof(key), + reloc_bufs, 2, + NULL); + if (brw->sf.state_bo == NULL) { + brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); + } + + if (reloc_bufs[0]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]); + + if (reloc_bufs[1]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]); + + ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo); + return ret; +} const struct brw_tracked_state brw_sf_unit = { .dirty = { @@ -214,7 +310,5 @@ const struct brw_tracked_state brw_sf_unit = { .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, - .update = upload_sf_unit + .prepare = upload_sf_unit, }; - - diff --git a/i965/brw_state.h b/i965/brw_state.h index 41ac095..d1fca05 100644 --- a/i965/brw_state.h +++ b/i965/brw_state.h @@ -48,7 +48,6 @@ const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; const struct brw_tracked_state brw_gs_prog; const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_line_stipple; const struct brw_tracked_state brw_aa_line_parameters; const struct brw_tracked_state brw_pipelined_state_pointers; @@ -84,64 +83,52 @@ const struct brw_tracked_state brw_clear_batch_cache; /*********************************************************************** * brw_state_cache.c */ -GLuint brw_cache_data(struct brw_cache *cache, - const void *data ); - -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_sz); - -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return); - -void brw_init_caches( struct brw_context *brw ); -void brw_destroy_caches( struct brw_context *brw ); +dri_bo *brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return ); + +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + void *aux_return); +void brw_state_cache_check_size( struct brw_context *brw ); + +void brw_init_cache( struct brw_context *brw ); +void brw_destroy_cache( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), 0) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); - void brw_destroy_batch_cache( struct brw_context *brw ); - - -/*********************************************************************** - * brw_state_pool.c - */ -void brw_init_pools( struct brw_context *brw ); -void brw_destroy_pools( struct brw_context *brw ); - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint alignment, - GLuint *offset_return); - -void brw_pool_fence( struct brw_context *brw, - struct brw_mem_pool *pool, - GLuint fence ); - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ); - -void brw_clear_all_caches( struct brw_context *brw ); -void brw_invalidate_pools( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); #endif diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c index c93d66a..77e2736 100644 --- a/i965/brw_state_batch.c +++ b/i965/brw_state_batch.c @@ -32,7 +32,6 @@ #include "brw_state.h" -#include "brw_aub.h" #include "intel_batchbuffer.h" #include "imports.h" @@ -49,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -76,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -92,18 +91,10 @@ static void clear_batch_cache( struct brw_context *brw ) } brw->cached_batch_items = NULL; - - - brw_clear_all_caches(brw); - - bmReleaseBuffers(&brw->intel); - - brw_invalidate_pools(brw); } void brw_clear_batch_cache_flush( struct brw_context *brw ) { - bmFinishFenceLock(&(brw->intel), bmSetFenceLock(&(brw->intel))); clear_batch_cache(brw); brw->wrap = 0; diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c index 71c6938..d617650 100644 --- a/i965/brw_state_cache.c +++ b/i965/brw_state_cache.c @@ -28,10 +28,35 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - + +/** @file brw_state_cache.c + * + * This file implements a simple static state cache for 965. The consumers + * can query the hash table of state using a cache_id, opaque key data, + * and list of buffers that will be used in relocations, and receive the + * corresponding state buffer object of state (plus associated auxiliary + * data) in return. + * + * The inner workings are a simple hash table based on a CRC of the key data. + * The cache_id and relocation target buffers associated with the state + * buffer are included as auxiliary key data, but are not part of the hash + * value (this should be fixed, but will likely be fixed instead by making + * consumers use structured keys). + * + * Replacement is not implemented. Instead, when the cache gets too big, at + * a safe point (unlock) we throw out all of the cache data and let it + * regenerate for the next rendering operation. + * + * The reloc_buf pointers need to be included as key data, otherwise the + * non-unique values stuffed in the offset in key data through + * brw_cache_data() may result in successful probe for state buffers + * even when the buffer being referenced doesn't match. The result would be + * that the same state cache entry is used twice for different buffers, + * only one of the two buffers referenced gets put into the offset, and the + * incorrect program is run for the other instance. + */ #include "brw_state.h" -#include "brw_aub.h" #include "intel_batchbuffer.h" #include "imports.h" @@ -44,16 +69,8 @@ #include "brw_sf.h" #include "brw_gs.h" - -/*********************************************************************** - * Check cache for uploaded version of struct, else upload new one. - * Fail when memory is exhausted. - * - * XXX: FIXME: Currently search is so slow it would be quicker to - * regenerate the data every time... - */ - -static GLuint hash_key( const void *key, GLuint key_size ) +static GLuint hash_key( const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -62,23 +79,63 @@ static GLuint hash_key( const void *key, GLuint key_size ) /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) + for (i = 0; i < key_size/4; i++) { hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + /* Include the BO pointers as key data as well */ + ikey = (GLuint *)reloc_bufs; + key_size = nr_reloc_bufs * sizeof(dri_bo *); + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } return hash; } -static struct brw_cache_item *search_cache( struct brw_cache *cache, - GLuint hash, - const void *key, - GLuint key_size) +/** + * Marks a new buffer as being chosen for the given cache id. + */ +static void +update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, + dri_bo *bo) +{ + if (bo == cache->last_bo[cache_id]) + return; /* no change */ + + dri_bo_unreference(cache->last_bo[cache_id]); + cache->last_bo[cache_id] = bo; + dri_bo_reference(cache->last_bo[cache_id]); + cache->brw->state.dirty.cache |= 1 << cache_id; +} + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, + GLuint hash, const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { struct brw_cache_item *c; +#if 0 + int bucketcount = 0; + + for (c = cache->items[hash % cache->size]; c; c = c->next) + bucketcount++; + + fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, + cache->size, bucketcount, cache->n_items); +#endif + for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && + if (c->cache_id == cache_id && + c->hash == hash && c->key_size == key_size && - memcmp(c->key, key, key_size) == 0) + memcmp(c->key, key, key_size) == 0 && + c->nr_reloc_bufs == nr_reloc_bufs && + memcmp(c->reloc_bufs, reloc_bufs, + nr_reloc_bufs * sizeof(dri_bo *)) == 0) return c; } @@ -93,8 +150,7 @@ static void rehash( struct brw_cache *cache ) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items)); - _mesa_memset(items, 0, size * sizeof(*items)); + items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -108,142 +164,183 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return) +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return ) { struct brw_cache_item *item; - GLuint addr = 0; - GLuint hash = hash_key(key, key_size); + GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); - item = search_cache(cache, hash, key, key_size); + item = search_cache(cache, cache_id, hash, key, key_size, + reloc_bufs, nr_reloc_bufs); - if (item) { - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - *offset_return = addr = item->offset; - } - - if (item == NULL || addr != cache->last_addr) { - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = addr; - } - - return item != NULL; + if (item == NULL) + return NULL; + + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + + dri_bo_reference(item->bo); + return item->bo; } -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) -{ - GLuint offset; +dri_bo * +brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return ) +{ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size); - void *tmp = _mesa_malloc(key_size + cache->aux_size); - - if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) { - /* Should not be possible: - */ - _mesa_printf("brw_pool_alloc failed\n"); - exit(1); - } + GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); + GLuint aux_size = cache->aux_size[cache_id]; + void *tmp; + dri_bo *bo; + int i; + + /* Create the buffer object to contain the data */ + bo = dri_bo_alloc(cache->brw->intel.bufmgr, + cache->name[cache_id], data_size, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + + + /* Set up the memory containing the key, aux_data, and reloc_bufs */ + tmp = _mesa_malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); + memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); + for (i = 0; i < nr_reloc_bufs; i++) { + if (reloc_bufs[i] != NULL) + dri_bo_reference(reloc_bufs[i]); + } - if (cache->aux_size) - memcpy(tmp+key_size, aux, cache->aux_size); - + item->cache_id = cache_id; item->key = tmp; item->hash = hash; item->key_size = key_size; - item->offset = offset; + item->reloc_bufs = tmp + key_size + aux_size; + item->nr_reloc_bufs = nr_reloc_bufs; + + item->bo = bo; + dri_bo_reference(bo); item->data_size = data_size; - if (++cache->n_items > cache->size * 1.5) + if (cache->n_items > cache->size * 1.5) rehash(cache); - + hash %= cache->size; item->next = cache->items[hash]; cache->items[hash] = item; - + cache->n_items++; + if (aux_return) { - assert(cache->aux_size); + assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n", - cache->name, - data_size, - cache->pool->buffer, - offset); + _mesa_printf("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size, cache_id); - /* Copy data to the buffer: - */ - bmBufferSubDataAUB(&cache->brw->intel, - cache->pool->buffer, - offset, - data_size, - data, - cache->aub_type, - cache->aub_sub_type); - - - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = offset; - - return offset; + /* Copy data to the buffer */ + dri_bo_subdata(bo, 0, data_size, data); + + update_cache_last(cache, cache_id, bo); + + return bo; } /* This doesn't really work with aux data. Use search/upload instead */ -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_size) +dri_bo * +brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - GLuint addr; + dri_bo *bo; + struct brw_cache_item *item; + GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { - addr = brw_upload_cache(cache, - data, data_size, - data, data_size, - NULL, NULL); + item = search_cache(cache, cache_id, hash, data, data_size, + reloc_bufs, nr_reloc_bufs); + if (item) { + update_cache_last(cache, cache_id, item->bo); + dri_bo_reference(item->bo); + return item->bo; } - return addr; + bo = brw_upload_cache(cache, cache_id, + data, data_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, NULL); + + return bo; } -GLuint brw_cache_data(struct brw_cache *cache, - const void *data) +/** + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + */ +dri_bo * +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - return brw_cache_data_sz(cache, data, cache->key_size); + return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], + reloc_bufs, nr_reloc_bufs); } +enum pool_type { + DW_SURFACE_STATE, + DW_GENERAL_STATE +}; + +static void +brw_init_cache_id( struct brw_context *brw, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) +{ + struct brw_cache *cache = &brw->cache; + cache->name[id] = strdup(name); + cache->key_size[id] = key_size; + cache->aux_size[id] = aux_size; +} - - -static void brw_init_cache( struct brw_context *brw, - const char *name, - GLuint id, - GLuint key_size, - GLuint aux_size, - GLuint aub_type, - GLuint aub_sub_type ) +void brw_init_cache( struct brw_context *brw ) { - struct brw_cache *cache = &brw->cache[id]; + struct brw_cache *cache = &brw->cache; + cache->brw = brw; - cache->id = id; - cache->name = name; - cache->items = NULL; cache->size = 7; cache->n_items = 0; @@ -251,200 +348,133 @@ static void brw_init_cache( struct brw_context *brw, _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - cache->key_size = key_size; - cache->aux_size = aux_size; - cache->aub_type = aub_type; - cache->aub_sub_type = aub_sub_type; - switch (aub_type) { - case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; - case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; - default: assert(0); break; - } -} - -void brw_init_caches( struct brw_context *brw ) -{ - - brw_init_cache(brw, - "CC_VP", - BRW_CC_VP, - sizeof(struct brw_cc_viewport), - 0, - DW_GENERAL_STATE, - DWGS_COLOR_CALC_VIEWPORT_STATE); - - brw_init_cache(brw, - "CC_UNIT", - BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_COLOR_CALC_STATE); - - brw_init_cache(brw, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), - sizeof(struct brw_wm_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), - 0, - DW_GENERAL_STATE, - DWGS_SAMPLER_DEFAULT_COLOR); - - brw_init_cache(brw, - "SAMPLER", - BRW_SAMPLER, - 0, /* variable key/data size */ - 0, - DW_GENERAL_STATE, - DWGS_SAMPLER_STATE); - - brw_init_cache(brw, - "WM_UNIT", - BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_WINDOWER_IZ_STATE); - - brw_init_cache(brw, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), - sizeof(struct brw_sf_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SF_VP", - BRW_SF_VP, - sizeof(struct brw_sf_viewport), - 0, - DW_GENERAL_STATE, - DWGS_STRIPS_FANS_VIEWPORT_STATE); - - brw_init_cache(brw, - "SF_UNIT", - BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_STRIPS_FANS_STATE); - - brw_init_cache(brw, - "VS_UNIT", - BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_VERTEX_SHADER_STATE); - - brw_init_cache(brw, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), - sizeof(struct brw_vs_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "CLIP_UNIT", - BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_CLIPPER_STATE); - - brw_init_cache(brw, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), - sizeof(struct brw_clip_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "GS_UNIT", - BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_GEOMETRY_SHADER_STATE); - - brw_init_cache(brw, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), - sizeof(struct brw_gs_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0, - DW_SURFACE_STATE, - DWSS_SURFACE_STATE); - - brw_init_cache(brw, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - sizeof(struct brw_surface_binding_table), - 0, - DW_SURFACE_STATE, - DWSS_BINDING_TABLE_STATE); + brw_init_cache_id(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0); + + brw_init_cache_id(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0); + + brw_init_cache_id(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data)); + + brw_init_cache_id(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0); + + brw_init_cache_id(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0); + + brw_init_cache_id(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0); + + brw_init_cache_id(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data)); + + brw_init_cache_id(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0); + + brw_init_cache_id(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data)); + + brw_init_cache_id(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0); + + brw_init_cache_id(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data)); + + brw_init_cache_id(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0); + + brw_init_cache_id(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data)); + + brw_init_cache_id(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); } - -/* When we lose hardware context, need to invalidate the surface cache - * as these structs must be explicitly re-uploaded. They are subject - * to fixup by the memory manager as they contain absolute agp - * offsets, so we need to ensure there is a fresh version of the - * struct available to receive the fixup. - * - * XXX: Need to ensure that there aren't two versions of a surface or - * bufferobj with different backing data active in the same buffer at - * once? Otherwise the cache could confuse them. Maybe better not to - * cache at all? - * - * --> Isn't this the same as saying need to ensure batch is flushed - * before new data is uploaded to an existing buffer? We - * already try to make sure of that. - */ -static void clear_cache( struct brw_cache *cache ) +static void +brw_clear_cache( struct brw_context *brw ) { struct brw_cache_item *c, *next; GLuint i; - for (i = 0; i < cache->size; i++) { - for (c = cache->items[i]; c; c = next) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < brw->cache.size; i++) { + for (c = brw->cache.items[i]; c; c = next) { + int j; + next = c->next; + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); free((void *)c->key); free(c); } - cache->items[i] = NULL; + brw->cache.items[i] = NULL; } - cache->n_items = 0; -} - -void brw_clear_all_caches( struct brw_context *brw ) -{ - GLint i; - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + brw->cache.n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -456,14 +486,24 @@ void brw_clear_all_caches( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } +void brw_state_cache_check_size( struct brw_context *brw ) +{ + /* un-tuned guess. We've got around 20 state objects for a total of around + * 32k, so 1000 of them is around 1.5MB. + */ + if (brw->cache.n_items > 1000) + brw_clear_cache(brw); +} - - - -void brw_destroy_caches( struct brw_context *brw ) +void brw_destroy_cache( struct brw_context *brw ) { GLuint i; + brw_clear_cache(brw); for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + free(brw->cache.name[i]); + + free(brw->cache.items); + brw->cache.items = NULL; + brw->cache.size = 0; } diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c new file mode 100644 index 0000000..3a93f9f --- /dev/null +++ b/i965/brw_state_dump.c @@ -0,0 +1,200 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "mtypes.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * Prints out a header, the contents, and the message associated with + * the hardware state data given. + * + * \param name Name of the state object + * \param data Pointer to the base of the state object + * \param hw_offset Hardware offset of the base of the state data. + * \param index Index of the DWORD being output. + */ +static void +state_out(const char *name, void *data, uint32_t hw_offset, int index, + char *fmt, ...) +{ + va_list va; + + fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", + name, hw_offset + index * 4, ((uint32_t *)data)[index]); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + +/** Generic, undecoded state buffer debug printout */ +static void +state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size) +{ + int i; + + if (buffer == NULL) + return; + + dri_bo_map(buffer, GL_FALSE); + for (i = 0; i < state_size / 4; i++) { + state_out(name, buffer->virtual, buffer->offset, i, + "dword %d\n", i); + } + dri_bo_unmap(buffer); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static void dump_wm_surface_state(struct brw_context *brw) +{ + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) { + dri_bo *surf_bo = brw->wm.surf_bo[i]; + unsigned int surfoff; + struct brw_surface_state *surf; + char name[20]; + + if (surf_bo == NULL) { + fprintf(stderr, "WM SS%d: NULL\n", i); + continue; + } + dri_bo_map(surf_bo, GL_FALSE); + surfoff = surf_bo->offset; + surf = (struct brw_surface_state *)(surf_bo->virtual); + + sprintf(name, "WM SS%d", i); + state_out(name, surf, surfoff, 0, "%s\n", + get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 1, "offset\n"); + state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); + state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); + state_out(name, surf, surfoff, 4, "mip base %d\n", + surf->ss4.min_lod); + + dri_bo_unmap(surf_bo); + } +} + +static void dump_sf_viewport_state(struct brw_context *brw) +{ + const char *name = "SF VP"; + struct brw_sf_viewport *vp; + uint32_t vp_off; + + if (brw->sf.vp_bo == NULL) + return; + + dri_bo_map(brw->sf.vp_bo, GL_FALSE); + + vp = brw->sf.vp_bo->virtual; + vp_off = brw->sf.vp_bo->offset; + + state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); + state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); + state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); + state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); + state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); + state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); + + state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + vp->scissor.xmin, vp->scissor.ymin); + state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + vp->scissor.xmax, vp->scissor.ymax); + + dri_bo_unmap(brw->sf.vp_bo); +} + +static void brw_debug_prog(const char *name, dri_bo *prog) +{ + unsigned int i; + uint32_t *data; + + if (prog == NULL) + return; + + dri_bo_map(prog, GL_FALSE); + + data = prog->virtual; + + for (i = 0; i < prog->size / 4 / 4; i++) { + fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + name, (unsigned int)prog->offset + i * 4 * 4, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + } + + dri_bo_unmap(prog); +} + + +/** + * Print additional debug information associated with the batchbuffer + * when DEBUG_BATCH is set. + * + * For 965, this means mapping the state buffers that would have been referenced + * by the batchbuffer and dumping them. + * + * The buffer offsets printed rely on the buffer containing the last offset + * it was validated at. + */ +void brw_debug_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + dump_wm_surface_state(brw); + + state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + brw_debug_prog("VS prog", brw->vs.prog_bo); + + state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + brw_debug_prog("GS prog", brw->gs.prog_bo); + + state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + dump_sf_viewport_state(brw); + brw_debug_prog("SF prog", brw->sf.prog_bo); + + state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); + brw_debug_prog("WM prog", brw->wm.prog_bo); +} diff --git a/i965/brw_state_pool.c b/i965/brw_state_pool.c deleted file mode 100644 index b9926f2..0000000 --- a/i965/brw_state_pool.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_state.h" -#include "imports.h" - -#include "intel_ioctl.h" -#include "bufmgr.h" - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint align, - GLuint *offset_return) -{ - GLuint align_mask = (1<<align)-1; - GLuint fixup = ((pool->offset + align_mask) & ~align_mask) - pool->offset; - - size = (size + 3) & ~3; - - if (pool->offset + fixup + size >= pool->size) { - _mesa_printf("%s failed\n", __FUNCTION__); - assert(0); - exit(0); - } - - pool->offset += fixup; - *offset_return = pool->offset; - pool->offset += size; - - return GL_TRUE; -} - -static -void brw_invalidate_pool( struct intel_context *intel, - struct brw_mem_pool *pool ) -{ - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - bmBufferData(intel, - pool->buffer, - pool->size, - NULL, - 0); - - pool->offset = 0; - - brw_clear_all_caches(pool->brw); -} - -static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr ) -{ - struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr; - - pool->offset = 0; - brw_clear_all_caches(pool->brw); -} - - - -static void brw_init_pool( struct brw_context *brw, - GLuint pool_id, - GLuint size ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pool->size = size; - pool->brw = brw; - - bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12); - - /* Also want to say not to wait on fences when data is presented - */ - bmBufferSetInvalidateCB(&brw->intel, pool->buffer, - brw_invalidate_pool_cb, - pool, - GL_TRUE); - - bmBufferData(&brw->intel, - pool->buffer, - pool->size, - NULL, - 0); - -} - -static void brw_destroy_pool( struct brw_context *brw, - GLuint pool_id ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - bmDeleteBuffers(&brw->intel, 1, &pool->buffer); -} - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ) -{ - if (pool->offset > (pool->size * 3) / 4) { - if (brw->intel.aub_file) - brw->intel.aub_wrap = 1; - else - brw->state.dirty.brw |= BRW_NEW_CONTEXT; - } - -} - -void brw_init_pools( struct brw_context *brw ) -{ - brw_init_pool(brw, BRW_GS_POOL, 0x80000); - brw_init_pool(brw, BRW_SS_POOL, 0x80000); -} - -void brw_destroy_pools( struct brw_context *brw ) -{ - brw_destroy_pool(brw, BRW_GS_POOL); - brw_destroy_pool(brw, BRW_SS_POOL); -} - - -void brw_invalidate_pools( struct brw_context *brw ) -{ - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]); - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]); -} diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c index 9bd2881..3b2ccd4 100644 --- a/i965/brw_state_upload.c +++ b/i965/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" #include "intel_batchbuffer.h" /* This is used to initialize brw->state.atoms[]. We could use this @@ -85,7 +85,6 @@ const struct brw_tracked_state *atoms[] = &brw_binding_table_pointers, &brw_blend_constant_color, - &brw_drawing_rect, &brw_depthbuffer, &brw_polygon_stipple, @@ -112,8 +111,7 @@ void brw_init_state( struct brw_context *brw ) { GLuint i; - brw_init_pools(brw); - brw_init_caches(brw); + brw_init_cache(brw); brw->state.atoms = _mesa_malloc(sizeof(atoms)); brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); @@ -138,9 +136,8 @@ void brw_destroy_state( struct brw_context *brw ) brw->state.atoms = NULL; } - brw_destroy_caches(brw); + brw_destroy_cache(brw); brw_destroy_batch_cache(brw); - brw_destroy_pools(brw); } /*********************************************************************** @@ -176,10 +173,10 @@ static void xor_states( struct brw_state_flags *result, /*********************************************************************** * Emit all state: */ -void brw_validate_state( struct brw_context *brw ) +int brw_validate_state( struct brw_context *brw ) { struct brw_state_flags *state = &brw->state.dirty; - GLuint i; + GLuint i, ret, count; state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; @@ -205,18 +202,33 @@ void brw_validate_state( struct brw_context *brw ) if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return; + return 0; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache_flush(brw); + brw->intel.Fallback = 0; - /* Make an early reference to the state pools, as we don't cope - * well with them being evicted from here down. - */ - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer); + count = 0; + + /* do prepare stage for all atoms */ + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = brw->state.atoms[i]; + + if (brw->intel.Fallback) + break; + + if (check_state(state, &atom->dirty)) { + if (atom->prepare) { + ret = atom->prepare(brw); + if (ret) + return ret; + } + } + } + + if (brw->intel.Fallback) + return 0; if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -234,12 +246,13 @@ void brw_validate_state( struct brw_context *brw ) assert(atom->dirty.mesa || atom->dirty.brw || atom->dirty.cache); - assert(atom->update); + + if (brw->intel.Fallback) + break; if (check_state(state, &atom->dirty)) { - brw->state.atoms[i]->update( brw ); - -/* emit_foo(brw); */ + if (atom->emit) + atom->emit( brw ); } accumulate_state(&examined, &atom->dirty); @@ -255,10 +268,19 @@ void brw_validate_state( struct brw_context *brw ) } else { for (i = 0; i < Elements(atoms); i++) { - if (check_state(state, &brw->state.atoms[i]->dirty)) - brw->state.atoms[i]->update( brw ); + const struct brw_tracked_state *atom = brw->state.atoms[i]; + + if (brw->intel.Fallback) + break; + + if (check_state(state, &atom->dirty)) { + if (atom->emit) + atom->emit( brw ); + } } } - memset(state, 0, sizeof(*state)); + if (!brw->intel.Fallback) + memset(state, 0, sizeof(*state)); + return 0; } diff --git a/i965/brw_structs.h b/i965/brw_structs.h index a799122..ec865c9 100644 --- a/i965/brw_structs.h +++ b/i965/brw_structs.h @@ -173,6 +173,48 @@ struct brw_depthbuffer } bits; GLuint dword; } dword4; +}; + +struct brw_depthbuffer_gm45_g4x +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; union { struct { @@ -267,39 +309,39 @@ struct brw_pipelined_state_pointers struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } vs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } gs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } clp; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } sf; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } wm; struct { GLuint pad:5; - GLuint offset:27; /* KW: check me! */ + GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ } cc; }; @@ -502,7 +544,7 @@ struct thread0 GLuint pad0:1; GLuint grf_reg_count:3; GLuint pad1:2; - GLuint kernel_start_pointer:26; + GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ }; struct thread1 @@ -666,7 +708,7 @@ struct brw_cc_unit_state struct { GLuint pad0:5; - GLuint cc_viewport_state_offset:27; + GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } cc4; struct @@ -728,7 +770,7 @@ struct brw_sf_unit_state GLuint front_winding:1; GLuint viewport_transform:1; GLuint pad0:3; - GLuint sf_viewport_state_offset:27; + GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } sf5; struct @@ -960,6 +1002,7 @@ struct brw_sf_viewport GLfloat m32; } viewport; + /* scissor coordinates are inclusive */ struct { GLshort xmin; GLshort ymin; @@ -1362,7 +1405,7 @@ struct brw_instruction GLuint msg_target:4; GLuint pad1:3; GLuint end_of_thread:1; - } sampler_igd; + } sampler_gm45_g4x; struct brw_urb_immediate urb; diff --git a/i965/brw_tex.c b/i965/brw_tex.c index 9d4b986..258c626 100644 --- a/i965/brw_tex.c +++ b/i965/brw_tex.c @@ -44,151 +44,16 @@ #include "intel_context.h" #include "intel_ioctl.h" #include "intel_regions.h" +#include "intel_tex.h" #include "brw_context.h" #include "brw_defines.h" - - -static const struct gl_texture_format * -brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, - GLenum srcFormat, GLenum srcType ) -{ - switch ( internalFormat ) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV) - return &_mesa_texformat_argb4444; - else if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV) - return &_mesa_texformat_argb1555; - else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8)) - return &_mesa_texformat_rgba8888_rev; - else - return &_mesa_texformat_argb8888; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return &_mesa_texformat_argb8888; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - /* Broadwater doesn't support RGB888 textures, so these must be - * stored as ARGB. - */ - return &_mesa_texformat_argb8888; - - case 3: - case GL_COMPRESSED_RGB: - case GL_RGB: - if (srcFormat == GL_RGB && - srcType == GL_UNSIGNED_SHORT_5_6_5) - return &_mesa_texformat_rgb565; - else - return &_mesa_texformat_argb8888; - - - case GL_RGB5: - case GL_RGB5_A1: - return &_mesa_texformat_argb1555; - - case GL_R3_G3_B2: - case GL_RGBA2: - case GL_RGBA4: - case GL_RGB4: - return &_mesa_texformat_argb4444; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return &_mesa_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return &_mesa_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return &_mesa_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return &_mesa_texformat_i8; - - case GL_YCBCR_MESA: - if (srcType == GL_UNSIGNED_SHORT_8_8_MESA || - srcType == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgba_fxt1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - return &_mesa_texformat_z16; - - default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), - __FUNCTION__); - return NULL; - } - - return NULL; /* never get here */ -} - - -void brwInitTextureFuncs( struct dd_function_table *functions ) -{ - functions->ChooseTextureFormat = brwChooseTextureFormat; -} - -void brw_FrameBufferTexInit( struct brw_context *brw ) +void brw_FrameBufferTexInit( struct brw_context *brw, + struct intel_region *region ) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; - struct intel_region *region = intel->front_region; struct gl_texture_object *obj; struct gl_texture_image *img; @@ -209,6 +74,26 @@ void brw_FrameBufferTexInit( struct brw_context *brw ) void brw_FrameBufferTexDestroy( struct brw_context *brw ) { - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); + if (brw->intel.frame_buffer_texobj != NULL) + brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, + brw->intel.frame_buffer_texobj ); + brw->intel.frame_buffer_texobj = NULL; +} + +/** + * Finalizes all textures, completing any rendering that needs to be done + * to prepare them. + */ +void brw_validate_textures( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + + if (texUnit->_ReallyEnabled) { + intel_finalize_mipmap_tree(intel, i); + } + } } diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c index d4888a4..e437c41 100644 --- a/i965/brw_tex_layout.c +++ b/i965/brw_tex_layout.c @@ -35,8 +35,10 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" +#include "intel_context.h" #include "macros.h" +#define FILE_DEBUG_FLAG DEBUG_MIPTREE GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) { @@ -53,11 +55,20 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint pack_x_pitch, pack_x_nr; GLuint pack_y_pitch; GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; - mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; mt->total_height = 0; + + if (mt->compressed) { + align_w = intel_compressed_alignment(mt->internal_format); + mt->pitch = ALIGN(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); + } - pack_y_pitch = MAX2(mt->height0, 2); pack_x_pitch = mt->pitch; pack_x_nr = 1; @@ -83,20 +94,30 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t mt->total_height += y; - - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - } - width = minify(width); height = minify(height); depth = minify(depth); + + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } + } break; } diff --git a/i965/brw_urb.c b/i965/brw_urb.c index 4ca6e99..c423dbe 100644 --- a/i965/brw_urb.c +++ b/i965/brw_urb.c @@ -35,7 +35,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "brw_hal.h" #define VS 0 #define GS 1 @@ -53,7 +52,7 @@ static const struct { GLuint min_entry_size; GLuint max_entry_size; } limits[CS+1] = { - { 8, 32, 1, 5 }, /* vs */ + { 16, 32, 1, 5 }, /* vs */ { 4, 8, 1, 5 }, /* gs */ { 6, 8, 1, 5 }, /* clp */ { 1, 8, 1, 12 }, /* sf */ @@ -75,26 +74,12 @@ static GLboolean check_urb_layout( struct brw_context *brw ) /* Most minimal update, forces re-emit of URB fence packet after GS * unit turned on/off. */ -static void recalculate_urb_fence( struct brw_context *brw ) +static int recalculate_urb_fence( struct brw_context *brw ) { GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; GLuint sfsize = brw->sf.prog_data->urb_entry_size; - static GLboolean (*hal_recalculate_urb_fence) (struct brw_context *brw); - static GLboolean hal_tried; - - if (!hal_tried) - { - hal_recalculate_urb_fence = brw_hal_find_symbol ("intel_hal_recalculate_urb_fence"); - hal_tried = 1; - } - if (hal_recalculate_urb_fence) - { - if ((*hal_recalculate_urb_fence) (brw)) - return; - } - if (csize < limits[CS].min_entry_size) csize = limits[CS].min_entry_size; @@ -157,6 +142,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } + return 0; } @@ -167,7 +153,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = { .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_SF_PROG) }, - .update = recalculate_urb_fence + .prepare = recalculate_urb_fence }; diff --git a/i965/brw_util.c b/i965/brw_util.c index b6deee2..d8d35c5 100644 --- a/i965/brw_util.c +++ b/i965/brw_util.c @@ -45,86 +45,6 @@ GLuint brw_count_bits( GLuint val ) } -static GLuint brw_parameter_state_flags(const gl_state_index state[]) -{ - switch (state[0]) { - case STATE_MATERIAL: - case STATE_LIGHT: - case STATE_LIGHTMODEL_AMBIENT: - case STATE_LIGHTMODEL_SCENECOLOR: - case STATE_LIGHTPROD: - return _NEW_LIGHT; - - case STATE_TEXGEN: - case STATE_TEXENV_COLOR: - return _NEW_TEXTURE; - - case STATE_FOG_COLOR: - case STATE_FOG_PARAMS: - return _NEW_FOG; - - case STATE_CLIPPLANE: - return _NEW_TRANSFORM; - - case STATE_POINT_SIZE: - case STATE_POINT_ATTENUATION: - return _NEW_POINT; - - case STATE_MODELVIEW_MATRIX: - return _NEW_MODELVIEW; - case STATE_PROJECTION_MATRIX: - return _NEW_PROJECTION; - case STATE_MVP_MATRIX: - return _NEW_MODELVIEW | _NEW_PROJECTION; - case STATE_TEXTURE_MATRIX: - return _NEW_TEXTURE_MATRIX; - case STATE_PROGRAM_MATRIX: - return _NEW_TRACK_MATRIX; - - case STATE_DEPTH_RANGE: - return _NEW_VIEWPORT; - - case STATE_FRAGMENT_PROGRAM: - case STATE_VERTEX_PROGRAM: - return _NEW_PROGRAM; - - case STATE_INTERNAL: - switch (state[1]) { - case STATE_NORMAL_SCALE: - return _NEW_MODELVIEW; - case STATE_TEXRECT_SCALE: - return _NEW_TEXTURE; - default: - assert(0); - return 0; - } - - default: - assert(0); - return 0; - } -} - - -GLuint -brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList) -{ - GLuint i; - GLuint result = 0; - - if (!paramList) - return 0; - - for (i = 0; i < paramList->NumParameters; i++) { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { - result |= brw_parameter_state_flags(paramList->Parameters[i].StateIndexes); - } - } - - return result; -} - - GLuint brw_translate_blend_equation( GLenum mode ) { switch (mode) { diff --git a/i965/brw_vs.c b/i965/brw_vs.c index 50826d9..f89b0e1 100644 --- a/i965/brw_vs.c +++ b/i965/brw_vs.c @@ -73,19 +73,17 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - /* - */ - brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->vs.prog_data); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data ); } -static void brw_upload_vs_prog( struct brw_context *brw ) +static int brw_upload_vs_prog( struct brw_context *brw ) { struct brw_vs_prog_key key; struct brw_vertex_program *vp = @@ -110,13 +108,14 @@ static void brw_upload_vs_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_VS_PROG], - &key, sizeof(key), - &brw->vs.prog_data, - &brw->vs.prog_gs_offset)) - return; - - do_vs_prog(brw, vp, &key); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->vs.prog_data); + if (brw->vs.prog_bo == NULL) + do_vs_prog(brw, vp, &key); + return dri_bufmgr_check_aperture_space(brw->vs.prog_bo); } @@ -128,5 +127,5 @@ const struct brw_tracked_state brw_vs_prog = { .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS, .cache = 0 }, - .update = brw_upload_vs_prog + .prepare = brw_upload_vs_prog }; diff --git a/i965/brw_vs.h b/i965/brw_vs.h index fdb5785..41a33ff 100644 --- a/i965/brw_vs.h +++ b/i965/brw_vs.h @@ -36,7 +36,7 @@ #include "brw_context.h" #include "brw_eu.h" -#include "program.h" +#include "shader/program.h" struct brw_vs_prog_key { @@ -67,6 +67,12 @@ struct brw_vs_compile { struct brw_reg r1; struct brw_reg regs[PROGRAM_ADDRESS+1][128]; struct brw_reg tmp; + struct brw_reg stack; + + struct { + GLboolean used_in_src; + struct brw_reg reg; + } output_regs[128]; struct brw_reg userplane[6]; diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c index caef042..a0106b8 100644 --- a/i965/brw_vs_constval.c +++ b/i965/brw_vs_constval.c @@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw, /* Calculate sizes of vertex program outputs. Size is the largest * component index which might vary from [0,0,0,1] */ -static void calc_wm_input_sizes( struct brw_context *brw ) +static int calc_wm_input_sizes( struct brw_context *brw ) { /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = @@ -210,6 +210,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; } + return 0; } const struct brw_tracked_state brw_wm_input_sizes = { @@ -218,6 +219,6 @@ const struct brw_tracked_state brw_wm_input_sizes = { .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, - .update = calc_wm_input_sizes + .prepare = calc_wm_input_sizes }; diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c index c38e998..8759826 100644 --- a/i965/brw_vs_emit.c +++ b/i965/brw_vs_emit.c @@ -30,8 +30,8 @@ */ -#include "program.h" -#include "macros.h" +#include "main/macros.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "brw_context.h" @@ -134,6 +134,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) WRITEMASK_X); reg++; } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; /* Some opcodes need an internal temporary: @@ -201,7 +211,7 @@ static void unalias2( struct brw_vs_compile *c, struct brw_reg, struct brw_reg )) { - if ((dst.file == arg0.file && dst.nr == arg0.nr) && + if ((dst.file == arg0.file && dst.nr == arg0.nr) || (dst.file == arg1.file && dst.nr == arg1.nr)) { struct brw_compile *p = &c->func; struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); @@ -213,57 +223,65 @@ static void unalias2( struct brw_vs_compile *c, } } +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint cond) +{ + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control_flag_value(p, 0xff); +} +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} - +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} static void emit_slt( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - /* Could be done with an if/else/endif, but this method uses half - * the instructions. Note that we are careful to reference the - * arguments before writing the dest. That means we emit the - * instructions in an odd order and have to play with the flag - * values. - */ - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(1.0)); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} - /* Where the test succeeded, overwite with zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_pop_insn_state(p); +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); } +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} static void emit_sge( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0)); - - /* Where the test succeeded, overwite with 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - brw_pop_insn_state(p); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); } - static void emit_max( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, @@ -592,9 +610,13 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: - case PROGRAM_STATE_VAR: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -668,28 +690,28 @@ static void emit_arl( struct brw_vs_compile *c, * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register src ) + struct prog_src_register *src ) { struct brw_reg reg; - if (src.File == PROGRAM_UNDEFINED) + if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src.RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); else - reg = get_reg(c, src.File, src.Index); + reg = get_reg(c, src->File, src->Index); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0), - GET_SWZ(src.Swizzle, 1), - GET_SWZ(src.Swizzle, 2), - GET_SWZ(src.Swizzle, 3)); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); /* Note this is ok for non-swizzle instructions: */ - reg.negate = src.NegateBase ? 1 : 0; + reg.negate = src->NegateBase ? 1 : 0; return reg; } @@ -845,7 +867,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (!BRW_IS_IGD(p->brw) && !c->key.know_w_is_one) { + if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw)) && !c->key.know_w_is_one) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -891,17 +913,50 @@ static void emit_vertex_write( struct brw_vs_compile *c) } - - +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->vp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + case OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} /* Emit the fragment program instructions here. */ -void brw_vs_emit( struct brw_vs_compile *c ) +void brw_vs_emit(struct brw_vs_compile *c ) { +#define MAX_IFSN 32 struct brw_compile *p = &c->func; GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn; + GLuint insn, if_insn = 0; + struct brw_instruction *end_inst; + struct brw_instruction *if_inst[MAX_IFSN]; + struct brw_indirect stack_index = brw_indirect(0, 0); + GLuint index; + GLuint file; if (INTEL_DEBUG & DEBUG_VS) { _mesa_printf("\n\n\nvs-emit:\n"); @@ -912,9 +967,24 @@ void brw_vs_emit( struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { + GLuint i; + struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + GLuint index = src->Index; + GLuint file = src->File; + if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) + c->output_regs[index].used_in_src = GL_TRUE; + } + } + /* Static register allocation */ brw_vs_alloc_regs(c); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { @@ -924,17 +994,29 @@ void brw_vs_emit( struct brw_vs_compile *c ) /* Get argument regs. SWZ is special and does this itself. */ + inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) - args[i] = get_arg(c, inst->SrcReg[i]); + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, src); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. - */ - dst = get_dst(c, inst->DstReg); + */ + index = inst->DstReg.Index; + file = inst->DstReg.File; + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, inst->DstReg); - switch (inst->Opcode) { case OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); @@ -1003,12 +1085,25 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; + + case OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; + case OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; case OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; @@ -1021,21 +1116,82 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case OPCODE_RET: + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); case OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; case OPCODE_PRINT: + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: break; default: + _mesa_printf("Unsupport opcode %d in vertex shader\n", inst->Opcode); break; } + if ((inst->DstReg.File == PROGRAM_OUTPUT) + && (inst->DstReg.Index != VERT_RESULT_HPOS) + && c->output_regs[inst->DstReg.Index].used_in_src) { + brw_MOV(p, get_dst(c, inst->DstReg), dst); + } + + /* Result color clamping. + * + * When destination register is an output register and + * it's primary/secondary front/back color, we have to clamp + * the result to [0,1]. This is done by enabling the + * saturation bit for the last instruction. + * + * We don't use brw_set_saturate() as it modifies + * p->current->header.saturate, which affects all the subsequent + * instructions. Instead, we directly modify the header + * of the last (already stored) instruction. + */ + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if ((inst->DstReg.Index == VERT_RESULT_COL0) + || (inst->DstReg.Index == VERT_RESULT_COL1) + || (inst->DstReg.Index == VERT_RESULT_BFC0) + || (inst->DstReg.Index == VERT_RESULT_BFC1)) { + p->store[p->nr_insn-1].header.saturate = 1; + } + } + release_tmps(c); } + end_inst = &p->store[p->nr_insn]; emit_vertex_write(c); - + post_vs_emit(c, end_inst); + for (insn = 0; insn < nr_insns; insn++) + c->vp->program.Base.Instructions[insn].Data = NULL; } - - - - - diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c index c225bf8..2a64f3d 100644 --- a/i965/brw_vs_state.c +++ b/i965/brw_vs_state.c @@ -36,60 +36,110 @@ #include "brw_defines.h" #include "macros.h" -static void upload_vs_unit( struct brw_context *brw ) -{ - struct brw_vs_unit_state vs; - - memset(&vs, 0, sizeof(vs)); - - /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; - vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; - vs.thread3.dispatch_grf_start_reg = 1; +struct brw_vs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int curbe_offset; - /* BRW_NEW_URB_FENCE */ - vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; - vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = MIN2( - MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), - 15); + unsigned int nr_urb_entries, urb_size; +}; +static void +vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + /* CACHE_NEW_VS_PROG */ + key->total_grf = brw->vs.prog_data->total_grf; + key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; + key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_vs_entries; + key->urb_size = brw->urb.vsize; /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (brw->attribs.Transform->ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence * clip_start: */ - vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + key->curbe_offset = brw->curbe.clip_start; } else { - vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + key->curbe_offset = brw->curbe.vs_start; } +} + +static dri_bo * +vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + struct brw_vs_unit_state vs; + dri_bo *bo; + + memset(&vs, 0, sizeof(vs)); + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; + vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + vs.thread3.dispatch_grf_start_reg = 1; vs.thread3.urb_entry_read_offset = 0; + vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1), + 15); + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; /* No samplers for ARB_vp programs: */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) - vs.thread4.stats_enable = 1; + vs.thread4.stats_enable = 1; - /* Vertex program always enabled: + /* Vertex program always enabled: */ vs.vs6.vs_enable = 1; - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); + bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL); + + /* Emit VS program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + return bo; } +static int prepare_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_key key; + + vs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->vs.state_bo); + brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + &brw->vs.prog_bo, 1, + NULL); + if (brw->vs.state_bo == NULL) { + brw->vs.state_bo = vs_unit_create_from_key(brw, &key); + } + return dri_bufmgr_check_aperture_space(brw->vs.state_bo); +} const struct brw_tracked_state brw_vs_unit = { .dirty = { @@ -98,5 +148,5 @@ const struct brw_tracked_state brw_vs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_vs_unit + .prepare = prepare_vs_unit, }; diff --git a/i965/brw_vs_tnl.c b/i965/brw_vs_tnl.c index 14483b3..e409620 100644 --- a/i965/brw_vs_tnl.c +++ b/i965/brw_vs_tnl.c @@ -404,7 +404,7 @@ static struct ureg register_const4f( struct tnl_program *p, values[3] = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle); - /* XXX what about swizzle? */ + assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ return make_ureg(PROGRAM_STATE_VAR, idx); } @@ -524,10 +524,13 @@ static void emit_op3fn(struct tnl_program *p, GLuint nr = p->program->Base.NumInstructions++; if (nr >= p->nr_instructions) { + int new_nr_instructions = p->nr_instructions * 2; + p->program->Base.Instructions = _mesa_realloc(p->program->Base.Instructions, sizeof(struct prog_instruction) * p->nr_instructions, - sizeof(struct prog_instruction) * (p->nr_instructions *= 2)); + sizeof(struct prog_instruction) * new_nr_instructions); + p->nr_instructions = new_nr_instructions; } { @@ -1000,16 +1003,16 @@ static void build_lighting( struct tnl_program *p ) STATE_POSITION); struct ureg V = get_eye_position(p); struct ureg dist = get_temp(p); - struct ureg tmpPpli = get_temp(p); + struct ureg tmpPpli = get_temp(p); VPpli = get_temp(p); half = get_temp(p); - /* In homogeneous object coordinates - */ - emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W)); - emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist); - + /* In homogeneous object coordinates + */ + emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W)); + emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist); + /* Calulate VPpli vector */ emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V); @@ -1044,7 +1047,7 @@ static void build_lighting( struct tnl_program *p ) emit_normalize_vec3(p, half, half); release_temp(p, dist); - release_temp(p, tmpPpli); + release_temp(p, tmpPpli); } /* Calculate dot products: @@ -1161,12 +1164,19 @@ static void build_fog( struct tnl_program *p ) { struct ureg fog = register_output(p, VERT_RESULT_FOGC); struct ureg input; - + GLuint useabs = p->state->fog_source_is_depth && p->state->fog_option && + (p->state->fog_option != FOG_EXP2); + if (p->state->fog_source_is_depth) { input = swizzle1(get_eye_position(p), Z); } else { input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); + if (p->state->fog_option && + p->state->tnl_do_vertex_fog) + input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); + else + input = register_input(p, VERT_ATTRIB_FOG); } if (p->state->fog_option && @@ -1178,26 +1188,30 @@ static void build_fog( struct tnl_program *p ) emit_op1(p, OPCODE_MOV, fog, 0, id); + if (useabs) { + emit_op1(p, OPCODE_ABS, tmp, 0, input); + } + switch (p->state->fog_option) { case FOG_LINEAR: { - emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y)); + emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input, + swizzle1(params,X), swizzle1(params,Y)); emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); break; } case FOG_EXP: - emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z)); + emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input, + swizzle1(params,Z)); emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; case FOG_EXP2: emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; } - + release_temp(p, tmp); } else { @@ -1205,7 +1219,7 @@ static void build_fog( struct tnl_program *p ) * * KW: Is it really necessary to do anything in this case? */ - emit_op1(p, OPCODE_MOV, fog, 0, input); + emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, 0, input); } } @@ -1567,7 +1581,7 @@ static GLuint hash_key( struct state_key *key ) return hash; } -static void update_tnl_program( struct brw_context *brw ) +static int prepare_tnl_program( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct state_key key; @@ -1575,8 +1589,8 @@ static void update_tnl_program( struct brw_context *brw ) struct gl_vertex_program *old = brw->tnl_program; /* _NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) - return; + if (brw->attribs.VertexProgram->_Current) + return 0; /* Grab all the relevent state and put it in a single structure: */ @@ -1609,6 +1623,7 @@ static void update_tnl_program( struct brw_context *brw ) if (old != brw->tnl_program) brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM; + return 0; } /* Note: See brw_draw.c - the vertex program must not rely on @@ -1622,24 +1637,25 @@ const struct brw_tracked_state brw_tnl_vertprog = { _NEW_FOG | _NEW_HINT | _NEW_POINT | - _NEW_TEXTURE), + _NEW_TEXTURE | + _NEW_TEXTURE_MATRIX), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_INPUT_VARYING), .cache = 0 }, - .update = update_tnl_program + .prepare = prepare_tnl_program }; -static void update_active_vertprog( struct brw_context *brw ) +static int prepare_active_vertprog( struct brw_context *brw ) { const struct gl_vertex_program *prev = brw->vertex_program; /* NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) { - brw->vertex_program = brw->attribs.VertexProgram->Current; + if (brw->attribs.VertexProgram->_Current) { + brw->vertex_program = brw->attribs.VertexProgram->_Current; } else { /* BRW_NEW_TNL_PROGRAM */ @@ -1648,6 +1664,8 @@ static void update_active_vertprog( struct brw_context *brw ) if (brw->vertex_program != prev) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + + return 0; } @@ -1658,7 +1676,7 @@ const struct brw_tracked_state brw_active_vertprog = { .brw = BRW_NEW_TNL_PROGRAM, .cache = 0 }, - .update = update_active_vertprog + .prepare = prepare_active_vertprog }; diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c index 786f30e..31e96a2 100644 --- a/i965/brw_vtbl.c +++ b/i965/brw_vtbl.c @@ -47,10 +47,9 @@ #include "brw_draw.h" #include "brw_state.h" -#include "brw_aub.h" #include "brw_fallback.h" #include "brw_vs.h" - +#include <stdarg.h> /* called from intelDestroyContext() @@ -60,8 +59,6 @@ static void brw_destroy_context( struct intel_context *intel ) GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(&intel->ctx); - brw_aub_destroy(brw); - brw_destroy_metaops(brw); brw_destroy_state(brw); brw_draw_destroy( brw ); @@ -73,38 +70,54 @@ static void brw_destroy_context( struct intel_context *intel ) /* called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region) + struct intel_region *draw_regions[], + struct intel_region *depth_region, + GLuint num_regions) { struct brw_context *brw = brw_context(&intel->ctx); - - intel_region_release(intel, &brw->state.draw_region); - intel_region_release(intel, &brw->state.depth_region); - intel_region_reference(&brw->state.draw_region, draw_region); + int i; + if (brw->state.depth_region != depth_region) + brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; + for (i = 0; i < brw->state.nr_draw_regions; i++) + intel_region_release(&brw->state.draw_regions[i]); + intel_region_release(&brw->state.depth_region); + for (i = 0; i < num_regions; i++) + intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); + brw->state.nr_draw_regions = num_regions; } /* called from intelFlushBatchLocked */ -static void brw_lost_hardware( struct intel_context *intel ) +static void brw_new_batch( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - /* Note that we effectively lose the context after this. - * - * Setting this flag provokes a state buffer wrap and also flushes - * the hardware caches. - */ - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!brw->no_batch_wrap); + + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; - /* Which means there shouldn't be any commands already queued: + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + brw->state.dirty.brw |= BRW_NEW_CONTEXT; brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + brw->vb.upload.offset = 0; + } } static void brw_note_fence( struct intel_context *intel, @@ -115,10 +128,9 @@ static void brw_note_fence( struct intel_context *intel, static void brw_note_unlock( struct intel_context *intel ) { - struct brw_context *brw = brw_context(&intel->ctx); + struct brw_context *brw = brw_context(&intel->ctx); - brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]); - brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]); + brw_state_cache_check_size(brw); brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK; } @@ -156,9 +168,6 @@ static GLuint brw_flush_cmd( void ) return *(GLuint *)&flush; } - - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -176,10 +185,11 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.invalidate_state = brw_invalidate_state; brw->intel.vtbl.note_fence = brw_note_fence; brw->intel.vtbl.note_unlock = brw_note_unlock; - brw->intel.vtbl.lost_hardware = brw_lost_hardware; + brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.emit_flush = brw_emit_flush; + brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/i965/brw_wm.c b/i965/brw_wm.c index f80ba17..a470a25 100644 --- a/i965/brw_wm.c +++ b/i965/brw_wm.c @@ -29,12 +29,11 @@ * Keith Whitwell <keith@tungstengraphics.com> */ - +#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" -#include "brw_hal.h" GLuint brw_wm_nr_args( GLuint opcode ) @@ -66,7 +65,11 @@ GLuint brw_wm_nr_args( GLuint opcode ) case OPCODE_POW: case OPCODE_SUB: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SLE: case OPCODE_SLT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -116,20 +119,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } -static void brw_wm_pass_hal (struct brw_wm_compile *c) -{ - static void (*hal_wm_pass) (struct brw_wm_compile *c); - static GLboolean hal_tried; - - if (!hal_tried) - { - hal_wm_pass = brw_hal_find_symbol ("intel_hal_wm_pass"); - hal_tried = 1; - } - if (hal_wm_pass) - (*hal_wm_pass) (c); -} - static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -150,59 +139,53 @@ static void do_wm_prog( struct brw_context *brw, c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Hal optimization - */ - brw_wm_pass_hal (c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - /* This is where we start emitting gen4 code: - */ - brw_init_compile(brw, &c->func); - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; + brw_init_compile(brw, &c->func); + if (brw_wm_is_glsl(&c->fp->program)) { + brw_wm_glsl_emit(brw, c); } else { - c->prog_data.total_scratch = 0; + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + */ + c->grf_limit = BRW_WM_MAX_GRF/2; + + brw_wm_pass2(c); + + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = + c->last_scratch + 0x40; + } else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); - /* get the program */ program = brw_get_program(&c->func, &program_size); - /* - */ - brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c->key, - sizeof(c->key), - program, - program_size, - &c->prog_data, - &brw->wm.prog_data ); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data ); } @@ -242,7 +225,8 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + (brw->attribs.Stencil->_TestTwoSide && + brw->attribs.Stencil->WriteMask[1])) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } @@ -284,7 +268,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1]; + key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); /* _NEW_LIGHT */ key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); @@ -295,17 +279,41 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_texture_object *t = unit->_Current; if (unit->_ReallyEnabled) { - - if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && - t->Image[0][t->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT) { - key->shadowtex_mask |= 1<<i; - } - - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1<<i; + if (t->Image[0][t->BaseLevel]->TexFormat->MesaFormat == + MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1<< i; + } } } - + + /* Shadow */ + key->shadowtex_mask = fp->program.Base.ShadowSamplers; + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * This pretty much becomes moot with DRI2 and redirected buffers anyway, + * as our origins will always be zero then. + */ + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } /* Extra info: */ @@ -314,7 +322,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } -static void brw_upload_wm_prog( struct brw_context *brw ) +static int brw_prepare_wm_prog( struct brw_context *brw ) { struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) @@ -324,13 +332,15 @@ static void brw_upload_wm_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_WM_PROG], - &key, sizeof(key), - &brw->wm.prog_data, - &brw->wm.prog_gs_offset)) - return; - - do_wm_prog(brw, fp, &key); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data); + if (brw->wm.prog_bo == NULL) + do_wm_prog(brw, fp, &key); + + return dri_bufmgr_check_aperture_space(brw->wm.prog_bo); } @@ -344,12 +354,13 @@ const struct brw_tracked_state brw_wm_prog = { _NEW_POLYGON | _NEW_LINE | _NEW_LIGHT | + _NEW_BUFFERS | _NEW_TEXTURE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = 0 }, - .update = brw_upload_wm_prog + .prepare = brw_prepare_wm_prog }; diff --git a/i965/brw_wm.h b/i965/brw_wm.h index f5fddfd..297617e 100644 --- a/i965/brw_wm.h +++ b/i965/brw_wm.h @@ -34,9 +34,9 @@ #define BRW_WM_H +#include "shader/prog_instruction.h" #include "brw_context.h" #include "brw_eu.h" -#include "prog_instruction.h" /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM @@ -69,9 +69,12 @@ struct brw_wm_prog_key { GLuint runtime_check_aads_emit:1; GLuint yuvtex_mask:8; - GLuint pad1:24; + GLuint yuvtex_swap_mask:8; /* UV swaped */ + GLuint pad1:16; GLuint program_string_id:32; + GLuint origin_x, origin_y; + GLuint drawable_height; }; @@ -140,6 +143,8 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint eot:1; /* End of thread indicator for FB_WRITE*/ + GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -194,6 +199,8 @@ struct brw_wm_compile { GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -231,6 +238,15 @@ struct brw_wm_compile { GLuint grf_limit; GLuint max_wm_grf; GLuint last_scratch; + + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint reg_index; + GLuint tmp_index; }; @@ -259,4 +275,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ); +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); #endif diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c index 80bd576..9b919b9 100644 --- a/i965/brw_wm_emit.c +++ b/i965/brw_wm_emit.c @@ -39,7 +39,7 @@ /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ -static __inline struct brw_reg sechalf( struct brw_reg reg ) +static INLINE struct brw_reg sechalf( struct brw_reg reg ) { if (reg.vstride) reg.nr++; @@ -122,26 +122,30 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. + struct brw_compile *p = &c->func; + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. */ if (mask & WRITEMASK_X) { - brw_MOV(p, + /* X' = X - origin */ + brw_ADD(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW)); + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); } if (mask & WRITEMASK_Y) { - /* TODO -- window_height - Y */ - brw_MOV(p, + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_UW))); - + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); } } @@ -219,6 +223,10 @@ static void emit_pinterp( struct brw_compile *p, if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { brw_MUL(p, dst[i], dst[i], w[3]); } } @@ -229,20 +237,20 @@ static void emit_cinterp( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ - } - } + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } } @@ -343,11 +351,10 @@ static void emit_lrp( struct brw_compile *p, } } } - - -static void emit_slt( struct brw_compile *p, +static void emit_sop( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, + GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { @@ -356,34 +363,66 @@ static void emit_slt( struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); brw_MOV(p, dst[i], brw_imm_f(1.0)); brw_set_predicate_control_flag_value(p, 0xff); } } } -/* Isn't this just the same as the above with the args swapped? - */ -static void emit_sge( struct brw_compile *p, +static void emit_slt( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - GLuint i; + emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); +} - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); - brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); - } - } +static void emit_sle( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); } +static void emit_sgt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); +} +static void emit_sge( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); +} + +static void emit_seq( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); +} + +static void emit_sne( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); +} static void emit_cmp( struct brw_compile *p, const struct brw_reg *dst, @@ -465,6 +504,9 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -482,6 +524,9 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -500,6 +545,9 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -543,8 +591,11 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - function == BRW_MATH_FUNCTION_SINCOS); + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + + //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || + // function == BRW_MATH_FUNCTION_SINCOS); brw_MOV(p, brw_message_reg(2), arg0[0]); @@ -567,6 +618,9 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_push_insn_state(p); @@ -661,7 +715,7 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + 1, /* surface */ + inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, (shadow ? @@ -670,7 +724,6 @@ static void emit_tex( struct brw_wm_compile *c, responseLength, msgLength, 0); - } @@ -712,7 +765,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + 1, /* surface */ + inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, @@ -785,7 +838,9 @@ static void emit_kil( struct brw_wm_compile *c, static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, - GLuint nr ) + GLuint nr, + GLuint target, + GLuint eot ) { struct brw_compile *p = &c->func; @@ -808,10 +863,10 @@ static void fire_fb_write( struct brw_wm_compile *c, retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ + target, nr, 0, - 1); + eot); } static void emit_aa( struct brw_wm_compile *c, @@ -836,7 +891,9 @@ static void emit_aa( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, struct brw_reg *arg1, - struct brw_reg *arg2) + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; GLuint nr = 2; @@ -891,15 +948,16 @@ static void emit_fb_write( struct brw_wm_compile *c, GLuint off = c->key.dest_depth_reg % 2; if (off != 0) { - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), arg1[comp]); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1)); - brw_pop_insn_state(p); + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); } else { - brw_MOV(p, brw_message_reg(nr), arg1[comp]); + brw_MOV(p, brw_message_reg(nr), arg1[comp]); } nr += 2; } @@ -909,7 +967,7 @@ static void emit_fb_write( struct brw_wm_compile *c, if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -926,14 +984,14 @@ static void emit_fb_write( struct brw_wm_compile *c, jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr); + fire_fb_write(c, 0, nr, target, eot); /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); /* ELSE: Shuffle up one register to fill in the hole left for AA: */ - fire_fb_write(c, 1, nr-1); + fire_fb_write(c, 1, nr-1, target, eot); } } @@ -1081,7 +1139,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_WPOSXY: - emit_wpos_xy(p, dst, dst_flags, args[0]); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: @@ -1101,7 +1159,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2]); + emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; /* Straightforward arithmetic: @@ -1209,9 +1267,21 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_slt(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SGT: + emit_sgt(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SEQ: + emit_seq(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); @@ -1232,7 +1302,8 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - assert(0); + _mesa_printf("unsupport opcode %d in fragment program\n", + inst->opcode); } for (i = 0; i < 4; i++) diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c index dc57fd2..bc933fe 100644 --- a/i965/brw_wm_fp.c +++ b/i965/brw_wm_fp.c @@ -144,7 +144,7 @@ static struct prog_dst_register dst_undef( void ) static struct prog_dst_register get_temp( struct brw_wm_compile *c ) { - int bit = ffs( ~c->fp_temp ); + int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { _mesa_printf("%s: out of temporaries\n", __FILE__); @@ -158,7 +158,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP); + c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); } @@ -176,6 +176,7 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; + inst->Data = (void *)inst0; return inst; } @@ -201,7 +202,6 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; - return inst; } @@ -361,6 +361,37 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } +static void emit_ddx( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDX, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} + +static void emit_ddy( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDY, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} /*********************************************************************** * Hacks to extend the program parameter and constant lists. @@ -433,7 +464,7 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, } idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - /* XXX what about swizzle? */ + assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ return src_reg(PROGRAM_STATE_VAR, idx); } @@ -463,17 +494,20 @@ static void precalc_dst( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_XZ) { + struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); /* dst.xz = swz src0.1zzz */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XZ), + inst->SaturateMode, 0, 0, + src_swizzle(src0, SWIZZLE_ONE, z, z, z), + src_undef(), + src_undef()); + /* Avoid letting negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -496,15 +530,19 @@ static void precalc_lit( struct brw_wm_compile *c, struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_XW) { + struct prog_instruction *swz; + /* dst.xw = swz src0.1111 */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XW), + 0, 0, 0, + src_swizzle1(src0, SWIZZLE_ONE), + src_undef(), + src_undef()); + /* Avoid letting the negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase = 0; } @@ -524,6 +562,7 @@ static void precalc_tex( struct brw_wm_compile *c, { struct prog_src_register coord; struct prog_dst_register tmpcoord; + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { struct prog_instruction *out; @@ -580,7 +619,7 @@ static void precalc_tex( struct brw_wm_compile *c, search_or_add_param5( c, STATE_INTERNAL, STATE_TEXRECT_SCALE, - inst->TexSrcUnit, + unit, 0,0 ); tmpcoord = get_temp(c); @@ -606,29 +645,33 @@ static void precalc_tex( struct brw_wm_compile *c, * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ - if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) { + if (!(c->key.yuvtex_mask & (1<<unit))) { emit_op(c, OPCODE_TEX, inst->DstReg, inst->SaturateMode, - inst->TexSrcUnit, + unit, inst->TexSrcTarget, coord, src_undef(), src_undef()); } else { + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); + /* CONST C0 = { -.5, -.0625, -.5, 1.164 } CONST C1 = { 1.596, -0.813, 2.018, -.391 } UYV = TEX ... UYV.xyz = ADD UYV, C0 UYV.y = MUL UYV.y, C0.w - RGB.xyz = MAD UYV.xxz, C1, UYV.y + if (UV swaped) + RGB.xyz = MAD UYV.zzx, C1, UYV.y + else + RGB.xyz = MAD UYV.xxz, C1, UYV.y RGB.y = MAD UYV.z, C1.w, RGB.y */ struct prog_dst_register dst = inst->DstReg; - struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register tmp = get_temp(c); struct prog_src_register tmpsrc = src_reg_from_dst(tmp); struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); @@ -640,9 +683,9 @@ static void precalc_tex( struct brw_wm_compile *c, OPCODE_TEX, tmp, inst->SaturateMode, - inst->TexSrcUnit, + unit, inst->TexSrcTarget, - src0, + coord, src_undef(), src_undef()); @@ -658,6 +701,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* YUV.y = MUL YUV.y, C0.w */ + emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), @@ -666,13 +710,18 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle1(C0, W), src_undef()); - /* RGB.xyz = MAD YUV.xxz, C1, YUV.y + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ + emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, 0, 0, - src_swizzle(tmpsrc, X,X,Z,Z), + swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -689,7 +738,8 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp); } - if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || + (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } @@ -710,7 +760,7 @@ static GLboolean projtex( struct brw_wm_compile *c, return 0; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<src.Index)) == 0) + (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) return 0; else return 1; @@ -820,14 +870,34 @@ static void emit_fb_write( struct brw_wm_compile *c ) struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + GLuint i; - emit_op(c, - WM_FB_WRITE, - dst_mask(dst_undef(),0), - 0, 0, 0, - outcolor, - payload_r0_depth, - outdepth); + struct prog_instruction *inst, *last_inst; + struct brw_context *brw = c->func.brw; + + /* inst->Sampler is not used by backend, + use it for fb write target and eot */ + + if (brw->state.nr_draw_regions > 1) { + for (i = 0 ; i < brw->state.nr_draw_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, + outcolor, payload_r0_depth, outdepth); + inst->Sampler = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, 0, 0, outcolor, payload_r0_depth, outdepth); + inst->Sampler = (i<<1); + } + } + last_inst->Sampler |= 1; //eot + }else { + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, 0, 0, outcolor, payload_r0_depth, outdepth); + inst->Sampler = 1|(0<<1); + } } @@ -853,7 +923,15 @@ static void validate_src_regs( struct brw_wm_compile *c, } } - +static void validate_dst_regs( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + if (inst->DstReg.File == PROGRAM_OUTPUT) { + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLR) + c->fp_fragcolor_emitted = 1; + } +} static void print_insns( const struct prog_instruction *insn, GLuint nr ) @@ -898,12 +976,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; + validate_src_regs(c, inst); + validate_dst_regs(c, inst); + } + for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { + const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; /* Check for INPUT values, emit INTERP instructions where * necessary: */ - validate_src_regs(c, inst); switch (inst->Opcode) { @@ -939,11 +1021,20 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_LIT: precalc_lit(c, inst); break; - + + case OPCODE_TEX: + precalc_tex(c, inst); + break; + case OPCODE_TXP: precalc_txp(c, inst); break; + case OPCODE_TXB: + out = emit_insn(c, inst); + out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + break; + case OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. @@ -957,8 +1048,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; case OPCODE_END: + emit_fog(c); + emit_fb_write(c); + break; case OPCODE_PRINT: break; @@ -967,15 +1066,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) break; } } - - emit_fog(c); - emit_fb_write(c); - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("\n\n\npass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("\n\n\npass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c new file mode 100644 index 0000000..305100f --- /dev/null +++ b/i965/brw_wm_glsl.c @@ -0,0 +1,1375 @@ +#include "macros.h" +#include "shader/prog_parameter.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" + +/* Only guess, need a flag in gl_fragment_program later */ +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) +{ + int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { + struct prog_instruction *inst = &fp->Base.Instructions[i]; + switch (inst->Opcode) { + case OPCODE_IF: + case OPCODE_INT: + case OPCODE_ENDIF: + case OPCODE_CAL: + case OPCODE_BRK: + case OPCODE_RET: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_BGNLOOP: + return GL_TRUE; + default: + break; + } + } + return GL_FALSE; +} + +static void set_reg(struct brw_wm_compile *c, int file, int index, + int component, struct brw_reg reg) +{ + c->wm_regs[file][index][component].reg = reg; + c->wm_regs[file][index][component].inited = GL_TRUE; +} + +static int get_scalar_dst_index(struct prog_instruction *inst) +{ + int i; + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + struct brw_reg reg; + reg = brw_vec8_grf(c->tmp_index--, 0); + return reg; +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 127; +} + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +{ + struct brw_reg reg; + switch (file) { + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + file = PROGRAM_STATE_VAR; + break; + case PROGRAM_UNDEFINED: + return brw_null_reg(); + default: + break; + } + + if(c->wm_regs[file][index][component].inited) + reg = c->wm_regs[file][index][component].reg; + else + reg = brw_vec8_grf(c->reg_index, 0); + + if(!c->wm_regs[file][index][component].inited) { + set_reg(c, file, index, component, reg); + c->reg_index++; + } + + if (neg & (1<< component)) { + reg = negate(reg); + } + if (abs) + reg = brw_abs(reg); + return reg; +} + +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + struct brw_reg reg; + int nr_interp_regs = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + + for (i = 0; i < 4; i++) { + reg = (i < c->key.nr_depth_regs) + ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + } + c->reg_index += 2*c->key.nr_depth_regs; + { + int nr_params = c->fp->program.Base.Parameters->NumParameters; + struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + c->prog_data.nr_params = 4*nr_params; + for (i = 0; i < nr_params; i++) { + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index/8, + index%8); + c->prog_data.param[index] = + &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + c->nr_creg = 2*((4*nr_params+15)/16); + c->reg_index += c->nr_creg; + } + for (i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (inputs & (1<<i)) { + nr_interp_regs++; + reg = brw_vec8_grf(c->reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, i, j, reg); + c->reg_index += 2; + + } + } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.curb_read_length = c->nr_creg; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; +} + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct prog_instruction *inst, int component, int nr) +{ + return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, + 0, 0); +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct prog_src_register *src, int index, int nr) +{ + int component = GET_SWZ(src->Swizzle, index); + return get_reg(c, src->File, src->Index, component, nr, + src->NegateBase, src->Abs); +} + +static void emit_abs( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, brw_abs(src)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_int( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1) ; + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mov( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_pixel_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + struct brw_reg dst0, dst1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + +} + +static void emit_delta_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg dst0, dst1, src0, src1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst0, + retype(src0, BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst1, + retype(src1, BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } + +} + + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} + +static void emit_fb_write(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + GLuint target, eot; + struct brw_reg src0; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) { + src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } else { + src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + target = inst->Sampler >> 1; + eot = inst->Sampler & 1; + fire_fb_write(c, 0, nr, target, eot); +} + +static void emit_pixel_w( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + if (mask & WRITEMASK_W) { + struct brw_reg dst, src0, delta0, delta1; + struct brw_reg interp3; + + dst = get_dst_reg(c, inst, 3, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + + interp3 = brw_vec1_grf(src0.nr+1, 4); + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, delta0); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); + + /* Calc w */ + brw_math_16( p, dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + +static void emit_linterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), delta1); + } + } +} + +static void emit_cinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_pinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0, w; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + w = get_src_reg(c, &inst->SrcReg[2], 3, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), + delta1); + brw_MUL(p, dst, dst, w); + } + } +} + +static void emit_xpd(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + for (i = 0; i < 4; i++) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i, 1); + src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); + src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_rcp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +} + +static void emit_rsq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +} + +static void emit_sin(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +} + +static void emit_cos(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +} + +static void emit_ex2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +} + +static void emit_lg2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +} + +static void emit_add(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_sub(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, negate(src1)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mul(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_MUL(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_frc(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_FRC(p, dst, src0); + } + } + if (inst->SaturateMode != SATURATE_OFF) + brw_set_saturate(p, 0); +} + +static void emit_flr(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_max(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint cond) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + +static void emit_slt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_L); +} + +static void emit_sle(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_LE); +} + +static void emit_sgt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_G); +} + +static void emit_sge(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_GE); +} + +static void emit_seq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_EQ); +} + +static void emit_sne(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); +} + +static void emit_ddx(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + nr = src0.nr; + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_wpos_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0[2], dst[2]; + + dst[0] = get_dst_reg(c, inst, 0, 1); + dst[1] = get_dst_reg(c, inst, 1, 1); + + src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. + */ + if (mask & WRITEMASK_X) { + /* X' = X - origin_x */ + brw_ADD(p, + dst[0], + retype(src0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, + dst[1], + negate(retype(src0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + } +} + +/* TODO + BIAS on SIMD8 not workind yet... + */ +static void emit_txb(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +} + +static void emit_tex(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + GLuint msg_len; + GLuint i, nr; + GLuint emit; + GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +} + +static void post_wm_emit( struct brw_wm_compile *c ) +{ + GLuint nr_insns = c->fp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} + +static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) +{ +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; + struct brw_instruction *inst0, *inst1; + int i, if_insn = 0, loop_insn = 0; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + + c->reg_index = 0; + prealloc_reg(c); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (i = 0; i < c->nr_fp_insns; i++) { + struct prog_instruction *inst = &c->prog_instructions[i]; + struct prog_instruction *orig_inst; + + if ((orig_inst = inst->Data) != 0) + orig_inst->Data = current_insn(p); + + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + + switch (inst->Opcode) { + case WM_PIXELXY: + emit_pixel_xy(c, inst); + break; + case WM_DELTAXY: + emit_delta_xy(c, inst); + break; + case WM_PIXELW: + emit_pixel_w(c, inst); + break; + case WM_LINTERP: + emit_linterp(c, inst); + break; + case WM_PINTERP: + emit_pinterp(c, inst); + break; + case WM_CINTERP: + emit_cinterp(c, inst); + break; + case WM_WPOSXY: + emit_wpos_xy(c, inst); + break; + case WM_FB_WRITE: + emit_fb_write(c, inst); + break; + case OPCODE_ABS: + emit_abs(c, inst); + break; + case OPCODE_ADD: + emit_add(c, inst); + break; + case OPCODE_SUB: + emit_sub(c, inst); + break; + case OPCODE_FRC: + emit_frc(c, inst); + break; + case OPCODE_FLR: + emit_flr(c, inst); + break; + case OPCODE_LRP: + emit_lrp(c, inst); + break; + case OPCODE_INT: + emit_int(c, inst); + break; + case OPCODE_MOV: + emit_mov(c, inst); + break; + case OPCODE_DP3: + emit_dp3(c, inst); + break; + case OPCODE_DP4: + emit_dp4(c, inst); + break; + case OPCODE_XPD: + emit_xpd(c, inst); + break; + case OPCODE_DPH: + emit_dph(c, inst); + break; + case OPCODE_RCP: + emit_rcp(c, inst); + break; + case OPCODE_RSQ: + emit_rsq(c, inst); + break; + case OPCODE_SIN: + emit_sin(c, inst); + break; + case OPCODE_COS: + emit_cos(c, inst); + break; + case OPCODE_EX2: + emit_ex2(c, inst); + break; + case OPCODE_LG2: + emit_lg2(c, inst); + break; + case OPCODE_MAX: + emit_max(c, inst); + break; + case OPCODE_MIN: + emit_min(c, inst); + break; + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; + case OPCODE_SLT: + emit_slt(c, inst); + break; + case OPCODE_SLE: + emit_sle(c, inst); + break; + case OPCODE_SGT: + emit_sgt(c, inst); + break; + case OPCODE_SGE: + emit_sge(c, inst); + break; + case OPCODE_SEQ: + emit_seq(c, inst); + break; + case OPCODE_SNE: + emit_sne(c, inst); + break; + case OPCODE_MUL: + emit_mul(c, inst); + break; + case OPCODE_POW: + emit_pow(c, inst); + break; + case OPCODE_MAD: + emit_mad(c, inst); + break; + case OPCODE_TEX: + emit_tex(c, inst); + break; + case OPCODE_TXB: + emit_txb(c, inst); + break; + case OPCODE_KIL_NV: + emit_kil(c); + break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: + break; + case OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + orig_inst = inst->Data; + orig_inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case OPCODE_BGNLOOP: + loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + loop_insn--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (inst0 > loop_inst[loop_insn]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + break; + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Opcode); + } + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + post_wm_emit(c); + for (i = 0; i < c->fp->program.Base.NumInstructions; i++) + c->fp->program.Base.Instructions[i].Data = NULL; +} + +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + brw_wm_pass_fp(c); + c->tmp_index = 127; + brw_wm_emit_glsl(brw, c); + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c index 00f6f6b..205a716 100644 --- a/i965/brw_wm_pass0.c +++ b/i965/brw_wm_pass0.c @@ -168,6 +168,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_PAYLOAD: case PROGRAM_TEMPORARY: case PROGRAM_OUTPUT: + case PROGRAM_VARYING: break; case PROGRAM_LOCAL_PARAM: @@ -179,6 +180,8 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: case PROGRAM_NAMED_PARAM: { struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; @@ -197,6 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: /* These may change from run to run: */ ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); @@ -344,6 +348,8 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; + out->eot = inst->Sampler & 1; + out->target = inst->Sampler>>1; /* Args: */ diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c index d668def..f6f3a38 100644 --- a/i965/brw_wm_pass1.c +++ b/i965/brw_wm_pass1.c @@ -150,12 +150,17 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FLR: case OPCODE_FRC: case OPCODE_MOV: + case OPCODE_SWZ: read0 = writemask; break; case OPCODE_SUB: case OPCODE_SLT: + case OPCODE_SLE: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -253,11 +258,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read0 = WRITEMASK_XYW; break; - case OPCODE_SWZ: case OPCODE_DST: case OPCODE_TXP: default: - assert(0); break; } diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c index a1edbd6..6fca9ad 100644 --- a/i965/brw_wm_pass2.c +++ b/i965/brw_wm_pass2.c @@ -69,7 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + struct brw_context *brw = c->func.brw; + GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -85,8 +86,15 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < FRAG_ATTRIB_MAX; j++) if (inputs & (1<<j)) { + /* index for vs output and ps input are not the same + in shader varying */ + GLuint index; + if (j > FRAG_ATTRIB_VAR0) + index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else + index = j; nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[j], i++); + prealloc_reg(c, &c->payload.input_interp[index], i++); } assert(nr_interp_regs >= 1); @@ -328,7 +336,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) c->state = PASS2_DONE; if (INTEL_DEBUG & DEBUG_WM) { - brw_wm_print_program(c, "pass2/done"); + brw_wm_print_program(c, "pass2/done"); } } diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c index 794c7d9..d40332e 100644 --- a/i965/brw_wm_sampler_state.c +++ b/i965/brw_wm_sampler_state.c @@ -54,7 +54,7 @@ static GLuint translate_wrap_mode( GLenum wrap ) case GL_REPEAT: return BRW_TEXCOORDMODE_WRAP; case GL_CLAMP: - return BRW_TEXCOORDMODE_CLAMP_BORDER; /* conform likes it this way */ + return BRW_TEXCOORDMODE_CLAMP; case GL_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ case GL_CLAMP_TO_BORDER: @@ -79,27 +79,43 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static GLuint upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static dri_bo *upload_default_color( struct brw_context *brw, + const GLfloat *color ) { struct brw_sampler_default_color sdc; COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); + return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0 ); } -/* +struct wm_sampler_key { + int sampler_count; + + struct wm_sampler_entry { + GLenum wrap_r, wrap_s, wrap_t; + float maxlod, minlod; + float lod_bias; + float max_aniso; + GLenum minfilter, magfilter; + GLenum comparemode, comparefunc; + dri_bo *sdc_bo; + } sampler[BRW_MAX_TEX_UNIT]; +}; + +/** + * Sets the sampler state for a single unit based off of the sampler key + * entry. */ -static void brw_update_sampler_state( struct gl_texture_unit *texUnit, - struct gl_texture_object *texObj, - GLuint sdc_gs_offset, - struct brw_sampler_state *sampler) -{ +static void brw_update_sampler_state(struct wm_sampler_entry *key, + dri_bo *sdc_bo, + struct brw_sampler_state *sampler) +{ _mesa_memset(sampler, 0, sizeof(*sampler)); - switch (texObj->MinFilter) { + switch (key->minfilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; @@ -130,17 +146,17 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, /* Set Anisotropy: */ - if ( texObj->MaxAnisotropy > 1.0 ) { + if (key->max_aniso > 1.0) { sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (texObj->MaxAnisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((texObj->MaxAnisotropy - 2) / 2, + if (key->max_aniso > 2.0) { + sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } else { - switch (texObj->MagFilter) { + switch (key->magfilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; break; @@ -152,9 +168,9 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(texObj->WrapR); - sampler->ss1.s_wrap_mode = translate_wrap_mode(texObj->WrapS); - sampler->ss1.t_wrap_mode = translate_wrap_mode(texObj->WrapT); + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); /* Fulsim complains if I don't do this. Hardware doesn't mind: */ @@ -168,17 +184,18 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, /* Set shadow function: */ - if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { /* Shadowing is "enabled" by emitting a particular sampler * message (sample_c). So need to recompile WM program when * shadow comparison is enabled on each/any texture unit. */ - sampler->ss0.shadow_function = intel_translate_shadow_compare_func(texObj->CompareFunc); + sampler->ss0.shadow_function = + intel_translate_shadow_compare_func(key->comparefunc); } /* Set LOD bias: */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias + texObj->LodBias, -16, 15), 6); + sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ @@ -192,62 +209,123 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(texObj->MaxLod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(texObj->MinLod, 0), 13), 6); + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); - sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; + sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } +/** Sets up the cache key for sampler state for all texture units */ +static void +brw_wm_sampler_populate_key(struct brw_context *brw, + struct wm_sampler_key *key) +{ + int unit; + + memset(key, 0, sizeof(*key)); + + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { + struct wm_sampler_entry *entry = &key->sampler[unit]; + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + + entry->wrap_r = texObj->WrapR; + entry->wrap_s = texObj->WrapS; + entry->wrap_t = texObj->WrapT; + entry->maxlod = texObj->MaxLod; + entry->minlod = texObj->MinLod; + entry->lod_bias = texUnit->LodBias + texObj->LodBias; + entry->max_aniso = texObj->MaxAnisotropy; + entry->minfilter = texObj->MinFilter; + entry->magfilter = texObj->MagFilter; + entry->comparemode = texObj->CompareMode; + entry->comparefunc = texObj->CompareFunc; + + dri_bo_unreference(brw->wm.sdc_bo[unit]); + brw->wm.sdc_bo[unit] = upload_default_color(brw, texObj->BorderColor); + + key->sampler_count = unit + 1; + } + } +} /* All samplers must be uploaded in a single contiguous array, which * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -static void upload_wm_samplers( struct brw_context *brw ) +static int upload_wm_samplers( struct brw_context *brw ) { - GLuint unit; - GLuint sampler_count = 0; + struct wm_sampler_key key; + int i; + int ret = 0; - /* _NEW_TEXTURE */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; + brw_wm_sampler_populate_key(brw, &key); + + if (brw->wm.sampler_count != key.sampler_count) { + brw->wm.sampler_count = key.sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + dri_bo_unreference(brw->wm.sampler_bo); + brw->wm.sampler_bo = NULL; + if (brw->wm.sampler_count == 0) + return 0; - GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor); + brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + NULL); - brw_update_sampler_state(texUnit, - texObj, - sdc_gs_offset, - &brw->wm.sampler[unit]); + /* If we didnt find it in the cache, compute the state and put it in the + * cache. + */ + if (brw->wm.sampler_bo == NULL) { + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + memset(sampler, 0, sizeof(sampler)); + for (i = 0; i < key.sampler_count; i++) { + if (brw->wm.sdc_bo[i] == NULL) + continue; - sampler_count = unit + 1; + brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i], + &sampler[i]); + } + + brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + &sampler, sizeof(sampler), + NULL, NULL); + + /* Emit SDC relocations */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) + continue; + + ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); + dri_emit_reloc(brw->wm.sampler_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); } - } - - if (brw->wm.sampler_count != sampler_count) { - brw->wm.sampler_count = sampler_count; - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } - brw->wm.sampler_gs_offset = 0; + ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo); + return ret; - if (brw->wm.sampler_count) - brw->wm.sampler_gs_offset = - brw_cache_data_sz(&brw->cache[BRW_SAMPLER], - brw->wm.sampler, - sizeof(struct brw_sampler_state) * brw->wm.sampler_count); } - const struct brw_tracked_state brw_wm_samplers = { .dirty = { .mesa = _NEW_TEXTURE, .brw = 0, .cache = 0 }, - .update = upload_wm_samplers + .prepare = upload_wm_samplers, }; diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c index 5b4f2ab..f4da0f2 100644 --- a/i965/brw_wm_state.c +++ b/i965/brw_wm_state.c @@ -34,109 +34,136 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "bufmgr.h" +#include "dri_bufmgr.h" +#include "brw_wm.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ -static void invalidate_scratch_cb( struct intel_context *intel, - void *unused ) -{ - /* nothing */ -} +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; -static void upload_wm_unit(struct brw_context *brw ) +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { + const struct gl_fragment_program *fp = brw->fragment_program; struct intel_context *intel = &brw->intel; - struct brw_wm_unit_state wm; - GLuint max_threads; + + memset(key, 0, sizeof(*key)); if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - max_threads = 0; + key->max_threads = 1; else - max_threads = 31; - - - memset(&wm, 0, sizeof(wm)); + key->max_threads = 32; /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; - wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; - wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; - - wm.wm5.max_threads = max_threads; - - if (brw->wm.prog_data->total_scratch) { - GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; - GLuint total = per_thread * (max_threads + 1); - - /* Scratch space -- just have to make sure there is sufficient - * allocated for the active program and current number of threads. - */ - - if (!brw->wm.scratch_buffer) { - bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12); - bmBufferSetInvalidateCB(intel, - brw->wm.scratch_buffer, - invalidate_scratch_cb, - NULL, - GL_FALSE); - } + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); - if (total > brw->wm.scratch_buffer_size) { - brw->wm.scratch_buffer_size = total; - bmBufferData(intel, - brw->wm.scratch_buffer, - brw->wm.scratch_buffer_size, - NULL, - 0); - } - - assert(per_thread <= 12 * 1024); - wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; - /* XXX: could make this dynamic as this is so rarely active: - */ - /* BRW_NEW_LOCK */ - wm.thread2.scratch_space_base_pointer = - bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10; - } + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; /* CACHE_NEW_SURFACE */ - wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + key->nr_surfaces = brw->wm.nr_surfaces; - /* BRW_NEW_CURBE_OFFSETS */ - wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; - wm.thread3.urb_entry_read_offset = 0; + /* _NEW_POLYGONSTIPPLE */ + key->polygon_stipple = brw->attribs.Polygon->StippleFlag; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + + /* as far as we can tell */ + key->computes_depth = + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + + /* _NEW_COLOR */ + key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled; + key->is_glsl = brw_wm_is_glsl(fp); + + /* XXX: This needs a flag to indicate when it changes. */ + key->stats_wm = intel->stats_wm; + + /* _NEW_LINE */ + key->line_stipple = brw->attribs.Line->StippleFlag; + + /* _NEW_POLYGON */ + key->offset_enable = brw->attribs.Polygon->OffsetFill; + key->offset_units = brw->attribs.Polygon->OffsetUnits; + key->offset_factor = brw->attribs.Polygon->OffsetFactor; +} + +static dri_bo * +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + dri_bo **reloc_bufs) +{ + struct brw_wm_unit_state wm; + dri_bo *bo; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = + brw->wm.scratch_buffer->offset >> 10; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } - /* CACHE_NEW_SAMPLER */ - wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + wm.thread3.urb_entry_read_offset = 0; - /* BRW_NEW_FRAGMENT_PROGRAM */ - { - const struct gl_fragment_program *fp = brw->fragment_program; - - if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) - wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - - if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) - wm.wm5.program_computes_depth = 1; - - /* _NEW_COLOR */ - if (fp->UsesKill || - brw->attribs.Color->AlphaEnabled) - wm.wm5.program_uses_killpixel = 1; + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { + /* reloc */ + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + } else { + wm.wm4.sampler_state_pointer = 0; } - wm.wm5.enable_16_pix = 1; + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->is_glsl) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; @@ -144,34 +171,108 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_aa_region_width = 0; wm.wm5.line_endcap_aa_region_width = 1; - /* _NEW_POLYGONSTIPPLE */ - if (brw->attribs.Polygon->StippleFlag) - wm.wm5.polygon_stipple = 1; + wm.wm5.polygon_stipple = key->polygon_stipple; - /* _NEW_POLYGON */ - if (brw->attribs.Polygon->OffsetFill) { + if (key->offset_enable) { wm.wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, * offset_constant, scaling and MRD. This value passes glean * but gives some odd results elsewere (eg. the * quad-offset-units test). */ - wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + wm.global_depth_offset_constant = key->offset_units * 2; /* This is the only value that passes glean: */ - wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + wm.global_depth_offset_scale = key->offset_factor; } - /* _NEW_LINE */ - if (brw->attribs.Line->StippleFlag) { - wm.wm5.line_stipple = 1; - } + wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) + if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL); + + /* Emit WM program relocation */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + return bo; +} + + +static int upload_wm_unit( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_wm_unit_key key; + dri_bo *reloc_bufs[3]; + int ret = 0, i; + wm_unit_populate_key(brw, &key); + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. + */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (brw->wm.scratch_buffer == NULL) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096, DRM_BO_FLAG_MEM_TT); + } + } + + reloc_bufs[0] = brw->wm.prog_bo; + reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[2] = brw->wm.sampler_bo; + + dri_bo_unreference(brw->wm.state_bo); + brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc_bufs, 3, + NULL); + if (brw->wm.state_bo == NULL) { + brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); + } + + for (i = 0; i < 3; i++) + if (reloc_bufs[i]) + ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]); + ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo); + return ret; } const struct brw_tracked_state brw_wm_unit = { @@ -189,6 +290,6 @@ const struct brw_tracked_state brw_wm_unit = { CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .update = upload_wm_unit + .prepare = upload_wm_unit, }; diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c index d24c618..2ba3eb4 100644 --- a/i965/brw_wm_surface_state.c +++ b/i965/brw_wm_surface_state.c @@ -69,7 +69,7 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -114,11 +114,32 @@ static GLuint translate_tex_format( GLuint mesa_format ) return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: - return BRW_SURFACEFORMAT_L16_UNORM; + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I16_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A16_UNORM; + else + return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; + return BRW_SURFACEFORMAT_DXT1_RGB; + + case MESA_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case MESA_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case MESA_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case MESA_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + case MESA_FORMAT_Z24_S8: + return BRW_SURFACEFORMAT_I24X8_UNORM; default: assert(0); @@ -126,142 +147,343 @@ static GLuint translate_tex_format( GLuint mesa_format ) } } -static -void brw_update_texture_surface( GLcontext *ctx, - GLuint unit, - struct brw_surface_state *surf ) -{ - struct intel_context *intel = intel_context(ctx); - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - - memset(surf, 0, sizeof(*surf)); +struct brw_wm_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + GLboolean tiled; + GLuint offset; +}; - surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf->ss0.surface_type = translate_tex_target(tObj->Target); - surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); +static dri_bo * +brw_create_texture_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) +{ + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(key->target); + + if (key->bo) + surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + else { + switch(key->depth) { + case 32: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; + default: + case 24: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; break; + case 16: surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break; + } + } /* This is ok for all textures with channel width 8bit or less: */ -/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - - /* BRW_NEW_LOCK */ - surf->ss1.base_addr = bmBufferOffset(intel, - intelObj->mt->region->buffer); - - surf->ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; - surf->ss2.width = firstImage->Width - 1; - surf->ss2.height = firstImage->Height - 1; - - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf->ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */ - surf->ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1; - surf->ss3.depth = firstImage->Depth - 1; - - surf->ss4.min_lod = 0; +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.mip_count = key->last_level - key->first_level; + surf.ss2.width = key->width - 1; + surf.ss2.height = key->height - 1; + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = key->tiled; + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + surf.ss3.depth = key->depth - 1; + + surf.ss4.min_lod = 0; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - surf->ss0.cube_pos_x = 1; - surf->ss0.cube_pos_y = 1; - surf->ss0.cube_pos_z = 1; - surf->ss0.cube_neg_x = 1; - surf->ss0.cube_neg_y = 1; - surf->ss0.cube_neg_z = 1; + if (key->target == GL_TEXTURE_CUBE_MAP) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; } -} + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + if (key->bo) { + /* Emit relocation to surface contents */ + dri_emit_reloc(bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + return bo; +} +static int +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct brw_wm_surface_key key; + int ret = 0; + + memset(&key, 0, sizeof(key)); + + if (intelObj->imageOverride) { + key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; + key.depth = intelObj->depthOverride; + key.bo = NULL; + key.offset = intelObj->textureOffset; + } else { + key.format = firstImage->TexFormat->MesaFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; + ret |= dri_bufmgr_check_aperture_space(key.bo); + } -#define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) ) + key.target = tObj->Target; + key.depthmode = tObj->DepthMode; + key.first_level = intelObj->firstLevel; + key.last_level = intelObj->lastLevel; + key.width = firstImage->Width; + key.height = firstImage->Height; + key.cpp = intelObj->mt->cpp; + key.tiled = intelObj->mt->region->tiled; + + dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { + brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + } + ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); + return ret; +} -static void upload_wm_surfaces(struct brw_context *brw ) +/** + * Sets up a surface state structure to point at the given region. + * While it is only used for the front/back buffer currently, it should be + * usable for further buffers when doing ARB_draw_buffer support. + */ +static int +brw_update_region_surface(struct brw_context *brw, struct intel_region *region, + unsigned int unit, GLboolean cached) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; - struct brw_surface_binding_table bind; - GLuint i; - - memcpy(&bind, &brw->wm.bind, sizeof(bind)); - - { + dri_bo *region_bo = NULL; + int ret = 0; + struct { + unsigned int surface_type; + unsigned int surface_format; + unsigned int width, height, cpp; + GLubyte color_mask[4]; + GLboolean tiled, color_blend; + } key; + + memset(&key, 0, sizeof(key)); + + if (region != NULL) { + region_bo = region->buffer; + + key.surface_type = BRW_SURFACE_2D; + if (region->cpp == 4) + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + key.tiled = region->tiled; + key.width = region->pitch; /* XXX: not really! */ + key.height = region->height; + key.cpp = region->cpp; + + ret |= dri_bufmgr_check_aperture_space(region->buffer); + } else { + key.surface_type = BRW_SURFACE_NULL; + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + key.tiled = 0; + key.width = 1; + key.height = 1; + key.cpp = 4; + } + memcpy(key.color_mask, brw->attribs.Color->ColorMask, + sizeof(key.color_mask)); + key.color_blend = (!brw->attribs.Color->_LogicOpEnabled && + brw->attribs.Color->BlendEnabled); + + dri_bo_unreference(brw->wm.surf_bo[unit]); + brw->wm.surf_bo[unit] = NULL; + if (cached) + brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); + + if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; - struct intel_region *region = brw->state.draw_region; memset(&surf, 0, sizeof(surf)); - if (region->cpp == 4) - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (region_bo != NULL) + surf.ss1.base_addr = region_bo->offset; /* reloc */ - surf.ss0.surface_type = BRW_SURFACE_2D; + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = key.tiled; + surf.ss3.pitch = (key.width * key.cpp) - 1; /* _NEW_COLOR */ - surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled && - brw->attribs.Color->BlendEnabled); + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + + /* Key size will never match key size for textures, so we're safe. */ + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + &surf, sizeof(surf), + NULL, NULL); + if (region_bo != NULL) { + dri_emit_reloc(brw->wm.surf_bo[unit], + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + offsetof(struct brw_surface_state, ss1), + region_bo); + } + } + ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]); - surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0]; - surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1]; - surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; - surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3]; + return ret; +} - /* BRW_NEW_LOCK */ - surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer); +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static dri_bo * +brw_wm_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) + if (brw->wm.surf_bo[i]) + data[i] = brw->wm.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + if (brw->wm.surf_bo[i] != NULL) { + dri_emit_reloc(bind_bo, + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } + } - surf.ss2.width = region->pitch - 1; /* XXX: not really! */ - surf.ss2.height = region->height - 1; - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = region->tiled; - surf.ss3.pitch = (region->pitch * region->cpp) - 1; + free(data); + } - brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - brw->wm.nr_surfaces = 1; + return bind_bo; +} + +static int prepare_wm_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLuint i, ret; + + if (brw->state.nr_draw_regions > 1) { + for (i = 0; i < brw->state.nr_draw_regions; i++) { + ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i, + GL_FALSE); + if (ret) + return ret; + } + }else { + ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + if (ret) + return ret; } + brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; - /* _NEW_TEXTURE, BRW_NEW_TEXDATA - */ - if (texUnit->_ReallyEnabled && - intel_finalize_mipmap_tree(intel,texUnit->_Current)) { - - struct brw_surface_state surf; - - brw_update_texture_surface(ctx, i, &surf); - - brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - brw->wm.nr_surfaces = i+2; - } - else if( texUnit->_ReallyEnabled && - texUnit->_Current == intel->frame_buffer_texobj ) - { - brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0]; - brw->wm.nr_surfaces = i+2; - } - else { - brw->wm.bind.surf_ss_offset[i+1] = 0; + /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ + if(texUnit->_ReallyEnabled) { + if (texUnit->_Current == intel->frame_buffer_texobj) { + dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + } else { + ret = brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + + if (ret) + return ret; + } + } else { + dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; } + } - brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], - &brw->wm.bind ); + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); + + return dri_bufmgr_check_aperture_space(brw->wm.bind_bo); } + const struct brw_tracked_state brw_wm_surfaces = { .dirty = { .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_LOCK), /* required for bmBufferOffset */ + .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_wm_surfaces + .prepare = prepare_wm_surfaces, }; diff --git a/i965/bufmgr.h b/i965/bufmgr.h deleted file mode 100644 index e748c0d..0000000 --- a/i965/bufmgr.h +++ /dev/null @@ -1,215 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef BUFMGR_H -#define BUFMGR_H - -#include "intel_context.h" - - -/* The buffer manager context. Opaque. - */ -struct bufmgr; -struct buffer; - - -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); - -/* Flags for validate and other calls. If both NO_UPLOAD and NO_EVICT - * are specified, ValidateBuffers is essentially a query. - */ -#define BM_MEM_LOCAL 0x1 -#define BM_MEM_AGP 0x2 -#define BM_MEM_VRAM 0x4 /* not yet used */ -#define BM_WRITE 0x8 /* not yet used */ -#define BM_READ 0x10 /* not yet used */ -#define BM_NO_UPLOAD 0x20 -#define BM_NO_EVICT 0x40 -#define BM_NO_MOVE 0x80 /* not yet used */ -#define BM_NO_ALLOC 0x100 /* legacy "fixed" buffers only */ -#define BM_CLIENT 0x200 /* for map - pointer will be accessed - * without dri lock */ - -#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM) - - - - -/* Create a pool of a given memory type, from a certain offset and a - * certain size. - * - * Also passed in is a virtual pointer to the start of the pool. This - * is useful in the faked-out version in i915 so that MapBuffer can - * return a pointer to a buffer residing in AGP space. - * - * Flags passed into a pool are inherited by all buffers allocated in - * that pool. So pools representing the static front,back,depth - * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match - * the behaviour of the legacy allocations. - * - * Returns -1 for failure, pool number for success. - */ -int bmInitPool( struct intel_context *, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags); - - -/* Stick closely to ARB_vbo semantics - they're well defined and - * understood, and drivers can just pass the calls through without too - * much thunking. - */ -void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers, - int align ); -void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers); - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. - */ -struct buffer *bmGenBufferStatic(struct intel_context *, - unsigned pool); - -/* On evict, buffer manager will call invalidate_cb() to note that the - * buffer needs to be reloaded. - * - * Buffer is uploaded by calling bmMapBuffer() and copying data into - * the returned pointer. - * - * This is basically a big hack to get some more performance by - * turning off backing store for buffers where we either have it - * already (textures) or don't need it (batch buffers, temporary - * vbo's). - */ -void bmBufferSetInvalidateCB(struct intel_context *, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata); - - -/* The driver has more intimate knowledge of the hardare than a GL - * client would, so flags here is more proscriptive than the usage - * values in the ARB_vbo interface: - */ -int bmBufferData(struct intel_context *, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ); - -int bmBufferSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ); - - -int bmBufferDataAUB(struct intel_context *, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags, - unsigned aubtype, - unsigned aubsubtype ); - -int bmBufferSubDataAUB(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data, - unsigned aubtype, - unsigned aubsubtype ); - - -/* In this version, taking the offset will provoke an upload on - * buffers not already resident in AGP: - */ -unsigned bmBufferOffset(struct intel_context *, - struct buffer *buf); - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ); - -void *bmMapBuffer( struct intel_context *, - struct buffer *buf, - unsigned access ); - -void bmUnmapBuffer( struct intel_context *, - struct buffer *buf ); - -void bmUnmapBufferAUB( struct intel_context *, - struct buffer *buf, - unsigned aubtype, - unsigned aubsubtype ); - - -/* Pertains to all buffers who's offset has been taken since the last - * fence or release. - */ -int bmValidateBuffers( struct intel_context * ); -void bmReleaseBuffers( struct intel_context * ); - -GLuint bmCtxId( struct intel_context *intel ); - - -GLboolean bmError( struct intel_context * ); -void bmEvictAll( struct intel_context * ); - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ); - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context * ); -unsigned bmSetFenceLock( struct intel_context * ); -unsigned bmLockAndFence( struct intel_context *intel ); -int bmTestFence( struct intel_context *, unsigned fence ); -void bmFinishFence( struct intel_context *, unsigned fence ); -void bmFinishFenceLock( struct intel_context *, unsigned fence ); - -void bm_fake_NotifyContendedLockTake( struct intel_context * ); - -extern int INTEL_DEBUG; -#define DEBUG_BUFMGR 0x10000000 - -#define DBG(...) do { if (INTEL_DEBUG & DEBUG_BUFMGR) _mesa_printf(__VA_ARGS__); } while(0) - -#endif diff --git a/i965/bufmgr_fake.c b/i965/bufmgr_fake.c deleted file mode 100644 index fb4903d..0000000 --- a/i965/bufmgr_fake.c +++ /dev/null @@ -1,1463 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Originally a fake version of the buffer manager so that we can - * prototype the changes in a driver fairly quickly, has been fleshed - * out to a fully functional interim solution. - * - * Basically wraps the old style memory management in the new - * programming interface, but is more expressive and avoids many of - * the bugs in the old texture manager. - */ -#include "bufmgr.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" - -#include "simple_list.h" -#include "mm.h" -#include "imports.h" - -#define BM_POOL_MAX 8 - -/* Internal flags: - */ -#define BM_NO_BACKING_STORE 0x2000 -#define BM_NO_FENCE_SUBDATA 0x4000 - - -static int check_fenced( struct intel_context *intel ); - -static int nr_attach = 0; - -/* Wrapper around mm.c's mem_block, which understands that you must - * wait for fences to expire before memory can be freed. This is - * specific to our use of memcpy for uploads - an upload that was - * processed through the command queue wouldn't need to care about - * fences. - */ -struct block { - struct block *next, *prev; - struct pool *pool; /* BM_MEM_AGP */ - struct mem_block *mem; /* BM_MEM_AGP */ - - unsigned referenced:1; - unsigned on_hardware:1; - unsigned fenced:1; - - - unsigned fence; /* BM_MEM_AGP, Split to read_fence, write_fence */ - - struct buffer *buf; - void *virtual; -}; - - -struct buffer { - unsigned id; /* debug only */ - const char *name; - unsigned size; - - unsigned mapped:1; - unsigned dirty:1; - unsigned aub_dirty:1; - unsigned alignment:13; - unsigned flags:16; - - struct block *block; - void *backing_store; - void (*invalidate_cb)( struct intel_context *, void * ); - void *invalidate_ptr; -}; - -struct pool { - unsigned size; - unsigned low_offset; - struct buffer *static_buffer; - unsigned flags; - struct mem_block *heap; - void *virtual; - struct block lru; /* only allocated, non-fence-pending blocks here */ -}; - -struct bufmgr { - _glthread_Mutex mutex; /**< for thread safety */ - struct pool pool[BM_POOL_MAX]; - unsigned nr_pools; - - unsigned buf_nr; /* for generating ids */ - - struct block referenced; /* after bmBufferOffset */ - struct block on_hardware; /* after bmValidateBuffers */ - struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ - /* then to pool->lru or free() */ - - unsigned ctxId; - unsigned last_fence; - unsigned free_on_hardware; - - unsigned fail:1; - unsigned need_fence:1; -}; - -#define MAXFENCE 0x7fffffff - -static GLboolean FENCE_LTE( unsigned a, unsigned b ) -{ - if (a == b) - return GL_TRUE; - - if (a < b && b - a < (1<<24)) - return GL_TRUE; - - if (a > b && MAXFENCE - a + b < (1<<24)) - return GL_TRUE; - - return GL_FALSE; -} - -int bmTestFence( struct intel_context *intel, unsigned fence ) -{ - /* Slight problem with wrap-around: - */ - return fence == 0 || FENCE_LTE(fence, intel->sarea->last_dispatch); -} - -#define LOCK(bm) \ - int dolock = nr_attach > 1; \ - if (dolock) _glthread_LOCK_MUTEX(bm->mutex) - -#define UNLOCK(bm) \ - if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex) - - - -static GLboolean alloc_from_pool( struct intel_context *intel, - unsigned pool_nr, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - struct pool *pool = &bm->pool[pool_nr]; - struct block *block = (struct block *)calloc(sizeof *block, 1); - GLuint sz, align = (1<<buf->alignment); - - if (!block) - return GL_FALSE; - - sz = (buf->size + align-1) & ~(align-1); - - block->mem = mmAllocMem(pool->heap, - sz, - buf->alignment, 0); - if (!block->mem) { - free(block); - return GL_FALSE; - } - - make_empty_list(block); - - /* Insert at head or at tail??? - */ - insert_at_tail(&pool->lru, block); - - block->pool = pool; - block->virtual = pool->virtual + block->mem->ofs; - block->buf = buf; - - buf->block = block; - - return GL_TRUE; -} - - - - - - - - -/* Release the card storage associated with buf: - */ -static void free_block( struct intel_context *intel, struct block *block ) -{ - DBG("free block %p\n", block); - - if (!block) - return; - - check_fenced(intel); - - if (block->referenced) { - _mesa_printf("tried to free block on referenced list\n"); - assert(0); - } - else if (block->on_hardware) { - block->buf = NULL; - intel->bm->free_on_hardware += block->mem->size; - } - else if (block->fenced) { - block->buf = NULL; - } - else { - DBG(" - free immediately\n"); - remove_from_list(block); - - mmFreeMem(block->mem); - free(block); - } -} - - -static void alloc_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!buf->backing_store); - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - buf->backing_store = ALIGN_MALLOC(buf->size, 64); -} - -static void free_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - if (buf->backing_store) { - ALIGN_FREE(buf->backing_store); - buf->backing_store = NULL; - } -} - - - - - - -static void set_dirty( struct intel_context *intel, - struct buffer *buf ) -{ - if (buf->flags & BM_NO_BACKING_STORE) - buf->invalidate_cb(intel, buf->invalidate_ptr); - - assert(!(buf->flags & BM_NO_EVICT)); - - DBG("set_dirty - buf %d\n", buf->id); - buf->dirty = 1; -} - - -static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - if (block->fence && max_fence && - !FENCE_LTE(block->fence, max_fence)) - return 0; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -#define foreach_s_rev(ptr, t, list) \ - for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) - -static int evict_mru( struct intel_context *intel, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s_rev(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -static int check_fenced( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int ret = 0; - - foreach_s(block, tmp, &bm->fenced ) { - assert(block->fenced); - - if (bmTestFence(intel, block->fence)) { - - block->fenced = 0; - - if (!block->buf) { - DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size); - remove_from_list(block); - mmFreeMem(block->mem); - free(block); - } - else { - DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size); - move_to_tail(&block->pool->lru, block); - } - - ret = 1; - } - else { - /* Blocks are ordered by fence, so if one fails, all from - * here will fail also: - */ - break; - } - } - - /* Also check the referenced list: - */ - foreach_s(block, tmp, &bm->referenced ) { - if (block->fenced && - bmTestFence(intel, block->fence)) { - block->fenced = 0; - } - } - - - DBG("%s: %d\n", __FUNCTION__, ret); - return ret; -} - - - -static void fence_blocks( struct intel_context *intel, - unsigned fence ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - - foreach_s (block, tmp, &bm->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - block->fence = fence; - - block->on_hardware = 0; - block->fenced = 1; - - /* Move to tail of pending list here - */ - move_to_tail(&bm->fenced, block); - } - - /* Also check the referenced list: - */ - foreach_s (block, tmp, &bm->referenced) { - if (block->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - - block->fence = fence; - block->on_hardware = 0; - block->fenced = 1; - } - } - - - bm->last_fence = fence; - assert(is_empty_list(&bm->on_hardware)); -} - - - - -static GLboolean alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - int i; - - assert(intel->locked); - - DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_ALLOC) && - alloc_from_pool(intel, i, buf)) { - - DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__, - buf->block->mem->ofs, buf->block->mem->size); - - return GL_TRUE; - } - } - - DBG("%s --> fail\n", __FUNCTION__); - return GL_FALSE; -} - - -static GLboolean evict_and_alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - GLuint pool; - struct bufmgr *bm = intel->bm; - - assert(buf->block == NULL); - - /* Put a cap on the amount of free memory we'll allow to accumulate - * before emitting a fence. - */ - if (bm->free_on_hardware > 1 * 1024 * 1024) { - DBG("fence for free space: %x\n", bm->free_on_hardware); - bmSetFence(intel); - } - - /* Search for already free memory: - */ - if (alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory that may have become free: - */ - if (check_fenced(intel) && - alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory blocks not used for >1 frame: - */ - while (evict_lru(intel, intel->second_last_swap_fence, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - /* If we're not thrashing, allow lru eviction to dig deeper into - * recently used textures. We'll probably be thrashing soon: - */ - if (!intel->thrashing) { - while (evict_lru(intel, 0, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - } - - /* Keep thrashing counter alive? - */ - if (intel->thrashing) - intel->thrashing = 20; - - /* Wait on any already pending fences - here we are waiting for any - * freed memory that has been submitted to hardware and fenced to - * become available: - */ - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - - /* - */ - if (!is_empty_list(&bm->on_hardware)) { - bmSetFence(intel); - - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - } - - if (!intel->thrashing) { - DBG("thrashing\n"); - } - intel->thrashing = 20; - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - while (evict_mru(intel, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size); - - assert(is_empty_list(&bm->on_hardware)); - assert(is_empty_list(&bm->fenced)); - - return GL_FALSE; -} - - - - - - - - - - -/*********************************************************************** - * Public functions - */ - - -/* The initialization functions are skewed in the fake implementation. - * This call would be to attach to an existing manager, rather than to - * create a local one. - */ -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ) -{ - _glthread_DECLARE_STATIC_MUTEX(initMutex); - static struct bufmgr bm; - - /* This function needs a mutex of its own... - */ - _glthread_LOCK_MUTEX(initMutex); - - if (nr_attach == 0) { - _glthread_INIT_MUTEX(bm.mutex); - - make_empty_list(&bm.referenced); - make_empty_list(&bm.fenced); - make_empty_list(&bm.on_hardware); - - /* The context id of any of the share group. This won't be used - * in communication with the kernel, so it doesn't matter if - * this context is eventually deleted. - */ - bm.ctxId = intel->hHWContext; - } - - nr_attach++; - - _glthread_UNLOCK_MUTEX(initMutex); - - return &bm; -} - - - -/* The virtual pointer would go away in a true implementation. - */ -int bmInitPool( struct intel_context *intel, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - GLuint i; - - for (i = 0; i < bm->nr_pools; i++) { - if (bm->pool[i].low_offset == low_offset && - bm->pool[i].size == size) { - retval = i; - goto out; - } - } - - - if (bm->nr_pools >= BM_POOL_MAX) - retval = -1; - else { - i = bm->nr_pools++; - - DBG("bmInitPool %d low_offset %x sz %x\n", - i, low_offset, size); - - bm->pool[i].low_offset = low_offset; - bm->pool[i].size = size; - bm->pool[i].heap = mmInit( low_offset, size ); - bm->pool[i].virtual = low_virtual - low_offset; - bm->pool[i].flags = flags; - - make_empty_list(&bm->pool[i].lru); - - retval = i; - } - } - out: - UNLOCK(bm); - return retval; -} - -static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf = calloc(sizeof(*buf), 1); - - buf->id = ++bm->buf_nr; - buf->name = name; - buf->alignment = align; - buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL; - - return buf; -} - - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ) -{ - struct bufmgr *bm = intel->bm; - int i; - - for (i = 0; i < bm->nr_pools; i++) - if (offset >= bm->pool[i].low_offset && - offset + sz <= bm->pool[i].low_offset + bm->pool[i].size) - return bm->pool[i].virtual + offset; - - return NULL; -} - - -void bmGenBuffers(struct intel_context *intel, - const char *name, unsigned n, - struct buffer **buffers, - int align ) -{ - struct bufmgr *bm = intel->bm; - LOCK(bm); - { - int i; - - for (i = 0; i < n; i++) - buffers[i] = do_GenBuffer(intel, name, align); - } - UNLOCK(bm); -} - - -void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - unsigned i; - - for (i = 0; i < n; i++) { - struct buffer *buf = buffers[i]; - - if (buf && buf->block) - free_block(intel, buf->block); - - if (buf && buf->backing_store) - free_backing_store(intel, buf); - - if (buf) - free(buf); - } - } - UNLOCK(bm); -} - - - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. It will probably - * be useful to pass a fixed offset here one day. - */ -struct buffer *bmGenBufferStatic(struct intel_context *intel, - unsigned pool ) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf; - LOCK(bm); - { - assert(bm->pool[pool].flags & BM_NO_EVICT); - assert(bm->pool[pool].flags & BM_NO_MOVE); - - if (bm->pool[pool].static_buffer) - buf = bm->pool[pool].static_buffer; - else { - buf = do_GenBuffer(intel, "static", 12); - - bm->pool[pool].static_buffer = buf; - assert(!buf->block); - - buf->size = bm->pool[pool].size; - buf->flags = bm->pool[pool].flags; - buf->alignment = 12; - - if (!alloc_from_pool(intel, pool, buf)) - assert(0); - } - } - UNLOCK(bm); - return buf; -} - - -static void wait_quiescent(struct intel_context *intel, - struct block *block) -{ - if (block->on_hardware) { - assert(intel->bm->need_fence); - bmSetFence(intel); - assert(!block->on_hardware); - } - - - if (block->fenced) { - bmFinishFence(intel, block->fence); - } - - assert(!block->on_hardware); - assert(!block->fenced); -} - - - -/* If buffer size changes, free and reallocate. Otherwise update in - * place. - */ -int bmBufferData(struct intel_context *intel, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data); - - assert(!buf->mapped); - - if (buf->block) { - struct block *block = buf->block; - - /* Optimistic check to see if we can reuse the block -- not - * required for correctness: - */ - if (block->fenced) - check_fenced(intel); - - if (block->on_hardware || - block->fenced || - (buf->size && buf->size != size) || - (data == NULL)) { - - assert(!block->referenced); - - free_block(intel, block); - buf->block = NULL; - buf->dirty = 1; - } - } - - buf->size = size; - if (buf->block) { - assert (buf->block->mem->size >= size); - } - - if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked || data == NULL); - - if (data != NULL) { - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - wait_quiescent(intel, buf->block); - - DBG("bmBufferData %d offset 0x%x sz 0x%x\n", - buf->id, buf->block->mem->ofs, size); - - assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs); - - do_memcpy(buf->block->virtual, data, size); - } - buf->dirty = 0; - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - free_backing_store(intel, buf); - - if (data != NULL) { - alloc_backing_store(intel, buf); - do_memcpy(buf->backing_store, data, size); - } - } - } - out: - UNLOCK(bm); - return retval; -} - - -/* Update the buffer in place, in whatever space it is currently resident: - */ -int bmBufferSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - if (size == 0) - return 0; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - assert(offset+size <= buf->size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->dirty = 0; - - do_memcpy(buf->block->virtual + offset, data, size); - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == NULL) - alloc_backing_store(intel, buf); - - do_memcpy(buf->backing_store + offset, data, size); - } - } - out: - UNLOCK(bm); - return retval; -} - - - -int bmBufferDataAUB(struct intel_context *intel, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags, - unsigned aubtype, - unsigned aubsubtype ) -{ - int retval = bmBufferData(intel, buf, size, data, flags); - - - /* This only works because in this version of the buffer manager we - * allocate all buffers statically in agp space and so can emit the - * uploads to the aub file with the correct offsets as they happen. - */ - if (retval == 0 && data && intel->aub_file) { - - if (buf->block && !buf->dirty) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->block->virtual, - size, - aubtype, - aubsubtype); - buf->aub_dirty = 0; - } - } - - return retval; -} - - -int bmBufferSubDataAUB(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data, - unsigned aubtype, - unsigned aubsubtype ) -{ - int retval = bmBufferSubData(intel, buf, offset, size, data); - - - /* This only works because in this version of the buffer manager we - * allocate all buffers statically in agp space and so can emit the - * uploads to the aub file with the correct offsets as they happen. - */ - if (intel->aub_file) { - if (retval == 0 && buf->block && !buf->dirty) - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs + offset, - ((const char *)buf->block->virtual) + offset, - size, - aubtype, - aubsubtype); - } - - return retval; -} - -void bmUnmapBufferAUB( struct intel_context *intel, - struct buffer *buf, - unsigned aubtype, - unsigned aubsubtype ) -{ - bmUnmapBuffer(intel, buf); - - if (intel->aub_file) { - /* Hack - exclude the framebuffer mappings. If you removed - * this, you'd get very big aubfiles, but you *would* be able to - * see fallback rendering. - */ - if (buf->block && !buf->dirty && buf->block->pool == &intel->bm->pool[0]) { - buf->aub_dirty = 1; - } - } -} - -unsigned bmBufferOffset(struct intel_context *intel, - struct buffer *buf) -{ - struct bufmgr *bm = intel->bm; - unsigned retval = 0; - - LOCK(bm); - { - assert(intel->locked); - - if (!buf->block && - !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = ~0; - } - else { - assert(buf->block); - assert(buf->block->buf == buf); - - DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, buf->block, - buf->dirty); - - move_to_tail(&bm->referenced, buf->block); - buf->block->referenced = 1; - - retval = buf->block->mem->ofs; - } - } - UNLOCK(bm); - - return retval; -} - - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - if (buf->block && size) { - wait_quiescent(intel, buf->block); - do_memcpy(data, buf->block->virtual + offset, size); - } - } - else { - if (buf->backing_store && size) { - do_memcpy(data, buf->backing_store + offset, size); - } - } - } - UNLOCK(bm); -} - - -/* Return a pointer to whatever space the buffer is currently resident in: - */ -void *bmMapBuffer( struct intel_context *intel, - struct buffer *buf, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - void *retval = NULL; - - LOCK(bm); - { - DBG("bmMapBuffer %d\n", buf->id); - - if (buf->mapped) { - _mesa_printf("%s: already mapped\n", __FUNCTION__); - retval = NULL; - } - else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - DBG("%s: alloc failed\n", __FUNCTION__); - bm->fail = 1; - retval = NULL; - } - else { - assert(buf->block); - buf->dirty = 0; - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->mapped = 1; - retval = buf->block->virtual; - } - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == 0) - alloc_backing_store(intel, buf); - - buf->mapped = 1; - retval = buf->backing_store; - } - } - UNLOCK(bm); - return retval; -} - -void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmUnmapBuffer %d\n", buf->id); - buf->mapped = 0; - } - UNLOCK(bm); -} - - - - -/* This is the big hack that turns on BM_NO_BACKING_STORE. Basically - * says that an external party will maintain the backing store, eg - * Mesa's local copy of texture data. - */ -void bmBufferSetInvalidateCB(struct intel_context *intel, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - if (buf->backing_store) - free_backing_store(intel, buf); - - buf->flags |= BM_NO_BACKING_STORE; - - if (dont_fence_subdata) - buf->flags |= BM_NO_FENCE_SUBDATA; - - DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id); - buf->dirty = 1; - buf->invalidate_cb = invalidate_cb; - buf->invalidate_ptr = ptr; - - /* Note that it is invalid right from the start. Also note - * invalidate_cb is called with the bufmgr locked, so cannot - * itself make bufmgr calls. - */ - invalidate_cb( intel, ptr ); - } - UNLOCK(bm); -} - - - - - - - -/* This is only protected against thread interactions by the DRI lock - * and the policy of ensuring that all dma is flushed prior to - * releasing that lock. Otherwise you might have two threads building - * up a list of buffers to validate at once. - */ -int bmValidateBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("%s fail %d\n", __FUNCTION__, bm->fail); - assert(intel->locked); - - if (!bm->fail) { - struct block *block, *tmp; - - foreach_s(block, tmp, &bm->referenced) { - struct buffer *buf = block->buf; - - DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty); - - /* Upload the buffer contents if necessary: - */ - if (buf->dirty) { - DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id, - buf->name, buf->size, block->mem->ofs); - - assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT))); - - wait_quiescent(intel, buf->block); - - do_memcpy(buf->block->virtual, - buf->backing_store, - buf->size); - - if (intel->aub_file) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->backing_store, - buf->size, - 0, - 0); - } - - buf->dirty = 0; - buf->aub_dirty = 0; - } - else if (buf->aub_dirty) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->block->virtual, - buf->size, - 0, - 0); - buf->aub_dirty = 0; - } - - block->referenced = 0; - block->on_hardware = 1; - move_to_tail(&bm->on_hardware, block); - } - - bm->need_fence = 1; - } - - retval = bm->fail ? -1 : 0; - } - UNLOCK(bm); - - - if (retval != 0) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - -void bmReleaseBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - - foreach_s (block, tmp, &bm->referenced) { - - DBG("remove block %p from referenced list\n", block); - - if (block->on_hardware) { - /* Return to the on-hardware list. - */ - move_to_tail(&bm->on_hardware, block); - } - else if (block->fenced) { - struct block *s; - - /* Hmm - have to scan the fenced list to insert the - * buffers in order. This is O(nm), but rare and the - * numbers are low. - */ - foreach (s, &bm->fenced) { - if (FENCE_LTE(block->fence, s->fence)) - break; - } - - move_to_tail(s, block); - } - else { - /* Return to the lru list: - */ - move_to_tail(&block->pool->lru, block); - } - - block->referenced = 0; - } - } - UNLOCK(bm); -} - - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context *intel ) -{ - assert(intel->locked); - - /* Emit MI_FLUSH here: - */ - if (intel->bm->need_fence) { - - /* Emit a flush without using a batchbuffer. Can't rely on the - * batchbuffer at this level really. Would really prefer that - * the IRQ ioctly emitted the flush at the same time. - */ - GLuint dword[2]; - dword[0] = intel->vtbl.flush_cmd(); - dword[1] = 0; - intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword)); - - intel->bm->last_fence = intelEmitIrqLocked( intel ); - - fence_blocks(intel, intel->bm->last_fence); - - intel->vtbl.note_fence(intel, intel->bm->last_fence); - intel->bm->need_fence = 0; - - if (intel->thrashing) { - intel->thrashing--; - if (!intel->thrashing) - DBG("not thrashing\n"); - } - - intel->bm->free_on_hardware = 0; - } - - return intel->bm->last_fence; -} - -unsigned bmSetFenceLock( struct intel_context *intel ) -{ - unsigned last; - LOCK(intel->bm); - last = bmSetFence(intel); - UNLOCK(intel->bm); - return last; -} -unsigned bmLockAndFence( struct intel_context *intel ) -{ - if (intel->bm->need_fence) { - LOCK_HARDWARE(intel); - LOCK(intel->bm); - bmSetFence(intel); - UNLOCK(intel->bm); - UNLOCK_HARDWARE(intel); - } - - return intel->bm->last_fence; -} - - -void bmFinishFence( struct intel_context *intel, unsigned fence ) -{ - if (!bmTestFence(intel, fence)) { - DBG("...wait on fence %d\n", fence); - intelWaitIrq( intel, fence ); - } - assert(bmTestFence(intel, fence)); - check_fenced(intel); -} - -void bmFinishFenceLock( struct intel_context *intel, unsigned fence ) -{ - LOCK(intel->bm); - bmFinishFence(intel, fence); - UNLOCK(intel->bm); -} - - -/* Specifically ignore texture memory sharing. - * -- just evict everything - * -- and wait for idle - */ -void bm_fake_NotifyContendedLockTake( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - } - } - } - } - UNLOCK(bm); -} - - - -void bmEvictAll( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - DBG("%s\n", __FUNCTION__); - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - } - } - } - } - UNLOCK(bm); -} - - -GLboolean bmError( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - GLboolean retval; - - LOCK(bm); - { - retval = bm->fail; - } - UNLOCK(bm); - - return retval; -} - - -GLuint bmCtxId( struct intel_context *intel ) -{ - return intel->bm->ctxId; -} diff --git a/i965/intel_batchbuffer.c b/i965/intel_batchbuffer.c deleted file mode 100644 index 64885ed..0000000 --- a/i965/intel_batchbuffer.c +++ /dev/null @@ -1,243 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "imports.h" -#include "intel_batchbuffer.h" -#include "intel_ioctl.h" -#include "bufmgr.h" - - -static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) -{ - assert(batch->map == NULL); - - batch->offset = (unsigned long)batch->ptr; - batch->offset = (batch->offset + 63) & ~63; - batch->ptr = (unsigned char *) batch->offset; - - if (BATCH_SZ - batch->offset < BATCH_REFILL) { - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - batch->offset = 0; - batch->ptr = NULL; - } - - batch->flags = 0; -} - -static void intel_batchbuffer_reset_cb( struct intel_context *intel, - void *ptr ) -{ - struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr; - assert(batch->map == NULL); - batch->flags = 0; - batch->offset = 0; - batch->ptr = NULL; -} - -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ) -{ - if (!batch->map) { - batch->map = bmMapBuffer(batch->intel, batch->buffer, - BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE); - batch->ptr += (unsigned long)batch->map; - } - - return batch->map; -} - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ) -{ - if (batch->map) { - batch->ptr -= (unsigned long)batch->map; - batch->map = NULL; - bmUnmapBuffer(batch->intel, batch->buffer); - } -} - - - -/*====================================================================== - * Public functions - */ -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ) -{ - struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); - - batch->intel = intel; - - bmGenBuffers(intel, "batch", 1, &batch->buffer, 12); - - bmBufferSetInvalidateCB(intel, batch->buffer, - intel_batchbuffer_reset_cb, - batch, - GL_TRUE); - - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - - - return batch; -} - -void intel_batchbuffer_free( struct intel_batchbuffer *batch ) -{ - if (batch->map) - bmUnmapBuffer(batch->intel, batch->buffer); - - bmDeleteBuffers(batch->intel, 1, &batch->buffer); - free(batch); -} - - -#define MI_BATCH_BUFFER_END (0xA<<23) - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ) -{ - struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - (batch->map + batch->offset); - GLuint offset; - GLint retval = GL_TRUE; - - assert(intel->locked); - - if (used == 0) { - bmReleaseBuffers( batch->intel ); - return GL_TRUE; - } - - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END; - batch->ptr += 4; - used += 4; - } - else { - ((int *)batch->ptr)[0] = 0; - ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END; - - batch->ptr += 8; - used += 8; - } - - intel_batchbuffer_unmap(batch); - - /* Get the batch buffer offset: Must call bmBufferOffset() before - * bmValidateBuffers(), otherwise the buffer won't be on the inuse - * list. - */ - offset = bmBufferOffset(batch->intel, batch->buffer); - - if (bmValidateBuffers( batch->intel ) != 0) { - assert(intel->locked); - bmReleaseBuffers( batch->intel ); - retval = GL_FALSE; - goto out; - } - - - if (intel->aub_file) { - /* Send buffered commands to aubfile as a single packet. - */ - intel_batchbuffer_map(batch); - ((int *)batch->ptr)[-1] = intel->vtbl.flush_cmd(); - intel->vtbl.aub_commands(intel, - offset, /* Fulsim wierdness - don't adjust */ - batch->map + batch->offset, - used); - ((int *)batch->ptr)[-1] = MI_BATCH_BUFFER_END; - intel_batchbuffer_unmap(batch); - } - - - /* Fire the batch buffer, which was uploaded above: - */ - intel_batch_ioctl(batch->intel, - offset + batch->offset, - used); - - if (intel->aub_file && - intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) - intel->vtbl.aub_dump_bmp( intel, 0 ); - - /* Reset the buffer: - */ - out: - intel_batchbuffer_reset( batch ); - intel_batchbuffer_map( batch ); - - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - - - - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ) -{ - unsigned long ptr = (unsigned long) batch->ptr; - unsigned long aptr = (ptr + align) & ~((unsigned long)align-1); - GLuint fixup = aptr - ptr; - - if (intel_batchbuffer_space(batch) < fixup + sz) - intel_batchbuffer_flush(batch); - else { - memset(batch->ptr, 0, fixup); - batch->ptr += fixup; - } -} - - - - -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags) -{ - assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes, flags); - __memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; -} - diff --git a/i965/intel_batchbuffer.h b/i965/intel_batchbuffer.h deleted file mode 100644 index 25e0a65..0000000 --- a/i965/intel_batchbuffer.h +++ /dev/null @@ -1,127 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "mtypes.h" -#include "bufmgr.h" - -struct intel_context; - -#define BATCH_SZ (16 * 1024) -#define BATCH_REFILL 4096 -#define BATCH_RESERVED 16 - -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 - -struct intel_batchbuffer { - struct intel_context *intel; - - struct buffer *buffer; - - GLuint flags; - unsigned long offset; - - GLubyte *map; - GLubyte *ptr; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ); - -void intel_batchbuffer_free( struct intel_batchbuffer *batch ); - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ); - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ); -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ); - - -/* Unlike bmBufferData, this currently requires the buffer be mapped. - * Consider it a convenience function wrapping multple - * intel_buffer_dword() calls. - */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); - - -/* Inline functions - might actually be better off with these - * non-inlined. Certainly better off switching all command packets to - * be passed as structs rather than dwords, but that's a little bit of - * work... - */ -static inline GLuint -intel_batchbuffer_space( struct intel_batchbuffer *batch ) -{ - return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset)); -} - - -static inline void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, - GLuint dword) -{ - assert(batch->map); - assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *)(batch->ptr) = dword; - batch->ptr += 4; -} - -static inline void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - GLuint flags) -{ - assert(sz < BATCH_SZ - 8); - if (intel_batchbuffer_space(batch) < sz || - (batch->flags != 0 && flags != 0 && batch->flags != flags)) - intel_batchbuffer_flush(batch); - - batch->flags |= flags; -} - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ); - - -/* Here are the crusty old macros, to be removed: - */ -#define BATCH_LOCALS -#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define ADVANCE_BATCH() do { } while(0) - - -#endif diff --git a/i965/intel_blit.c b/i965/intel_blit.c deleted file mode 100644 index f88cbb2..0000000 --- a/i965/intel_blit.c +++ /dev/null @@ -1,617 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <errno.h> - -#include "mtypes.h" -#include "context.h" -#include "enums.h" -#include "vblank.h" - -#include "intel_reg.h" -#include "intel_batchbuffer.h" -#include "intel_context.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_structs.h" - -#include "bufmgr.h" - - - - -/* - * Copy the back buffer to the front buffer. - */ -void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect ) -{ - struct intel_context *intel; - GLboolean missed_target; - int64_t ust; - - DBG("%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - intelFlush( &intel->ctx ); - - - bmFinishFenceLock(intel, intel->last_swap_fence); - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. - */ - LOCK_HARDWARE( intel ); - - if (!rect) - { - UNLOCK_HARDWARE( intel ); - driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target ); - LOCK_HARDWARE( intel ); - } - - { - intelScreenPrivate *intelScreen = intel->intelScreen; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int nbox = dPriv->numClipRects; - drm_clip_rect_t *pbox = dPriv->pClipRects; - int cpp = intelScreen->cpp; - struct intel_region *src, *dst; - int BR13, CMD; - int i; - int src_pitch, dst_pitch; - - if (intel->sarea->pf_current_page == 0) { - dst = intel->front_region; - src = intel->back_region; - } - else { - assert(0); - src = intel->front_region; - dst = intel->back_region; - } - - src_pitch = src->pitch * src->cpp; - dst_pitch = dst->pitch * dst->cpp; - - if (cpp == 2) { - BR13 = (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - } - else { - BR13 = (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - } - - if (src->tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst->tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - for (i = 0 ; i < nbox; i++, pbox++) - { - drm_clip_rect_t tmp = *pbox; - - if (rect) { - if (!intel_intersect_cliprects(&tmp, &tmp, rect)) - continue; - } - - - if (tmp.x1 > tmp.x2 || - tmp.y1 > tmp.y2 || - tmp.x2 > intelScreen->width || - tmp.y2 > intelScreen->height) - continue; - - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); - OUT_BATCH( (tmp.y2 << 16) | tmp.x2 ); - OUT_BATCH( bmBufferOffset(intel, dst->buffer) ); - OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); - OUT_BATCH( src_pitch ); - OUT_BATCH( bmBufferOffset(intel, src->buffer) ); - ADVANCE_BATCH(); - } - } - - intel_batchbuffer_flush( intel->batch ); - intel->second_last_swap_fence = intel->last_swap_fence; - intel->last_swap_fence = bmSetFenceLock( intel ); - UNLOCK_HARDWARE( intel ); - - if (!rect) - { - intel->swap_count++; - (*dri_interface->getUST)(&ust); - if (missed_target) { - intel->swap_missed_count++; - intel->swap_missed_ust = ust - intel->swap_ust; - } - - intel->swap_ust = ust; - } - -} - - - - -void intelEmitFillBlit( struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ) -{ - GLuint BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - break; - default: - return; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} - -static GLuint translate_raster_op(GLenum logicop) -{ - switch(logicop) { - case GL_CLEAR: return 0x00; - case GL_AND: return 0x88; - case GL_AND_REVERSE: return 0x44; - case GL_COPY: return 0xCC; - case GL_AND_INVERTED: return 0x22; - case GL_NOOP: return 0xAA; - case GL_XOR: return 0x66; - case GL_OR: return 0xEE; - case GL_NOR: return 0x11; - case GL_EQUIV: return 0x99; - case GL_INVERT: return 0x55; - case GL_OR_REVERSE: return 0xDD; - case GL_COPY_INVERTED: return 0x33; - case GL_OR_INVERTED: return 0xBB; - case GL_NAND: return 0x77; - case GL_SET: return 0xFF; - default: return 0; - } -} - - -/* Copy BitBlt - */ -void intelEmitCopyBlit( struct intel_context *intel, - GLuint cpp, - GLshort src_pitch, - struct buffer *src_buffer, - GLuint src_offset, - GLboolean src_tiled, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h, - GLenum logic_op ) -{ - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - - DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_x, src_y, - dst_buffer, dst_pitch, dst_x, dst_y, - w,h,logic_op); - - assert( logic_op - GL_CLEAR >= 0 ); - assert( logic_op - GL_CLEAR < 0x10 ); - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | - (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - return; - } - - if (src_tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - dst_pitch &= 0xffff; - src_pitch &= 0xffff; - - /* Initial y values don't seem to work with negative pitches. If - * we adjust the offsets manually (below), it seems to work fine. - * - * On the other hand, if we always adjust, the hardware doesn't - * know which blit directions to use, so overlapping copypixels get - * the wrong result. - */ - if (dst_pitch > 0 && src_pitch > 0) { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset ); - ADVANCE_BATCH(); - } - else { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); - OUT_BATCH( (0 << 16) | dst_x ); - OUT_BATCH( (h << 16) | dst_x2 ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset + dst_y * dst_pitch ); - OUT_BATCH( (0 << 16) | src_x ); - OUT_BATCH( (src_pitch & 0xffff) ); - OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset + src_y * src_pitch ); - ADVANCE_BATCH(); - } -} - - - -void intelClearWithBlit(GLcontext *ctx, GLbitfield flags) -{ - struct intel_context *intel = intel_context( ctx ); - intelScreenPrivate *intelScreen = intel->intelScreen; - GLuint clear_depth, clear_color; - GLint cx, cy, cw, ch; - GLint cpp = intelScreen->cpp; - GLboolean all; - GLint i; - struct intel_region *front = intel->front_region; - struct intel_region *back = intel->back_region; - struct intel_region *depth = intel->depth_region; - GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD; - GLuint front_pitch; - GLuint back_pitch; - GLuint depth_pitch; - BATCH_LOCALS; - - - clear_color = intel->ClearColor; - clear_depth = 0; - - if (flags & BUFFER_BIT_DEPTH) { - clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); - } - - if (flags & BUFFER_BIT_STENCIL) { - clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; - } - - switch(cpp) { - case 2: - BR13 = (0xF0 << 16) | (1<<24); - BACK_CMD = FRONT_CMD = XY_COLOR_BLT_CMD; - DEPTH_CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - BACK_CMD = FRONT_CMD = (XY_COLOR_BLT_CMD | - XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - DEPTH_CMD = XY_COLOR_BLT_CMD; - if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_COLOR_BLT_WRITE_RGB; - if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_COLOR_BLT_WRITE_ALPHA; - break; - default: - return; - } - - - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - { - /* get clear bounds after locking */ - cx = ctx->DrawBuffer->_Xmin; - cy = ctx->DrawBuffer->_Ymin; - ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; - cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; - all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height); - - /* flip top to bottom */ - cy = intel->driDrawable->h - cy - ch; - cx = cx + intel->drawX; - cy += intel->drawY; - - /* adjust for page flipping */ - if ( intel->sarea->pf_current_page == 0 ) { - front = intel->front_region; - back = intel->back_region; - } - else { - back = intel->front_region; - front = intel->back_region; - } - - front_pitch = front->pitch * front->cpp; - back_pitch = back->pitch * back->cpp; - depth_pitch = depth->pitch * depth->cpp; - - if (front->tiled) { - FRONT_CMD |= XY_DST_TILED; - front_pitch /= 4; - } - - if (back->tiled) { - BACK_CMD |= XY_DST_TILED; - back_pitch /= 4; - } - - if (depth->tiled) { - DEPTH_CMD |= XY_DST_TILED; - depth_pitch /= 4; - } - - for (i = 0 ; i < intel->numClipRects ; i++) - { - drm_clip_rect_t *box = &intel->pClipRects[i]; - drm_clip_rect_t b; - - if (!all) { - GLint x = box->x1; - GLint y = box->y1; - GLint w = box->x2 - x; - GLint h = box->y2 - y; - - if (x < cx) w -= cx - x, x = cx; - if (y < cy) h -= cy - y, y = cy; - if (x + w > cx + cw) w = cx + cw - x; - if (y + h > cy + ch) h = cy + ch - y; - if (w <= 0) continue; - if (h <= 0) continue; - - b.x1 = x; - b.y1 = y; - b.x2 = x + w; - b.y2 = y + h; - } else { - b = *box; - } - - - if (b.x1 > b.x2 || - b.y1 > b.y2 || - b.x2 > intelScreen->width || - b.y2 > intelScreen->height) - continue; - - if ( flags & BUFFER_BIT_FRONT_LEFT ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( FRONT_CMD ); - OUT_BATCH( front_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, front->buffer) ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( flags & BUFFER_BIT_BACK_LEFT ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( BACK_CMD ); - OUT_BATCH( back_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, back->buffer) ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( DEPTH_CMD ); - OUT_BATCH( depth_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, depth->buffer) ); - OUT_BATCH( clear_depth ); - ADVANCE_BATCH(); - } - } - } - intel_batchbuffer_flush( intel->batch ); - UNLOCK_HARDWARE( intel ); -} - - - -#define BR13_565 0x1 -#define BR13_8888 0x3 - - -void -intelEmitImmediateColorExpandBlit(struct intel_context *intel, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLenum logic_op) -{ - struct xy_setup_blit setup; - struct xy_text_immediate_blit text; - int dwords = ((src_size + 7) & ~7) / 4; - - assert( logic_op - GL_CLEAR >= 0 ); - assert( logic_op - GL_CLEAR < 0x10 ); - - if (w < 0 || h < 0) - return; - - dst_pitch *= cpp; - - if (dst_tiled) - dst_pitch /= 4; - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - - memset(&setup, 0, sizeof(setup)); - - setup.br0.client = CLIENT_2D; - setup.br0.opcode = OPCODE_XY_SETUP_BLT; - setup.br0.write_alpha = (cpp == 4); - setup.br0.write_rgb = (cpp == 4); - setup.br0.dst_tiled = dst_tiled; - setup.br0.length = (sizeof(setup) / sizeof(int)) - 2; - - setup.br13.dest_pitch = dst_pitch; - setup.br13.rop = translate_raster_op(logic_op); - setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565; - setup.br13.clipping_enable = 0; - setup.br13.mono_source_transparency = 1; - - setup.dw2.clip_y1 = 0; - setup.dw2.clip_x1 = 0; - setup.dw3.clip_y2 = 100; - setup.dw3.clip_x2 = 100; - - setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset; - setup.background_color = 0; - setup.foreground_color = fg_color; - setup.pattern_base_addr = 0; - - memset(&text, 0, sizeof(text)); - text.dw0.client = CLIENT_2D; - text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT; - text.dw0.pad0 = 0; - text.dw0.byte_packed = 1; /* ?maybe? */ - text.dw0.pad1 = 0; - text.dw0.dst_tiled = dst_tiled; - text.dw0.pad2 = 0; - text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords; - text.dw1.dest_y1 = y; /* duplicates info in setup blit */ - text.dw1.dest_x1 = x; - text.dw2.dest_y2 = y + h; - text.dw2.dest_x2 = x + w; - - intel_batchbuffer_require_space( intel->batch, - sizeof(setup) + - sizeof(text) + - dwords, - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - &setup, - sizeof(setup), - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - &text, - sizeof(text), - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4, - INTEL_BATCH_NO_CLIPRECTS ); -} - diff --git a/i965/intel_buffer_objects.c b/i965/intel_buffer_objects.c deleted file mode 100644 index 015e433..0000000 --- a/i965/intel_buffer_objects.c +++ /dev/null @@ -1,207 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "imports.h" -#include "mtypes.h" -#include "bufferobj.h" - -#include "intel_context.h" -#include "intel_buffer_objects.h" -#include "bufmgr.h" - - -/** - * There is some duplication between mesa's bufferobjects and our - * bufmgr buffers. Both have an integer handle and a hashtable to - * lookup an opaque structure. It would be nice if the handles and - * internal structure where somehow shared. - */ -static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, - GLuint name, - GLenum target ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object); - - _mesa_initialize_buffer_object(&obj->Base, name, target); - - /* XXX: We generate our own handle, which is different to 'name' above. - */ - bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6); - assert(obj->buffer); - - return &obj->Base; -} - - -/** - * Deallocate/free a vertex/pixel buffer object. - * Called via glDeleteBuffersARB(). - */ -static void intel_bufferobj_free( GLcontext *ctx, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - - if (intel_obj->buffer) - bmDeleteBuffers( intel, 1, &intel_obj->buffer ); - - _mesa_free(intel_obj); -} - - - -/** - * Allocate space for and store data in a buffer object. Any data that was - * previously stored in the buffer object is lost. If data is NULL, - * memory will be allocated, but no copy will occur. - * Called via glBufferDataARB(). - */ -static void intel_bufferobj_data( GLcontext *ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid *data, - GLenum usage, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - /* XXX: do something useful with 'usage' (eg. populate flags - * argument below) - */ - assert(intel_obj); - - obj->Size = size; - obj->Usage = usage; - - bmBufferDataAUB(intel, intel_obj->buffer, size, data, 0, - 0, 0); -} - - -/** - * Replace data in a subrange of buffer object. If the data range - * specified by size + offset extends beyond the end of the buffer or - * if data is NULL, no copy is performed. - * Called via glBufferSubDataARB(). - */ -static void intel_bufferobj_subdata( GLcontext *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid * data, - struct gl_buffer_object * obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - bmBufferSubDataAUB(intel, intel_obj->buffer, offset, size, data, 0, 0); -} - - -/** - * Called via glGetBufferSubDataARB(). - */ -static void intel_bufferobj_get_subdata( GLcontext *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid * data, - struct gl_buffer_object * obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data); -} - - - -/** - * Called via glMapBufferARB(). - */ -static void *intel_bufferobj_map( GLcontext *ctx, - GLenum target, - GLenum access, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - /* XXX: Translate access to flags arg below: - */ - assert(intel_obj); - assert(intel_obj->buffer); - obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0); - return obj->Pointer; -} - - -/** - * Called via glMapBufferARB(). - */ -static GLboolean intel_bufferobj_unmap( GLcontext *ctx, - GLenum target, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - assert(intel_obj->buffer); - assert(obj->Pointer); - bmUnmapBufferAUB(intel, intel_obj->buffer, 0, 0); - obj->Pointer = NULL; - return GL_TRUE; -} - -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) -{ - assert(intel_obj->Base.Name); - assert(intel_obj->buffer); - return intel_obj->buffer; -} - -void intel_bufferobj_init( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - - ctx->Driver.NewBufferObject = intel_bufferobj_alloc; - ctx->Driver.DeleteBuffer = intel_bufferobj_free; - ctx->Driver.BufferData = intel_bufferobj_data; - ctx->Driver.BufferSubData = intel_bufferobj_subdata; - ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; - ctx->Driver.MapBuffer = intel_bufferobj_map; - ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; -} diff --git a/i965/intel_buffers.c b/i965/intel_buffers.c deleted file mode 100644 index de6a867..0000000 --- a/i965/intel_buffers.c +++ /dev/null @@ -1,581 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_batchbuffer.h" -#include "context.h" -#include "utils.h" -#include "framebuffer.h" -#include "vblank.h" -#include "macros.h" -#include "swrast/swrast.h" - -GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst, - const drm_clip_rect_t *a, - const drm_clip_rect_t *b ) -{ - dst->x1 = MAX2(a->x1, b->x1); - dst->x2 = MIN2(a->x2, b->x2); - dst->y1 = MAX2(a->y1, b->y1); - dst->y2 = MIN2(a->y2, b->y2); - - return (dst->x1 <= dst->x2 && - dst->y1 <= dst->y2); -} - -struct intel_region *intel_drawbuf_region( struct intel_context *intel ) -{ - switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { - case BUFFER_BIT_FRONT_LEFT: - return intel->front_region; - case BUFFER_BIT_BACK_LEFT: - return intel->back_region; - default: - /* Not necessary to fallback - could handle either NONE or - * FRONT_AND_BACK cases below. - */ - return NULL; - } -} - -struct intel_region *intel_readbuf_region( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - - /* This will have to change to support EXT_fbo's, but is correct - * for now: - */ - switch (ctx->ReadBuffer->_ColorReadBufferIndex) { - case BUFFER_FRONT_LEFT: - return intel->front_region; - case BUFFER_BACK_LEFT: - return intel->back_region; - default: - assert(0); - return NULL; - } -} - - - -static void intelBufferSize(GLframebuffer *buffer, - GLuint *width, - GLuint *height) -{ - GET_CURRENT_CONTEXT(ctx); - struct intel_context *intel = intel_context(ctx); - /* Need to lock to make sure the driDrawable is uptodate. This - * information is used to resize Mesa's software buffers, so it has - * to be correct. - */ - LOCK_HARDWARE(intel); - if (intel->driDrawable) { - *width = intel->driDrawable->w; - *height = intel->driDrawable->h; - } - else { - *width = 0; - *height = 0; - } - UNLOCK_HARDWARE(intel); -} - - -static void intelSetFrontClipRects( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; -} - - -static void intelSetBackClipRects( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) { - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; - } else { - intel->numClipRects = dPriv->numBackClipRects; - intel->pClipRects = dPriv->pBackClipRects; - intel->drawX = dPriv->backX; - intel->drawY = dPriv->backY; - - if (dPriv->numBackClipRects == 1 && - dPriv->x == dPriv->backX && - dPriv->y == dPriv->backY) { - - /* Repeat the calculation of the back cliprect dimensions here - * as early versions of dri.a in the Xserver are incorrect. Try - * very hard not to restrict future versions of dri.a which - * might eg. allocate truly private back buffers. - */ - int x1, y1; - int x2, y2; - - x1 = dPriv->x; - y1 = dPriv->y; - x2 = dPriv->x + dPriv->w; - y2 = dPriv->y + dPriv->h; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; - if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; - - if (x1 == dPriv->pBackClipRects[0].x1 && - y1 == dPriv->pBackClipRects[0].y1) { - - dPriv->pBackClipRects[0].x2 = x2; - dPriv->pBackClipRects[0].y2 = y2; - } - } - } -} - - -void intelWindowMoved( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!intel->ctx.DrawBuffer) { - intelSetFrontClipRects( intel ); - } - else { - switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { - case BUFFER_BIT_FRONT_LEFT: - intelSetFrontClipRects( intel ); - break; - case BUFFER_BIT_BACK_LEFT: - intelSetBackClipRects( intel ); - break; - default: - /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */ - intelSetFrontClipRects( intel ); - } - } - - { - if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) { - volatile drmI830Sarea *sarea = intel->sarea; - drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w, - .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h - }; - drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x, - .x2 = sarea->pipeA_x + sarea->pipeA_w, - .y1 = sarea->pipeA_y, - .y2 = sarea->pipeA_y + sarea->pipeA_h }; - drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x, - .x2 = sarea->pipeB_x + sarea->pipeB_w, - .y1 = sarea->pipeB_y, - .y2 = sarea->pipeB_y + sarea->pipeB_h }; - GLint areaA = driIntersectArea( drw_rect, pipeA_rect ); - GLint areaB = driIntersectArea( drw_rect, pipeB_rect ); - GLuint flags = intel->vblank_flags; - - if (areaB > areaA || (areaA > 0 && areaB > 0)) { - flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY; - } else { - flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY; - } - - if (flags != intel->vblank_flags) { - intel->vblank_flags = flags; - driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq); - } - } else { - intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY; - } - } - _mesa_resize_framebuffer(&intel->ctx, - (GLframebuffer*)dPriv->driverPrivate, - dPriv->w, dPriv->h); - - /* Set state we know depends on drawable parameters: - */ - { - GLcontext *ctx = &intel->ctx; - - if (ctx->Driver.Scissor) - ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); - - if (ctx->Driver.DepthRange) - ctx->Driver.DepthRange( ctx, - ctx->Viewport.Near, - ctx->Viewport.Far ); - - intel->NewGLState |= _NEW_SCISSOR; - } - - /* This works because the lock is always grabbed before emitting - * commands and commands are always flushed prior to releasing - * the lock. - */ - intel->NewGLState |= _NEW_WINDOW_POS; -} - - - -/* A true meta version of this would be very simple and additionally - * machine independent. Maybe we'll get there one day. - */ -static void intelClearWithTris(struct intel_context *intel, - GLbitfield mask) -{ - GLcontext *ctx = &intel->ctx; - drm_clip_rect_t clear; - GLint cx, cy, cw, ch; - - if (INTEL_DEBUG & DEBUG_DRI) - _mesa_printf("%s %x\n", __FUNCTION__, mask); - - { - - intel->vtbl.install_meta_state(intel); - - /* Get clear bounds after locking */ - cx = ctx->DrawBuffer->_Xmin; - cy = ctx->DrawBuffer->_Ymin; - cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; - ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; - - clear.x1 = cx; - clear.y1 = cy; - clear.x2 = cx + cw; - clear.y2 = cy + ch; - - /* Back and stencil cliprects are the same. Try and do both - * buffers at once: - */ - if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { - intel->vtbl.meta_draw_region(intel, - intel->back_region, - intel->depth_region ); - - if (mask & BUFFER_BIT_BACK_LEFT) - intel->vtbl.meta_color_mask(intel, GL_TRUE ); - else - intel->vtbl.meta_color_mask(intel, GL_FALSE ); - - if (mask & BUFFER_BIT_STENCIL) - intel->vtbl.meta_stencil_replace( intel, - intel->ctx.Stencil.WriteMask[0], - intel->ctx.Stencil.Clear); - else - intel->vtbl.meta_no_stencil_write(intel); - - if (mask & BUFFER_BIT_DEPTH) - intel->vtbl.meta_depth_replace( intel ); - else - intel->vtbl.meta_no_depth_write(intel); - - /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the - * drawing origin may not be correctly emitted. - */ - intel->vtbl.meta_draw_quad(intel, - clear.x1, clear.x2, - clear.y1, clear.y2, - intel->ctx.Depth.Clear, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3], - 0, 0, 0, 0); - } - - /* Front may have different cliprects: - */ - if (mask & BUFFER_BIT_FRONT_LEFT) { - intel->vtbl.meta_no_depth_write(intel); - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_color_mask(intel, GL_TRUE ); - intel->vtbl.meta_draw_region(intel, - intel->front_region, - intel->depth_region); - - /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the - * drawing origin may not be correctly emitted. - */ - intel->vtbl.meta_draw_quad(intel, - clear.x1, clear.x2, - clear.y1, clear.y2, - 0, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3], - 0, 0, 0, 0); - } - - intel->vtbl.leave_meta_state( intel ); - } -} - - - - - -static void intelClear(GLcontext *ctx, GLbitfield mask) -{ - struct intel_context *intel = intel_context( ctx ); - const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask); - GLbitfield tri_mask = 0; - GLbitfield blit_mask = 0; - GLbitfield swrast_mask = 0; - - if (INTEL_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s %x\n", __FUNCTION__, mask); - - - if (mask & BUFFER_BIT_FRONT_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_FRONT_LEFT; - } - else { - tri_mask |= BUFFER_BIT_FRONT_LEFT; - } - } - - if (mask & BUFFER_BIT_BACK_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_BACK_LEFT; - } - else { - tri_mask |= BUFFER_BIT_BACK_LEFT; - } - } - - - if (mask & BUFFER_BIT_STENCIL) { - if (!intel->hw_stencil) { - swrast_mask |= BUFFER_BIT_STENCIL; - } - else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff || - intel->depth_region->tiled) { - tri_mask |= BUFFER_BIT_STENCIL; - } - else { - blit_mask |= BUFFER_BIT_STENCIL; - } - } - - /* Do depth with stencil if possible to avoid 2nd pass over the - * same buffer. - */ - if (mask & BUFFER_BIT_DEPTH) { - if ((tri_mask & BUFFER_BIT_STENCIL) || - intel->depth_region->tiled) - tri_mask |= BUFFER_BIT_DEPTH; - else - blit_mask |= BUFFER_BIT_DEPTH; - } - - swrast_mask |= (mask & BUFFER_BIT_ACCUM); - - intelFlush( ctx ); - - if (blit_mask) - intelClearWithBlit( ctx, blit_mask ); - - if (tri_mask) - intelClearWithTris( intel, tri_mask ); - - if (swrast_mask) - _swrast_Clear( ctx, swrast_mask ); -} - - - - - - - -/* Flip the front & back buffers - */ -static void intelPageFlip( const __DRIdrawablePrivate *dPriv ) -{ -#if 0 - struct intel_context *intel; - int tmp, ret; - - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - - if (dPriv->pClipRects) { - *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0]; - intel->sarea->nbox = 1; - } - - ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); - if (ret) { - fprintf(stderr, "%s: %d\n", __FUNCTION__, ret); - UNLOCK_HARDWARE( intel ); - exit(1); - } - - tmp = intel->sarea->last_enqueue; - intelRefillBatchLocked( intel ); - UNLOCK_HARDWARE( intel ); - - - intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer ); -#endif -} - - -void intelSwapBuffers( __DRIdrawablePrivate *dPriv ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - struct intel_context *intel; - GLcontext *ctx; - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - ctx = &intel->ctx; - if (ctx->Visual.doubleBufferMode) { - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */ - intelPageFlip( dPriv ); - } else { - intelCopyBuffer( dPriv, NULL ); - } - if (intel->aub_file) { - intelFlush(ctx); - intel->vtbl.aub_dump_bmp( intel, 1 ); - - intel->aub_wrap = 1; - } - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - -void intelCopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - struct intel_context *intel = dPriv->driContextPriv->driverPrivate; - GLcontext *ctx = &intel->ctx; - - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - intelCopyBuffer( dPriv, &rect ); - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - - -static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) -{ - struct intel_context *intel = intel_context(ctx); - int front = 0; - - if (!ctx->DrawBuffer) - return; - - switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { - case BUFFER_BIT_FRONT_LEFT: - front = 1; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - case BUFFER_BIT_BACK_LEFT: - front = 0; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - default: - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - if ( intel->sarea->pf_current_page == 1 ) - front ^= 1; - - intelSetFrontClipRects( intel ); - - - if (front) { - if (intel->draw_region != intel->front_region) { - intel_region_release(intel, &intel->draw_region); - intel_region_reference(&intel->draw_region, intel->front_region); - } - } else { - if (intel->draw_region != intel->back_region) { - intel_region_release(intel, &intel->draw_region); - intel_region_reference(&intel->draw_region, intel->back_region); - } - } - - intel->vtbl.set_draw_region( intel, - intel->draw_region, - intel->depth_region); -} - -static void intelReadBuffer( GLcontext *ctx, GLenum mode ) -{ - /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ -} - - - -void intelInitBufferFuncs( struct dd_function_table *functions ) -{ - functions->Clear = intelClear; - functions->GetBufferSize = intelBufferSize; - functions->DrawBuffer = intelDrawBuffer; - functions->ReadBuffer = intelReadBuffer; -} diff --git a/i965/intel_context.c b/i965/intel_context.c deleted file mode 100644 index 5ee5282..0000000 --- a/i965/intel_context.c +++ /dev/null @@ -1,716 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "glheader.h" -#include "context.h" -#include "matrix.h" -#include "simple_list.h" -#include "extensions.h" -#include "framebuffer.h" -#include "imports.h" -#include "points.h" - -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "vbo/vbo.h" - -#include "tnl/t_pipeline.h" -#include "tnl/t_vertex.h" - -#include "drivers/common/driverfuncs.h" - -#include "intel_screen.h" - -#include "i830_dri.h" -#include "i830_common.h" - -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_buffer_objects.h" - -#include "bufmgr.h" - -#include "utils.h" -#include "vblank.h" -#ifndef INTEL_DEBUG -int INTEL_DEBUG = (0); -#endif - -#define need_GL_ARB_multisample -#define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_window_pos -#define need_GL_ARB_occlusion_query -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color -#include "extension_helper.h" - -#ifndef VERBOSE -int VERBOSE = 0; -#endif - -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -#define DRIVER_VERSION "4.1.3002" - -static const GLubyte *intelGetString( GLcontext *ctx, GLenum name ) -{ - const char * chipset; - static char buffer[128]; - - switch (name) { - case GL_VENDOR: - return (GLubyte *)"Tungsten Graphics, Inc"; - break; - - case GL_RENDERER: - switch (intel_context(ctx)->intelScreen->deviceID) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; break; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; break; - break; - case PCI_CHIP_I946_GZ: - chipset = "Intel(R) 946GZ"; break; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; break; - break; - case PCI_CHIP_IGD_GM: - chipset = "Intel(R) Integrated Graphics Device"; - break; - default: - chipset = "Unknown Intel Chipset"; break; - } - - (void) driGetRendererString( buffer, chipset, DRIVER_VERSION, 0 ); - return (GLubyte *) buffer; - - default: - return NULL; - } -} - - -/** - * Extension strings exported by the intel driver. - * - * \note - * It appears that ARB_texture_env_crossbar has "disappeared" compared to the - * old i830-specific driver. - */ -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_multitexture", NULL }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_texture_cube_map", NULL }, - { "GL_ARB_texture_env_add", NULL }, - { "GL_ARB_texture_env_combine", NULL }, - { "GL_ARB_texture_env_dot3", NULL }, - { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_NV_texture_rectangle", NULL }, - { "GL_EXT_texture_rectangle", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, - { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, - { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, - { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, - { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, - { "GL_EXT_blend_logic_op", NULL }, - { "GL_EXT_blend_subtract", NULL }, - { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { "GL_EXT_stencil_wrap", NULL }, - { "GL_EXT_texture_edge_clamp", NULL }, - { "GL_EXT_texture_env_combine", NULL }, - { "GL_EXT_texture_env_dot3", NULL }, - { "GL_EXT_texture_filter_anisotropic", NULL }, - { "GL_EXT_texture_lod_bias", NULL }, - { "GL_3DFX_texture_compression_FXT1", NULL }, - { "GL_APPLE_client_storage", NULL }, - { "GL_MESA_pack_invert", NULL }, - { "GL_MESA_ycbcr_texture", NULL }, - { "GL_NV_blend_square", NULL }, - { "GL_SGIS_generate_mipmap", NULL }, - { NULL, NULL } -}; - -const struct dri_extension arb_oc_extension = - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}; - -void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) -{ - struct intel_context *intel = ctx?intel_context(ctx):NULL; - driInitExtensions(ctx, card_extensions, enable_imaging); - if (!ctx || intel->intelScreen->drmMinor >= 8) - driInitSingleExtension (ctx, &arb_oc_extension); -} - -static const struct dri_debug_control debug_control[] = -{ - { "fall", DEBUG_FALLBACKS }, - { "tex", DEBUG_TEXTURE }, - { "ioctl", DEBUG_IOCTL }, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "state", DEBUG_STATE }, - { "verb", DEBUG_VERBOSE }, - { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sync", DEBUG_SYNC }, - { "sleep", DEBUG_SLEEP }, - { "pix", DEBUG_PIXEL }, - { "buf", DEBUG_BUFMGR }, - { "stats", DEBUG_STATS }, - { "tile", DEBUG_TILE }, - { "sing", DEBUG_SINGLE_THREAD }, - { "thre", DEBUG_SINGLE_THREAD }, - { "wm", DEBUG_WM }, - { "vs", DEBUG_VS }, - { NULL, 0 } -}; - - -static void intelInvalidateState( GLcontext *ctx, GLuint new_state ) -{ - struct intel_context *intel = intel_context(ctx); - - _swrast_InvalidateState( ctx, new_state ); - _swsetup_InvalidateState( ctx, new_state ); - _vbo_InvalidateState( ctx, new_state ); - _tnl_InvalidateState( ctx, new_state ); - _tnl_invalidate_vertex_state( ctx, new_state ); - - intel->NewGLState |= new_state; - - if (intel->vtbl.invalidate_state) - intel->vtbl.invalidate_state( intel, new_state ); -} - - -void intelFlush( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - bmLockAndFence(intel); -} - -void intelFinish( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - bmFinishFence(intel, bmLockAndFence(intel)); -} - -static void -intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) -{ - struct intel_context *intel = intel_context( ctx ); - drmI830MMIO io = { - .read_write = MMIO_READ, - .reg = MMIO_REGS_PS_DEPTH_COUNT, - .data = &q->Result - }; - intel->stats_wm++; - intelFinish(&intel->ctx); - drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); -} - -static void -intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) -{ - struct intel_context *intel = intel_context( ctx ); - GLuint64EXT tmp; - drmI830MMIO io = { - .read_write = MMIO_READ, - .reg = MMIO_REGS_PS_DEPTH_COUNT, - .data = &tmp - }; - intelFinish(&intel->ctx); - drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); - q->Result = tmp - q->Result; - q->Ready = GL_TRUE; - intel->stats_wm--; -} - - -void intelInitDriverFunctions( struct dd_function_table *functions ) -{ - _mesa_init_driver_functions( functions ); - - functions->Flush = intelFlush; - functions->Finish = intelFinish; - functions->GetString = intelGetString; - functions->UpdateState = intelInvalidateState; - functions->BeginQuery = intelBeginQuery; - functions->EndQuery = intelEndQuery; - - /* CopyPixels can be accelerated even with the current memory - * manager: - */ - if (!getenv("INTEL_NO_BLIT")) { - functions->CopyPixels = intelCopyPixels; - functions->Bitmap = intelBitmap; - } - - intelInitTextureFuncs( functions ); - intelInitStateFuncs( functions ); - intelInitBufferFuncs( functions ); -} - - - -GLboolean intelInitContext( struct intel_context *intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ) -{ - GLcontext *ctx = &intel->ctx; - GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - volatile drmI830Sarea *saPriv = (volatile drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - - if (!_mesa_initialize_context(&intel->ctx, - mesaVis, shareCtx, - functions, - (void*) intel)) { - _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__); - return GL_FALSE; - } - - driContextPriv->driverPrivate = intel; - intel->intelScreen = intelScreen; - intel->driScreen = sPriv; - intel->sarea = saPriv; - - driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, "i965"); - - intel->vblank_flags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ; - - ctx->Const.MaxTextureMaxAnisotropy = 2.0; - - if (getenv("INTEL_STRICT_CONFORMANCE")) { - intel->strict_conformance = 1; - } - - if (intel->strict_conformance) { - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 1.0; - ctx->Const.MaxLineWidthAA = 1.0; - ctx->Const.LineWidthGranularity = 1.0; - } - else { - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 5.0; - ctx->Const.MaxLineWidthAA = 5.0; - ctx->Const.LineWidthGranularity = 0.5; - } - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = 255.0; - ctx->Const.MaxPointSizeAA = 3.0; - ctx->Const.PointSizeGranularity = 1.0; - - /* reinitialize the context point state. - * It depend on constants in __GLcontextRec::Const - */ - _mesa_init_point(ctx); - - /* Initialize the software rasterizer and helper modules. */ - _swrast_CreateContext( ctx ); - _vbo_CreateContext( ctx ); - _tnl_CreateContext( ctx ); - _swsetup_CreateContext( ctx ); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - - /* Configure swrast to match hardware characteristics: */ - _swrast_allow_pixel_fog( ctx, GL_FALSE ); - _swrast_allow_vertex_fog( ctx, GL_TRUE ); - - /* Dri stuff */ - intel->hHWContext = driContextPriv->hHWContext; - intel->driFd = sPriv->fd; - intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock; - - intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; - intel->hw_stipple = 1; - - switch(mesaVis->depthBits) { - case 0: /* what to do in this case? */ - case 16: - intel->depth_scale = 1.0/0xffff; - intel->polygon_offset_scale = 1.0/0xffff; - intel->depth_clear_mask = ~0; - intel->ClearDepth = 0xffff; - break; - case 24: - intel->depth_scale = 1.0/0xffffff; - intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */ - intel->depth_clear_mask = 0x00ffffff; - intel->stencil_clear_mask = 0xff000000; - intel->ClearDepth = 0x00ffffff; - break; - default: - assert(0); - break; - } - - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs( ctx ); - - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - - if (!intel->intelScreen->irq_active) { - _mesa_printf("IRQs not active. Exiting\n"); - exit(1); - } - intelInitExtensions(ctx, GL_TRUE); - - INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), - debug_control ); - - - /* Buffer manager: - */ - intel->bm = bm_fake_intel_Attach( intel ); - - - bmInitPool(intel, - intel->intelScreen->tex.offset, /* low offset */ - intel->intelScreen->tex.map, /* low virtual */ - intel->intelScreen->tex.size, - BM_MEM_AGP); - - /* These are still static, but create regions for them. - */ - intel->front_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->front.offset, - intelScreen->front.map, - intelScreen->cpp, - intelScreen->front.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->front.size, - intelScreen->front.tiled != 0); - - intel->back_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->back.offset, - intelScreen->back.map, - intelScreen->cpp, - intelScreen->back.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->back.size, - intelScreen->back.tiled != 0); - - /* Still assuming front.cpp == depth.cpp - * - * XXX: Setting tiling to false because Depth tiling only supports - * YMAJOR but the blitter only supports XMAJOR tiling. Have to - * resolve later. - */ - intel->depth_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->depth.offset, - intelScreen->depth.map, - intelScreen->cpp, - intelScreen->depth.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->depth.size, - intelScreen->depth.tiled != 0); - - intel_bufferobj_init( intel ); - intel->batch = intel_batchbuffer_alloc( intel ); - - if (intel->ctx.Mesa_DXTn) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - _mesa_enable_extension( ctx, "GL_S3_s3tc" ); - } - else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - } - -/* driInitTextureObjects( ctx, & intel->swapped, */ -/* DRI_TEXMGR_DO_TEXTURE_1D | */ -/* DRI_TEXMGR_DO_TEXTURE_2D | */ -/* DRI_TEXMGR_DO_TEXTURE_RECT ); */ - - - if (getenv("INTEL_NO_RAST")) { - fprintf(stderr, "disabling 3D rasterization\n"); - intel->no_rast = 1; - } - - - return GL_TRUE; -} - -void intelDestroyContext(__DRIcontextPrivate *driContextPriv) -{ - struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; - - assert(intel); /* should never be null */ - if (intel) { - GLboolean release_texture_heaps; - - - intel->vtbl.destroy( intel ); - - release_texture_heaps = (intel->ctx.Shared->RefCount == 1); - _swsetup_DestroyContext (&intel->ctx); - _tnl_DestroyContext (&intel->ctx); - _vbo_DestroyContext (&intel->ctx); - - _swrast_DestroyContext (&intel->ctx); - intel->Fallback = 0; /* don't call _swrast_Flush later */ - intel_batchbuffer_free(intel->batch); - intel->batch = NULL; - - - if ( release_texture_heaps ) { - /* This share group is about to go away, free our private - * texture object data. - */ - - /* XXX: destroy the shared bufmgr struct here? - */ - } - - /* Free the regions created to describe front/back/depth - * buffers: - */ -#if 0 - intel_region_release(intel, &intel->front_region); - intel_region_release(intel, &intel->back_region); - intel_region_release(intel, &intel->depth_region); - intel_region_release(intel, &intel->draw_region); -#endif - - /* free the Mesa context */ - _mesa_destroy_context(&intel->ctx); - } - - driContextPriv->driverPrivate = NULL; -} - -GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv) -{ - return GL_TRUE; -} - -GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - - if (driContextPriv) { - struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; - - if (intel->driReadDrawable != driReadPriv) { - intel->driReadDrawable = driReadPriv; - } - - if ( intel->driDrawable != driDrawPriv ) { - /* Shouldn't the readbuffer be stored also? */ - driDrawableInitVBlank( driDrawPriv, intel->vblank_flags, - &intel->vbl_seq ); - - intel->driDrawable = driDrawPriv; - intelWindowMoved( intel ); - } - - _mesa_make_current(&intel->ctx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate); - - intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] ); - } else { - _mesa_make_current(NULL, NULL, NULL); - } - - return GL_TRUE; -} - - -static void intelContendedLock( struct intel_context *intel, GLuint flags ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - __DRIscreenPrivate *sPriv = intel->driScreen; - volatile drmI830Sarea * sarea = intel->sarea; - int me = intel->hHWContext; - int my_bufmgr = bmCtxId(intel); - - drmGetLock(intel->driFd, intel->hHWContext, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); - - - intel->locked = 1; - intel->need_flush = 1; - - /* Lost context? - */ - if (sarea->ctxOwner != me) { - DBG("Lost Context: sarea->ctxOwner %x me %x\n", sarea->ctxOwner, me); - sarea->ctxOwner = me; - intel->vtbl.lost_hardware( intel ); - } - - /* As above, but don't evict the texture data on transitions - * between contexts which all share a local buffer manager. - */ - if (sarea->texAge != my_bufmgr) { - DBG("Lost Textures: sarea->texAge %x my_bufmgr %x\n", sarea->ctxOwner, my_bufmgr); - sarea->texAge = my_bufmgr; - bm_fake_NotifyContendedLockTake( intel ); - } - - /* Drawable changed? - */ - if (dPriv && intel->lastStamp != dPriv->lastStamp) { - intelWindowMoved( intel ); - intel->lastStamp = dPriv->lastStamp; - } -} - -_glthread_DECLARE_STATIC_MUTEX(lockMutex); - -/* Lock the hardware and validate our state. - */ -void LOCK_HARDWARE( struct intel_context *intel ) -{ - char __ret=0; - - _glthread_LOCK_MUTEX(lockMutex); - assert(!intel->locked); - - - DRM_CAS(intel->driHwLock, intel->hHWContext, - (DRM_LOCK_HELD|intel->hHWContext), __ret); - if (__ret) - intelContendedLock( intel, 0 ); - - intel->locked = 1; - - if (intel->aub_wrap) { - bm_fake_NotifyContendedLockTake( intel ); - intel->vtbl.lost_hardware( intel ); - intel->vtbl.aub_wrap(intel); - intel->aub_wrap = 0; - } - - if (bmError(intel)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - } - - /* Make sure nothing has been emitted prior to getting the lock: - */ - assert(intel->batch->map == 0); - - /* XXX: postpone, may not be needed: - */ - if (!intel_batchbuffer_map(intel->batch)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - - /* This could only fail if the batchbuffer was greater in size - * than the available texture memory: - */ - if (!intel_batchbuffer_map(intel->batch)) { - _mesa_printf("double failure to map batchbuffer\n"); - assert(0); - } - } -} - - -/* Unlock the hardware using the global current context - */ -void UNLOCK_HARDWARE( struct intel_context *intel ) -{ - /* Make sure everything has been released: - */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); - - intel_batchbuffer_unmap(intel->batch); - intel->vtbl.note_unlock( intel ); - intel->locked = 0; - - - - DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); - _glthread_UNLOCK_MUTEX(lockMutex); -} - - diff --git a/i965/intel_context.h b/i965/intel_context.h deleted file mode 100644 index a244757..0000000 --- a/i965/intel_context.h +++ /dev/null @@ -1,531 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTELCONTEXT_INC -#define INTELCONTEXT_INC - - - -#include "mtypes.h" -#include "drm.h" -#include "texmem.h" - -#include "intel_screen.h" -#include "i830_common.h" -#include "tnl/t_vertex.h" - -#define TAG(x) intel##x -#include "tnl_dd/t_dd_vertex.h" -#undef TAG - -#define DV_PF_555 (1<<8) -#define DV_PF_565 (2<<8) -#define DV_PF_8888 (3<<8) - -struct intel_region; -struct intel_context; - -typedef void (*intel_tri_func)(struct intel_context *, intelVertex *, intelVertex *, - intelVertex *); -typedef void (*intel_line_func)(struct intel_context *, intelVertex *, intelVertex *); -typedef void (*intel_point_func)(struct intel_context *, intelVertex *); - -#define INTEL_FALLBACK_DRAW_BUFFER 0x1 -#define INTEL_FALLBACK_READ_BUFFER 0x2 -#define INTEL_FALLBACK_USER 0x4 -#define INTEL_FALLBACK_RENDERMODE 0x8 -#define INTEL_FALLBACK_TEXTURE 0x10 - -extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ); -#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) - - - -struct intel_texture_object -{ - struct gl_texture_object base; /* The "parent" object */ - - /* The mipmap tree must include at least these levels once - * validated: - */ - GLuint firstLevel; - GLuint lastLevel; - - GLuint dirty_images[6]; - GLuint dirty; - - /* On validation any active images held in main memory or in other - * regions will be copied to this region and the old storage freed. - */ - struct intel_mipmap_tree *mt; -}; - - - -struct intel_context -{ - GLcontext ctx; /* the parent class */ - - struct { - void (*destroy)( struct intel_context *intel ); - void (*emit_state)( struct intel_context *intel ); - void (*emit_invarient_state)( struct intel_context *intel ); - void (*lost_hardware)( struct intel_context *intel ); - void (*note_fence)( struct intel_context *intel, GLuint fence ); - void (*note_unlock)( struct intel_context *intel ); - void (*update_texture_state)( struct intel_context *intel ); - - void (*render_start)( struct intel_context *intel ); - void (*set_draw_region)( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region ); - - GLuint (*flush_cmd)( void ); - - void (*emit_flush)( struct intel_context *intel, - GLuint unused ); - - void (*aub_commands)( struct intel_context *intel, - GLuint offset, - const void *buf, - GLuint sz ); - void (*aub_dump_bmp)( struct intel_context *intel, GLuint buffer ); - void (*aub_wrap)( struct intel_context *intel ); - void (*aub_gtt_data)( struct intel_context *intel, - GLuint offset, - const void *src, - GLuint size, - GLuint aubtype, - GLuint aubsubtype); - - - void (*reduced_primitive_state)( struct intel_context *intel, GLenum rprim ); - - GLboolean (*check_vertex_size)( struct intel_context *intel, GLuint expected ); - - void (*invalidate_state)( struct intel_context *intel, GLuint new_state ); - - /* Metaops: - */ - void (*install_meta_state)( struct intel_context *intel ); - void (*leave_meta_state)( struct intel_context *intel ); - - void (*meta_draw_region)( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region ); - - void (*meta_color_mask)( struct intel_context *intel, - GLboolean ); - - void (*meta_stencil_replace)( struct intel_context *intel, - GLuint mask, - GLuint clear ); - - void (*meta_depth_replace)( struct intel_context *intel ); - - void (*meta_texture_blend_replace) (struct intel_context * intel); - - void (*meta_no_stencil_write)( struct intel_context *intel ); - void (*meta_no_depth_write)( struct intel_context *intel ); - void (*meta_no_texture)( struct intel_context *intel ); - void (*meta_import_pixel_state) (struct intel_context * intel); - void (*meta_frame_buffer_texture)( struct intel_context *intel, - GLint xoff, GLint yoff ); - - void (*meta_draw_quad)(struct intel_context *intel, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLfloat z, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1); - - - - } vtbl; - - GLint refcount; - GLuint Fallback; - GLuint NewGLState; - - GLuint last_swap_fence; - GLuint second_last_swap_fence; - - GLboolean aub_wrap; - GLuint stats_wm; - - struct intel_batchbuffer *batch; - - GLubyte clear_chan[4]; - GLuint ClearColor; - GLuint ClearDepth; - - GLfloat depth_scale; - GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ - GLuint depth_clear_mask; - GLuint stencil_clear_mask; - - GLboolean hw_stencil; - GLboolean hw_stipple; - GLboolean depth_buffer_is_float; - GLboolean no_hw; - GLboolean no_rast; - GLboolean thrashing; - GLboolean locked; - GLboolean strict_conformance; - GLboolean need_flush; - - - - /* AGP memory buffer manager: - */ - struct bufmgr *bm; - - - /* State for intelvb.c and inteltris.c. - */ - GLenum render_primitive; - GLenum reduced_primitive; - - struct intel_region *front_region; - struct intel_region *back_region; - struct intel_region *draw_region; - struct intel_region *depth_region; - - /* These refer to the current draw (front vs. back) buffer: - */ - int drawX; /* origin of drawable in draw buffer */ - int drawY; - GLuint numClipRects; /* cliprects for that buffer */ - drm_clip_rect_t *pClipRects; - struct gl_texture_object *frame_buffer_texobj; - - GLboolean scissor; - drm_clip_rect_t draw_rect; - drm_clip_rect_t scissor_rect; - - drm_context_t hHWContext; - drmLock *driHwLock; - int driFd; - - __DRIdrawablePrivate *driDrawable; - __DRIdrawablePrivate *driReadDrawable; - __DRIscreenPrivate *driScreen; - intelScreenPrivate *intelScreen; - volatile drmI830Sarea *sarea; - - FILE *aub_file; - - GLuint lastStamp; - - /** - * Configuration cache - */ - driOptionCache optionCache; - - /* VBI - */ - GLuint vbl_seq; - GLuint vblank_flags; - - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; -}; - -/* These are functions now: - */ -void LOCK_HARDWARE( struct intel_context *intel ); -void UNLOCK_HARDWARE( struct intel_context *intel ); - - -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - -/* ================================================================ - * Color packing: - */ - -#define INTEL_PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define INTEL_PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define INTEL_PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define INTEL_PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - -#define INTEL_PACKCOLOR(format, r, g, b, a) \ -(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) : \ - (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) : \ - (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) : \ - 0))) - - - -/* ================================================================ - * From linux kernel i386 header files, copes with odd sizes better - * than COPY_DWORDS would: - */ -#if defined(i386) || defined(__i386__) -static inline void * __memcpy(void * to, const void * from, size_t n) -{ - int d0, d1, d2; - __asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); - return (to); -} -#else -#define __memcpy(a,b,c) memcpy(a,b,c) -#endif - - -/* The system memcpy (at least on ubuntu 5.10) has problems copying - * to agp (writecombined) memory from a source which isn't 64-byte - * aligned - there is a 4x performance falloff. - * - * The x86 __memcpy is immune to this but is slightly slower - * (10%-ish) than the system memcpy. - * - * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but - * isn't much faster than x86_memcpy for agp copies. - * - * TODO: switch dynamically. - */ -static inline void *do_memcpy( void *dest, const void *src, size_t n ) -{ - if ( (((unsigned long)src) & 63) || - (((unsigned long)dest) & 63)) { - return __memcpy(dest, src, n); - } - else - return memcpy(dest, src, n); -} - - - - - -/* ================================================================ - * Debugging: - */ -extern int INTEL_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 - - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_G_1 0x2982 -#define PCI_CHIP_I946_GZ 0x2972 -#define PCI_CHIP_I965_GM 0x2A02 - -#define PCI_CHIP_IGD_GM 0x2A42 - - -/* ================================================================ - * intel_context.c: - */ - -extern GLboolean intelInitContext( struct intel_context *intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ); - -extern void intelGetLock(struct intel_context *intel, GLuint flags); - -extern void intelInitState( GLcontext *ctx ); -extern void intelFinish( GLcontext *ctx ); -extern void intelFlush( GLcontext *ctx ); - -extern void intelInitDriverFunctions( struct dd_function_table *functions ); - - -/* ================================================================ - * intel_state.c: - */ -extern void intelInitStateFuncs( struct dd_function_table *functions ); - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 0x1 -#define STENCILOP_REPLACE 0x2 -#define STENCILOP_INCRSAT 0x3 -#define STENCILOP_DECRSAT 0x4 -#define STENCILOP_INCR 0x5 -#define STENCILOP_DECR 0x6 -#define STENCILOP_INVERT 0x7 - -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 0x1 -#define LOGICOP_AND_INV 0x2 -#define LOGICOP_COPY_INV 0x3 -#define LOGICOP_AND_RVRSE 0x4 -#define LOGICOP_INV 0x5 -#define LOGICOP_XOR 0x6 -#define LOGICOP_NAND 0x7 -#define LOGICOP_AND 0x8 -#define LOGICOP_EQUIV 0x9 -#define LOGICOP_NOOP 0xa -#define LOGICOP_OR_INV 0xb -#define LOGICOP_COPY 0xc -#define LOGICOP_OR_RVRSE 0xd -#define LOGICOP_OR 0xe -#define LOGICOP_SET 0xf - -#define BLENDFACT_ZERO 0x01 -#define BLENDFACT_ONE 0x02 -#define BLENDFACT_SRC_COLR 0x03 -#define BLENDFACT_INV_SRC_COLR 0x04 -#define BLENDFACT_SRC_ALPHA 0x05 -#define BLENDFACT_INV_SRC_ALPHA 0x06 -#define BLENDFACT_DST_ALPHA 0x07 -#define BLENDFACT_INV_DST_ALPHA 0x08 -#define BLENDFACT_DST_COLR 0x09 -#define BLENDFACT_INV_DST_COLR 0x0a -#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b -#define BLENDFACT_CONST_COLOR 0x0c -#define BLENDFACT_INV_CONST_COLOR 0x0d -#define BLENDFACT_CONST_ALPHA 0x0e -#define BLENDFACT_INV_CONST_ALPHA 0x0f -#define BLENDFACT_MASK 0x0f - -extern int intel_translate_shadow_compare_func( GLenum func ); -extern int intel_translate_compare_func( GLenum func ); -extern int intel_translate_stencil_op( GLenum op ); -extern int intel_translate_blend_factor( GLenum factor ); -extern int intel_translate_logic_op( GLenum opcode ); - - -/* ================================================================ - * intel_buffers.c: - */ -void intelInitBufferFuncs( struct dd_function_table *functions ); - -struct intel_region *intel_readbuf_region( struct intel_context *intel ); -struct intel_region *intel_drawbuf_region( struct intel_context *intel ); - -extern void intelWindowMoved( struct intel_context *intel ); - -extern GLboolean intel_intersect_cliprects( drm_clip_rect_t *dest, - const drm_clip_rect_t *a, - const drm_clip_rect_t *b ); - - -/* ================================================================ - * intel_pixel_copy.c: - */ -void intelCopyPixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type); - -GLboolean intel_check_blit_fragment_ops(GLcontext * ctx); - -void intelBitmap(GLcontext * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte * pixels); - -void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); -#define _NEW_WINDOW_POS 0x40000000 - - -/*====================================================================== - * Inline conversion functions. - * These are better-typed than the macros used previously: - */ -static inline struct intel_context *intel_context( GLcontext *ctx ) -{ - return (struct intel_context *)ctx; -} - -static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj ) -{ - return (struct intel_texture_object *)obj; -} - -static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img ) -{ - return (struct intel_texture_image *)img; -} - -#endif - diff --git a/i965/intel_ioctl.c b/i965/intel_ioctl.c deleted file mode 100644 index 0a8e976..0000000 --- a/i965/intel_ioctl.c +++ /dev/null @@ -1,205 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <sched.h> - -#include "mtypes.h" -#include "context.h" -#include "swrast/swrast.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "drm.h" -#include "bufmgr.h" - -static int intelWaitIdleLocked( struct intel_context *intel ) -{ - static int in_wait_idle = 0; - unsigned int fence; - - if (!in_wait_idle) { - if (INTEL_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "waiting for idle\n"); - } - - in_wait_idle = 1; - fence = bmSetFence(intel); - intelWaitIrq(intel, fence); - in_wait_idle = 0; - - return bmTestFence(intel, fence); - } else { - return 1; - } -} - -int intelEmitIrqLocked( struct intel_context *intel ) -{ - int seq = 1; - - if (!intel->no_hw) { - drmI830IrqEmit ie; - int ret; - - assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == - (DRM_LOCK_HELD|intel->hHWContext)); - - ie.irq_seq = &seq; - - ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, - &ie, sizeof(ie) ); - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret ); - exit(1); - } - - if (0) - fprintf(stderr, "%s --> %d\n", __FUNCTION__, seq ); - } - - return seq; -} - -void intelWaitIrq( struct intel_context *intel, int seq ) -{ - if (!intel->no_hw) { - drmI830IrqWait iw; - int ret, lastdispatch; - - if (0) - fprintf(stderr, "%s %d\n", __FUNCTION__, seq ); - - iw.irq_seq = seq; - - do { - lastdispatch = intel->sarea->last_dispatch; - ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) ); - - /* This seems quite often to return before it should!?! - */ - } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch) - || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24))); - - - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret ); - - if (intel->aub_file) { - intel->vtbl.aub_dump_bmp( intel, intel->ctx.Visual.doubleBufferMode ? 1 : 0 ); - } - - exit(1); - } - } -} - - -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used) -{ - drmI830BatchBuffer batch; - - assert(intel->locked); - assert(used); - - if (0) - fprintf(stderr, "%s used %d offset %x..%x\n", - __FUNCTION__, - used, - start_offset, - start_offset + used); - - batch.start = start_offset; - batch.used = used; - batch.cliprects = NULL; - batch.num_cliprects = 0; - batch.DR1 = 0; - batch.DR4 = 0; - - if (INTEL_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: 0x%x..0x%x\n", - __FUNCTION__, - batch.start, - batch.start + batch.used * 4); - - if (!intel->no_hw) { - if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, - sizeof(batch))) { - fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } - } -} - -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used) -{ - drmI830CmdBuffer cmd; - - assert(intel->locked); - assert(used); - - cmd.buf = buf; - cmd.sz = used; - cmd.cliprects = intel->pClipRects; - cmd.num_cliprects = 0; - cmd.DR1 = 0; - cmd.DR4 = 0; - - if (INTEL_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: 0x%x..0x%x\n", - __FUNCTION__, - 0, - 0 + cmd.sz); - - if (!intel->no_hw) { - if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, - sizeof(cmd))) { - fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } - } -} diff --git a/i965/intel_mipmap_tree.c b/i965/intel_mipmap_tree.c deleted file mode 100644 index 8548bc8..0000000 --- a/i965/intel_mipmap_tree.c +++ /dev/null @@ -1,270 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_context.h" -#include "intel_mipmap_tree.h" -#include "intel_regions.h" -#include "bufmgr.h" -#include "enums.h" -#include "imports.h" - -static GLenum target_to_target( GLenum target ) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return GL_TEXTURE_CUBE_MAP_ARB; - default: - return target; - } -} - -struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel, - GLenum target, - GLenum internal_format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint cpp, - GLboolean compressed) -{ - GLboolean ok; - struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), - first_level, - last_level); - - mt->target = target_to_target(target); - mt->internal_format = internal_format; - mt->first_level = first_level; - mt->last_level = last_level; - mt->width0 = width0; - mt->height0 = height0; - mt->depth0 = depth0; - mt->cpp = compressed ? 2 : cpp; - mt->compressed = compressed; - - switch (intel->intelScreen->deviceID) { -#if 0 - case PCI_CHIP_I945_G: - ok = i945_miptree_layout( mt ); - break; - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - ok = i915_miptree_layout( mt ); - break; -#endif - default: - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("assuming BRW texture layouts\n"); - ok = brw_miptree_layout( intel, mt ); - break; - } - - if (ok) - mt->region = intel_region_alloc( intel, - mt->cpp, - mt->pitch, - mt->total_height ); - - if (!mt->region) { - free(mt); - return NULL; - } - - return mt; -} - - -/** - * intel_miptree_pitch_align: - * - * @intel: intel context pointer - * - * @mt: the miptree to compute pitch alignment for - * - * @pitch: the natural pitch value - * - * Given @pitch, compute a larger value which accounts for - * any necessary alignment required by the device - */ - -int intel_miptree_pitch_align (struct intel_context *intel, - struct intel_mipmap_tree *mt, - int pitch) -{ - if (!mt->compressed) - pitch = ((pitch * mt->cpp + 3) & ~3) / mt->cpp; - - return pitch; -} - - -void intel_miptree_destroy( struct intel_context *intel, - struct intel_mipmap_tree *mt ) -{ - if (mt) { - GLuint i; - - intel_region_release(intel, &(mt->region)); - - for (i = 0; i < MAX_TEXTURE_LEVELS; i++) - if (mt->level[i].image_offset) - free(mt->level[i].image_offset); - - free(mt); - } -} - - - - -void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, - GLuint level, - GLuint nr_images, - GLuint x, GLuint y, - GLuint w, GLuint h, GLuint d) -{ - mt->level[level].width = w; - mt->level[level].height = h; - mt->level[level].depth = d; - mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp; - mt->level[level].nr_images = nr_images; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h, - mt->level[level].level_offset); - - /* Not sure when this would happen, but anyway: - */ - if (mt->level[level].image_offset) { - free(mt->level[level].image_offset); - mt->level[level].image_offset = NULL; - } - - if (nr_images > 1) { - mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint)); - mt->level[level].image_offset[0] = 0; - } -} - - - -void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, - GLuint level, - GLuint img, - GLuint x, GLuint y) -{ - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y); - - if (img == 0) - assert(x == 0 && y == 0); - - if (img > 0) - mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp; -} - - -/* Although we use the image_offset[] array to store relative offsets - * to cube faces, Mesa doesn't know anything about this and expects - * each cube face to be treated as a separate image. - * - * These functions present that view to mesa: - */ -const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, - GLuint level) -{ - static const GLuint zero = 0; - - if (mt->target != GL_TEXTURE_3D || - mt->level[level].nr_images == 1) - return &zero; - else - return mt->level[level].image_offset; -} - - -GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt, - GLuint face, - GLuint level) -{ - if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) - return (mt->level[level].level_offset + - mt->level[level].image_offset[face]); - else - return mt->level[level].level_offset; -} - - - - - - -/* Upload data for a particular image. - */ -GLboolean intel_miptree_image_data(struct intel_context *intel, - struct intel_mipmap_tree *dst, - GLuint face, - GLuint level, - const void *src, - GLuint src_row_pitch, - GLuint src_image_pitch) -{ - GLuint depth = dst->level[level].depth; - GLuint dst_offset = intel_miptree_image_offset(dst, face, level); - const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); - GLuint i; - - DBG("%s\n", __FUNCTION__); - for (i = 0; i < depth; i++) { - if (!intel_region_data(intel, - dst->region, - dst_offset + dst_depth_offset[i], - 0, - 0, - src, - src_row_pitch, - 0, 0, /* source x,y */ - dst->level[level].width, - dst->level[level].height)) - return GL_FALSE; - src += src_image_pitch; - } - return GL_TRUE; -} - diff --git a/i965/intel_mipmap_tree.h b/i965/intel_mipmap_tree.h deleted file mode 100644 index c67c726..0000000 --- a/i965/intel_mipmap_tree.h +++ /dev/null @@ -1,171 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_MIPMAP_TREE_H -#define INTEL_MIPMAP_TREE_H - -#include "intel_regions.h" - -/* A layer on top of the intel_regions code which adds: - * - * - Code to size and layout a region to hold a set of mipmaps. - * - Query to determine if a new image fits in an existing tree. - * - * The fixed mipmap layout of intel hardware where one offset - * specifies the position of all images in a mipmap hierachy - * complicates the implementation of GL texture image commands, - * compared to hardware where each image is specified with an - * independent offset. - * - * In an ideal world, each texture object would be associated with a - * single bufmgr buffer or 2d intel_region, and all the images within - * the texture object would slot into the tree as they arrive. The - * reality can be a little messier, as images can arrive from the user - * with sizes that don't fit in the existing tree, or in an order - * where the tree layout cannot be guessed immediately. - * - * This structure encodes an idealized mipmap tree. The GL image - * commands build these where possible, otherwise store the images in - * temporary system buffers. - */ - - -struct intel_mipmap_level { - GLuint level_offset; - GLuint width; - GLuint height; - GLuint depth; - GLuint nr_images; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - GLuint *image_offset; -}; - -struct intel_mipmap_tree { - /* Effectively the key: - */ - GLenum target; - GLenum internal_format; - - GLuint first_level; - GLuint last_level; - - GLuint width0, height0, depth0; - GLuint cpp; - GLboolean compressed; - - /* Derived from the above: - */ - GLuint pitch; - GLuint depth_pitch; /* per-image on i945? */ - GLuint total_height; - - /* Includes image offset tables: - */ - struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct intel_region *region; - - /* These are also refcounted: - */ - GLuint refcount; -}; - - - -struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel, - GLenum target, - GLenum internal_format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint cpp, - GLboolean compressed); - -int intel_miptree_pitch_align (struct intel_context *intel, - struct intel_mipmap_tree *mt, - int pitch); - -void intel_miptree_destroy( struct intel_context *intel, - struct intel_mipmap_tree *mt ); - - -/* Return the linear offset of an image relative to the start of the - * tree: - */ -GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt, - GLuint face, - GLuint level ); - -/* Return pointers to each 2d slice within an image. Indexed by depth - * value. - */ -const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, - GLuint level); - - -void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, - GLuint level, - GLuint nr_images, - GLuint x, GLuint y, - GLuint w, GLuint h, GLuint d); - -void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, - GLuint level, - GLuint img, - GLuint x, GLuint y); - - -/* Upload an image into a tree - */ -GLboolean intel_miptree_image_data(struct intel_context *intel, - struct intel_mipmap_tree *dst, - GLuint face, - GLuint level, - const void *src, - GLuint src_row_pitch, - GLuint src_image_pitch); - -/* i915_mipmap_tree.c: - */ -GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt ); -GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt ); -GLboolean brw_miptree_layout( struct intel_context *intel, - struct intel_mipmap_tree *mt ); - - - -#endif diff --git a/i965/intel_reg.h b/i965/intel_reg.h deleted file mode 100644 index 3c448b3..0000000 --- a/i965/intel_reg.h +++ /dev/null @@ -1,91 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef _INTEL_REG_H_ -#define _INTEL_REG_H_ - - - -#define CMD_3D (0x3<<29) - - -#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) -#define PRIM_INDIRECT (1<<23) -#define PRIM_INLINE (0<<23) -#define PRIM_INDIRECT_SEQUENTIAL (0<<17) -#define PRIM_INDIRECT_ELTS (1<<17) - -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_MASK (0x1f<<18) - -#define I915PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define I915PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define I915PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define I915PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - - - -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 - -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) - -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) - -#define XY_SRC_TILED (1<<15) -#define XY_DST_TILED (1<<11) - -#define FENCE_LINEAR 0 -#define FENCE_XMAJOR 1 -#define FENCE_YMAJOR 2 - -#endif diff --git a/i965/intel_regions.c b/i965/intel_regions.c deleted file mode 100644 index 835ecdd..0000000 --- a/i965/intel_regions.c +++ /dev/null @@ -1,295 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Provide additional functionality on top of bufmgr buffers: - * - 2d semantics and blit operations - * - refcounting of buffers for multiple images in a buffer. - * - refcounting of buffer mappings. - * - some logic for moving the buffers to the best memory pools for - * given operations. - * - * Most of this is to make it easier to implement the fixed-layout - * mipmap tree required by intel hardware in the face of GL's - * programming interface where each image can be specifed in random - * order and it isn't clear what layout the tree should have until the - * last moment. - */ - -#include "intel_context.h" -#include "intel_regions.h" -#include "intel_blit.h" -#include "bufmgr.h" -#include "imports.h" - -/* XXX: Thread safety? - */ -GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region) -{ - DBG("%s\n", __FUNCTION__); - if (!region->map_refcount++) { - region->map = bmMapBuffer(intel, region->buffer, 0); - if (!region->map) - region->map_refcount--; - } - - return region->map; -} - -void intel_region_unmap(struct intel_context *intel, - struct intel_region *region) -{ - DBG("%s\n", __FUNCTION__); - if (!--region->map_refcount) { - bmUnmapBufferAUB(intel, region->buffer, 0, 0); - region->map = NULL; - } -} - -struct intel_region *intel_region_alloc( struct intel_context *intel, - GLuint cpp, - GLuint pitch, - GLuint height ) -{ - struct intel_region *region = calloc(sizeof(*region), 1); - - DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__, - cpp, pitch, height, cpp*pitch*height); - - region->cpp = cpp; - region->pitch = pitch; - region->height = height; /* needed? */ - region->refcount = 1; - - bmGenBuffers(intel, "tex", 1, ®ion->buffer, 6); - bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0); - - return region; -} - -void intel_region_reference( struct intel_region **dst, - struct intel_region *src) -{ - src->refcount++; - assert(*dst == NULL); - *dst = src; -} - -void intel_region_release( struct intel_context *intel, - struct intel_region **region ) -{ - if (!*region) - return; - - DBG("%s %d\n", __FUNCTION__, (*region)->refcount-1); - - if (--(*region)->refcount == 0) { - assert((*region)->map_refcount == 0); - bmDeleteBuffers(intel, 1, &(*region)->buffer); - free(*region); - } - *region = NULL; -} - - -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ) -{ - struct intel_region *region = calloc(sizeof(*region), 1); - GLint pool; - - DBG("%s\n", __FUNCTION__); - - region->cpp = cpp; - region->pitch = pitch; - region->height = height; /* needed? */ - region->refcount = 1; - region->tiled = tiled; - - /* Recipe for creating a static buffer - create a static pool with - * the right offset and size, generate a buffer and use a special - * call to bind it to all of the memory in that pool. - */ - pool = bmInitPool(intel, offset, virtual, size, - (BM_MEM_AGP | - BM_NO_UPLOAD | - BM_NO_EVICT | - BM_NO_MOVE)); - if (pool < 0) { - _mesa_printf("bmInitPool failed for static region\n"); - exit(1); - } - - region->buffer = bmGenBufferStatic(intel, pool); - - return region; -} - - - - -void _mesa_copy_rect( GLubyte *dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte *src, - GLuint src_pitch, - GLuint src_x, - GLuint src_y ) -{ - GLuint i; - - dst_pitch *= cpp; - src_pitch *= cpp; - dst += dst_x * cpp; - src += src_x * cpp; - dst += dst_y * dst_pitch; - src += src_y * dst_pitch; - width *= cpp; - - if (width == dst_pitch && - width == src_pitch) - do_memcpy(dst, src, height * width); - else { - for (i = 0; i < height; i++) { - do_memcpy(dst, src, width); - dst += dst_pitch; - src += src_pitch; - } - } -} - - -/* Upload data to a rectangular sub-region. Lots of choices how to do this: - * - * - memcpy by span to current destination - * - upload data as new buffer and blit - * - * Currently always memcpy. - */ -GLboolean intel_region_data(struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - const void *src, GLuint src_pitch, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height) -{ - DBG("%s\n", __FUNCTION__); - - if (width == dst->pitch && - width == src_pitch && - dst_offset == 0 && - height == dst->height && - srcx == 0 && - srcy == 0) - { - return (bmBufferDataAUB(intel, - dst->buffer, - dst->cpp * width * dst->height, - src, 0, 0, 0) == 0); - } - else { - GLubyte *map = intel_region_map(intel, dst); - - if (map) { - assert (dst_offset + dstx + width + - (dsty + height - 1) * dst->pitch * dst->cpp <= - dst->pitch * dst->cpp * dst->height); - - _mesa_copy_rect(map + dst_offset, - dst->cpp, - dst->pitch, - dstx, dsty, - width, height, - src, - src_pitch, - srcx, srcy); - - intel_region_unmap(intel, dst); - return GL_TRUE; - } - else - return GL_FALSE; - } -} - -/* Copy rectangular sub-regions. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void intel_region_copy( struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height ) -{ - DBG("%s\n", __FUNCTION__); - - assert(src->cpp == dst->cpp); - - intelEmitCopyBlit(intel, - dst->cpp, - src->pitch, src->buffer, src_offset, src->tiled, - dst->pitch, dst->buffer, dst_offset, dst->tiled, - srcx, srcy, - dstx, dsty, - width, height, - GL_COPY ); -} - -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void intel_region_fill( struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - GLuint width, GLuint height, - GLuint color ) -{ - DBG("%s\n", __FUNCTION__); - - intelEmitFillBlit(intel, - dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiled, - dstx, dsty, - width, height, - color ); -} - diff --git a/i965/intel_regions.h b/i965/intel_regions.h deleted file mode 100644 index d2235f1..0000000 --- a/i965/intel_regions.h +++ /dev/null @@ -1,140 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_REGIONS_H -#define INTEL_REGIONS_H - -#include "mtypes.h" -#include "bufmgr.h" /* for DBG! */ -struct intel_context; - -/* A layer on top of the bufmgr buffers that adds a few useful things: - * - * - Refcounting for local buffer references. - * - Refcounting for buffer maps - * - Buffer dimensions - pitch and height. - * - Blitter commands for copying 2D regions between buffers. - */ -struct intel_region { - struct buffer *buffer; - GLuint refcount; - GLuint cpp; - GLuint pitch; - GLuint height; - GLboolean tiled; - GLubyte *map; - GLuint map_refcount; -}; - -/* Allocate a refcounted region. Pointers to regions should only be - * copied by calling intel_reference_region(). - * - * No support for dynamically allocating tiled regions at this point. - */ -struct intel_region *intel_region_alloc( struct intel_context *intel, - GLuint cpp, - GLuint pitch, - GLuint height ); - -void intel_region_reference( struct intel_region **dst, - struct intel_region *src ); - -void intel_region_release(struct intel_context *intel, - struct intel_region **ib ); - -/* Static regions may be tiled. The assumption is that the X server - * has set up fence registers to define tiled zones in agp and these - * buffers are within those zones. Tiling regions without fence - * registers is more work. - */ -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ); - -/* Map/unmap regions. This is refcounted also: - */ -GLubyte *intel_region_map(struct intel_context *intel, - struct intel_region *ib); - -void intel_region_unmap(struct intel_context *intel, - struct intel_region *ib); - - -/* Upload data to a rectangular sub-region - */ -GLboolean intel_region_data(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - const void *src, GLuint src_stride, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height); - -/* Copy rectangular sub-regions - */ -void intel_region_copy( struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height ); - -/* Fill a rectangular sub-region - */ -void intel_region_fill( struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - GLuint width, GLuint height, - GLuint color ); - - -/*********************************************************************** - * Misc utilities: move to somewhere generic - */ -void _mesa_copy_rect( GLubyte *dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte *src, - GLuint src_pitch, - GLuint src_x, - GLuint src_y ); - - -#endif diff --git a/i965/intel_screen.c b/i965/intel_screen.c deleted file mode 100644 index 5dac50d..0000000 --- a/i965/intel_screen.c +++ /dev/null @@ -1,701 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "context.h" -#include "framebuffer.h" -#include "matrix.h" -#include "renderbuffer.h" -#include "simple_list.h" -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - - -#include "intel_screen.h" - -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_ioctl.h" - -#include "i830_dri.h" - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN - DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END - DRI_CONF_SECTION_QUALITY - DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END -DRI_CONF_END; -const GLuint __driNConfigOptions = 4; - -#ifdef USE_NEW_INTERFACE -static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -#endif /*USE_NEW_INTERFACE*/ - -/** - * Map all the memory regions described by the screen. - * \return GL_TRUE if success, GL_FALSE if error. - */ -GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - if (intelScreen->front.handle) { - if (drmMap(sPriv->fd, - intelScreen->front.handle, - intelScreen->front.size, - (drmAddress *)&intelScreen->front.map) != 0) { - _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); - return GL_FALSE; - } - } else { - /* Use the old static allocation method if the server isn't setting up - * a movable handle for us. Add in the front buffer offset from - * framebuffer start, as our span routines (unlike other drivers) expect - * the renderbuffer address to point to the beginning of the - * renderbuffer. - */ - intelScreen->front.map = (char *)sPriv->pFB; - if (intelScreen->front.map == NULL) { - fprintf(stderr, "Failed to find framebuffer mapping\n"); - return GL_FALSE; - } - } - - if (drmMap(sPriv->fd, - intelScreen->back.handle, - intelScreen->back.size, - (drmAddress *)&intelScreen->back.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->depth.handle, - intelScreen->depth.size, - (drmAddress *)&intelScreen->depth.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->tex.handle, - intelScreen->tex.size, - (drmAddress *)&intelScreen->tex.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (0) - printf("Mappings: front: %p back: %p depth: %p tex: %p\n", - intelScreen->front.map, - intelScreen->back.map, - intelScreen->depth.map, - intelScreen->tex.map); - return GL_TRUE; -} - - -void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen) -{ -#define REALLY_UNMAP 1 - /* If front.handle is present, we're doing the dynamic front buffer mapping, - * but if we've fallen back to static allocation then we shouldn't try to - * unmap here. - */ - if (intelScreen->front.handle) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) - printf("drmUnmap front failed!\n"); -#endif - intelScreen->front.map = NULL; - } - if (intelScreen->back.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) - printf("drmUnmap back failed!\n"); -#endif - intelScreen->back.map = NULL; - } - if (intelScreen->depth.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->depth.map, intelScreen->depth.size); - intelScreen->depth.map = NULL; -#endif - } - if (intelScreen->tex.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->tex.map, intelScreen->tex.size); - intelScreen->tex.map = NULL; -#endif - } -} - - -static void -intelPrintDRIInfo(intelScreenPrivate *intelScreen, - __DRIscreenPrivate *sPriv, - I830DRIPtr gDRIPriv) -{ - fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->front.size, intelScreen->front.offset, - intelScreen->front.pitch); - fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->back.size, intelScreen->back.offset, - intelScreen->back.pitch); - fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->depth.size, intelScreen->depth.offset, - intelScreen->depth.pitch); - fprintf(stderr, "*** Rotated size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->rotated.size, intelScreen->rotated.offset, - intelScreen->rotated.pitch); - fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", - intelScreen->tex.size, intelScreen->tex.offset); - fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); -} - - -static void -intelPrintSAREA(volatile drmI830Sarea *sarea) -{ - fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, sarea->height); - fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); - fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); - fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); - fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->tex_offset, sarea->tex_size, - (unsigned) sarea->tex_handle); - fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); - fprintf(stderr, - "SAREA: rotated offset: 0x%08x size: 0x%x\n", - sarea->rotated_offset, sarea->rotated_size); - fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); -} - - -/** - * A number of the screen parameters are obtained/computed from - * information in the SAREA. This function updates those parameters. - */ -void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - volatile drmI830Sarea *sarea) -{ - intelScreen->width = sarea->width; - intelScreen->height = sarea->height; - - intelScreen->front.offset = sarea->front_offset; - intelScreen->front.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->front.handle = sarea->front_handle; - intelScreen->front.size = sarea->front_size; - intelScreen->front.tiled = sarea->front_tiled; - - intelScreen->back.offset = sarea->back_offset; - intelScreen->back.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->back.handle = sarea->back_handle; - intelScreen->back.size = sarea->back_size; - intelScreen->back.tiled = sarea->back_tiled; - - intelScreen->depth.offset = sarea->depth_offset; - intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->depth.handle = sarea->depth_handle; - intelScreen->depth.size = sarea->depth_size; - intelScreen->depth.tiled = sarea->depth_tiled; - - intelScreen->tex.offset = sarea->tex_offset; - intelScreen->logTextureGranularity = sarea->log_tex_granularity; - intelScreen->tex.handle = sarea->tex_handle; - intelScreen->tex.size = sarea->tex_size; - - intelScreen->rotated.offset = sarea->rotated_offset; - intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp; - intelScreen->rotated.size = sarea->rotated_size; - intelScreen->rotated.tiled = sarea->rotated_tiled; - intelScreen->current_rotation = sarea->rotation; -#if 0 - matrix23Rotate(&intelScreen->rotMatrix, - sarea->width, sarea->height, sarea->rotation); -#endif - intelScreen->rotatedWidth = sarea->virtualX; - intelScreen->rotatedHeight = sarea->virtualY; - - if (0) - intelPrintSAREA(sarea); -} - - -static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen; - I830DRIPtr gDRIPriv = (I830DRIPtr)sPriv->pDevPriv; - PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = - (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); - void * const psc = sPriv->psc->screenConfigs; - volatile drmI830Sarea *sarea; - - if (sPriv->devPrivSize != sizeof(I830DRIRec)) { - fprintf(stderr,"\nERROR! sizeof(I830DRIRec) (%ld) does not match passed size from device driver (%d)\n", (unsigned long)sizeof(I830DRIRec), sPriv->devPrivSize); - return GL_FALSE; - } - - /* Allocate the private area */ - intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate)); - if (!intelScreen) { - fprintf(stderr,"\nERROR! Allocating private area failed\n"); - return GL_FALSE; - } - /* parse information in __driConfigOptions */ - driParseOptionInfo (&intelScreen->optionCache, - __driConfigOptions, __driNConfigOptions); - - intelScreen->driScrnPriv = sPriv; - sPriv->private = (void *)intelScreen; - intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; - sarea = (volatile drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - - intelScreen->deviceID = gDRIPriv->deviceID; - intelScreen->mem = gDRIPriv->mem; - intelScreen->cpp = gDRIPriv->cpp; - - switch (gDRIPriv->bitsPerPixel) { - case 15: intelScreen->fbFormat = DV_PF_555; break; - case 16: intelScreen->fbFormat = DV_PF_565; break; - case 32: intelScreen->fbFormat = DV_PF_8888; break; - } - - intelUpdateScreenFromSAREA(intelScreen, sarea); - - if (0) - intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); - - if (!intelMapScreenRegions(sPriv)) { - fprintf(stderr,"\nERROR! mapping regions\n"); - _mesa_free(intelScreen); - sPriv->private = NULL; - return GL_FALSE; - } - - intelScreen->drmMinor = sPriv->drmMinor; - - /* Determine if IRQs are active? */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_IRQ_ACTIVE; - gp.value = &intelScreen->irq_active; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: %d\n", ret); - return GL_FALSE; - } - } - - /* Determine if batchbuffers are allowed */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_ALLOW_BATCHBUFFER; - gp.value = &intelScreen->allow_batchbuffer; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret); - return GL_FALSE; - } - } - - if (glx_enable_extension != NULL) { - (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); - (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); - (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); - (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); - } - - return GL_TRUE; -} - - -static void intelDestroyScreen(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - intelUnmapScreenRegions(intelScreen); - FREE(intelScreen); - sPriv->private = NULL; -} - -static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv, - __DRIdrawablePrivate *driDrawPriv, - const __GLcontextModes *mesaVis, - GLboolean isPixmap ) -{ - intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private; - - if (isPixmap) { - return GL_FALSE; /* not implemented */ - } else { - GLboolean swStencil = (mesaVis->stencilBits > 0 && - mesaVis->depthBits != 24); - - struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); - - { - driRenderbuffer *frontRb - = driNewRenderbuffer(GL_RGBA, - screen->front.map, - screen->cpp, - screen->front.offset, screen->front.pitch, - driDrawPriv); - intelSetSpanFunctions(frontRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); - } - - if (mesaVis->doubleBufferMode) { - driRenderbuffer *backRb - = driNewRenderbuffer(GL_RGBA, - screen->back.map, - screen->cpp, - screen->back.offset, screen->back.pitch, - driDrawPriv); - intelSetSpanFunctions(backRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); - } - - if (mesaVis->depthBits == 16) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT16, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - else if (mesaVis->depthBits == 24) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT24, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - - if (mesaVis->stencilBits > 0 && !swStencil) { - driRenderbuffer *stencilRb - = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(stencilRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); - } - - _mesa_add_soft_renderbuffers(fb, - GL_FALSE, /* color */ - GL_FALSE, /* depth */ - swStencil, - mesaVis->accumRedBits > 0, - GL_FALSE, /* alpha */ - GL_FALSE /* aux */); - driDrawPriv->driverPrivate = (void *) fb; - - return (driDrawPriv->driverPrivate != NULL); - } -} - -static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) -{ - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); -} - - -/** - * Get information about previous buffer swaps. - */ -static int -intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) -{ - struct intel_context *intel; - - if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) - || (dPriv->driContextPriv->driverPrivate == NULL) - || (sInfo == NULL) ) { - return -1; - } - - intel = dPriv->driContextPriv->driverPrivate; - sInfo->swap_count = intel->swap_count; - sInfo->swap_ust = intel->swap_ust; - sInfo->swap_missed_count = intel->swap_missed_count; - - sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) - ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust ) - : 0.0; - - return 0; -} - - -/* There are probably better ways to do this, such as an - * init-designated function to register chipids and createcontext - * functions. - */ -extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - -extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - -extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - - - - -static GLboolean intelCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ -#if 0 - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - switch (intelScreen->deviceID) { - case PCI_CHIP_845_G: - case PCI_CHIP_I830_M: - case PCI_CHIP_I855_GM: - case PCI_CHIP_I865_G: - return i830CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - return i915CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - default: - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); - return GL_FALSE; - } -#else - return brwCreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); -#endif -} - - -static const struct __DriverAPIRec intelAPI = { - .InitDriver = intelInitDriver, - .DestroyScreen = intelDestroyScreen, - .CreateContext = intelCreateContext, - .DestroyContext = intelDestroyContext, - .CreateBuffer = intelCreateBuffer, - .DestroyBuffer = intelDestroyBuffer, - .SwapBuffers = intelSwapBuffers, - .MakeCurrent = intelMakeCurrent, - .UnbindContext = intelUnbindContext, - .GetSwapInfo = intelGetSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = intelCopySubBuffer -}; - - -static __GLcontextModes * -intelFillInModes( unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer ) -{ - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - u_int8_t depth_bits_array[3]; - u_int8_t stencil_bits_array[3]; - - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = depth_buffer_factor * back_buffer_factor * 4; - - if ( pixel_bits == 16 ) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); - m = modes; - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_TRUE_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_DIRECT_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - /* Mark the visual as slow if there are "fake" stencil bits. - */ - for ( m = modes ; m != NULL ; m = m->next ) { - if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { - m->visualRating = GLX_SLOW_CONFIG; - } - } - - return modes; -} - - -/** - * This is the bootstrap function for the driver. libGL supplies all of the - * requisite information about the system, and the driver initializes itself. - * This routine also fills in the linked list pointed to by \c driver_modes - * with the \c __GLcontextModes that the driver can support for windows or - * pbuffers. - * - * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on - * failure. - */ -PUBLIC -void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc, - const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - drmAddress pSAREA, int fd, - int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes ) - -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = { 1, 6, 0 }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = { 1, 3, 0 }; - - dri_interface = interface; - - if ( ! driCheckDriDdxDrmVersions2( "i915", - dri_version, & dri_expected, - ddx_version, & ddx_expected, - drm_version, & drm_expected ) ) { - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &intelAPI); - if ( psp != NULL ) { - I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; - *driver_modes = intelFillInModes( dri_priv->cpp * 8, - (dri_priv->cpp == 2) ? 16 : 24, - (dri_priv->cpp == 2) ? 0 : 8, - GL_TRUE ); - /* Calling driInitExtensions here, with a NULL context pointer, does not actually - * enable the extensions. It just makes sure that all the dispatch offsets for all - * the extensions that *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create is called, but we can't - * enable the extensions until we have a context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - intelInitExtensions(NULL, GL_FALSE); - } - - return (void *) psp; -} diff --git a/i965/intel_span.c b/i965/intel_span.c deleted file mode 100644 index 60fbecc..0000000 --- a/i965/intel_span.c +++ /dev/null @@ -1,283 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "macros.h" -#include "mtypes.h" -#include "colormac.h" - -#include "intel_screen.h" -#include "intel_regions.h" -#include "intel_span.h" -#include "intel_ioctl.h" -#include "intel_tex.h" -#include "intel_batchbuffer.h" -#include "swrast/swrast.h" - -#undef DBG -#define DBG 0 - -#define LOCAL_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLushort p; \ - (void) buf; (void) p - -#define LOCAL_DEPTH_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch - -#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR565(color[0],color[1],color[2]) - -#define Y_FLIP(_y) (height - _y - 1) - -#define HW_LOCK() - -#define HW_UNLOCK() - -/* 16 bit, 565 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = ( (((int)r & 0xf8) << 8) | \ - (((int)g & 0xfc) << 3) | \ - (((int)b & 0xf8) >> 3)) -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (((p >> 11) & 0x1f) * 255) / 31; \ - rgba[1] = (((p >> 5) & 0x3f) * 255) / 63; \ - rgba[2] = (((p >> 0) & 0x1f) * 255) / 31; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_565 -#include "spantmp.h" - -/* 15 bit, 555 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = (((r & 0xf8) << 7) | \ - ((g & 0xf8) << 3) | \ - ((b & 0xf8) >> 3)) - -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (p >> 7) & 0xf8; \ - rgba[1] = (p >> 3) & 0xf8; \ - rgba[2] = (p << 3) & 0xf8; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_555 -#include "spantmp.h" - -/* 16 bit depthbuffer functions. - */ -#define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch); - - -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" - - -#undef LOCAL_VARS -#define LOCAL_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *)drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLuint p; \ - (void) buf; (void) p - -#undef INIT_MONO_PIXEL -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]) - -/* 32 bit, 8888 argb color spanline and pixel functions - */ -#define WRITE_RGBA(_x, _y, r, g, b, a) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) | \ - (g << 8) | \ - (b << 0) | \ - (a << 24) ) - -#define WRITE_PIXEL(_x, _y, p) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = p - - -#define READ_RGBA(rgba, _x, _y) \ - do { \ - GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch); \ - rgba[0] = (p >> 16) & 0xff; \ - rgba[1] = (p >> 8) & 0xff; \ - rgba[2] = (p >> 0) & 0xff; \ - rgba[3] = (p >> 24) & 0xff; \ - } while (0) - -#define TAG(x) intel##x##_8888 -#include "spantmp.h" - - -/* 24/8 bit interleaved depth/stencil functions - */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xff000000; \ - tmp |= (d) & 0xffffff; \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff; - - -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xffffff; \ - tmp |= ((d)<<24); \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_STENCIL( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24; - -#define TAG(x) intel##x##_z24_s8 -#include "stenciltmp.h" - - -/* Move locking out to get reasonable span performance. - */ -void intelSpanRenderStart( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context(ctx); - - if (intel->need_flush) { - LOCK_HARDWARE(intel); - intel->vtbl.emit_flush(intel, 0); - intel_batchbuffer_flush(intel->batch); - intel->need_flush = 0; - UNLOCK_HARDWARE(intel); - intelFinish(&intel->ctx); - } - - - LOCK_HARDWARE(intel); - - /* Just map the framebuffer and all textures. Bufmgr code will - * take care of waiting on the necessary fences: - */ - intel_region_map(intel, intel->front_region); - intel_region_map(intel, intel->back_region); - intel_region_map(intel, intel->depth_region); -} - -void intelSpanRenderFinish( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - _swrast_flush( ctx ); - - /* Now unmap the framebuffer: - */ - intel_region_unmap(intel, intel->front_region); - intel_region_unmap(intel, intel->back_region); - intel_region_unmap(intel, intel->depth_region); - - UNLOCK_HARDWARE( intel ); -} - -void intelInitSpanFuncs( GLcontext *ctx ) -{ - struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); - swdd->SpanRenderStart = intelSpanRenderStart; - swdd->SpanRenderFinish = intelSpanRenderFinish; -} - - -/** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ -void -intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) -{ - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) { - intelInitPointers_555(&drb->Base); - } - else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { - intelInitPointers_565(&drb->Base); - } - else { - assert(vis->redBits == 8); - assert(vis->greenBits == 8); - assert(vis->blueBits == 8); - intelInitPointers_8888(&drb->Base); - } - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - intelInitDepthPointers_z24_s8(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - intelInitStencilPointers_z24_s8(&drb->Base); - } -} diff --git a/i965/intel_state.c b/i965/intel_state.c index 701b30c..0fba5a7 100644 --- a/i965/intel_state.c +++ b/i965/intel_state.c @@ -195,15 +195,16 @@ int intel_translate_logic_op( GLenum opcode ) static void intelClearColor(GLcontext *ctx, const GLfloat color[4]) { struct intel_context *intel = intel_context(ctx); - intelScreenPrivate *screen = intel->intelScreen; UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color); - intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3]); + intel->ClearColor8888 = INTEL_PACKCOLOR8888(intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2], + intel->clear_chan[3]); + intel->ClearColor565 = INTEL_PACKCOLOR565(intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2]); } @@ -222,99 +223,3 @@ void intelInitStateFuncs( struct dd_function_table *functions ) functions->RenderMode = intelRenderMode; functions->ClearColor = intelClearColor; } - - - - -void intelInitState( GLcontext *ctx ) -{ - /* Mesa should do this for us: - */ - ctx->Driver.AlphaFunc( ctx, - ctx->Color.AlphaFunc, - ctx->Color.AlphaRef); - - ctx->Driver.BlendColor( ctx, - ctx->Color.BlendColor ); - - ctx->Driver.BlendEquationSeparate( ctx, - ctx->Color.BlendEquationRGB, - ctx->Color.BlendEquationA); - - ctx->Driver.BlendFuncSeparate( ctx, - ctx->Color.BlendSrcRGB, - ctx->Color.BlendDstRGB, - ctx->Color.BlendSrcA, - ctx->Color.BlendDstA); - - ctx->Driver.ColorMask( ctx, - ctx->Color.ColorMask[RCOMP], - ctx->Color.ColorMask[GCOMP], - ctx->Color.ColorMask[BCOMP], - ctx->Color.ColorMask[ACOMP]); - - ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode ); - ctx->Driver.DepthFunc( ctx, ctx->Depth.Func ); - ctx->Driver.DepthMask( ctx, ctx->Depth.Mask ); - - ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled ); - ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled ); - ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled ); - ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled ); - ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag ); - ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test ); - ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag ); - ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled ); - ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled ); - ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag ); - ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag ); - ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled ); - ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled ); - ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE ); - ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE ); - - ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); - ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 ); - ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); - ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); - ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); - - ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace ); - - { - GLfloat f = (GLfloat)ctx->Light.Model.ColorControl; - ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f ); - } - - ctx->Driver.LineWidth( ctx, ctx->Line.Width ); - ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp ); - ctx->Driver.PointSize( ctx, ctx->Point.Size ); - ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple ); - ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); - ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel ); - ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT, - ctx->Stencil.Function[0], - ctx->Stencil.Ref[0], - ctx->Stencil.ValueMask[0] ); - ctx->Driver.StencilFuncSeparate( ctx, GL_BACK, - ctx->Stencil.Function[1], - ctx->Stencil.Ref[1], - ctx->Stencil.ValueMask[1] ); - ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] ); - ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] ); - ctx->Driver.StencilOpSeparate( ctx, GL_FRONT, - ctx->Stencil.FailFunc[0], - ctx->Stencil.ZFailFunc[0], - ctx->Stencil.ZPassFunc[0]); - ctx->Driver.StencilOpSeparate( ctx, GL_BACK, - ctx->Stencil.FailFunc[1], - ctx->Stencil.ZFailFunc[1], - ctx->Stencil.ZPassFunc[1]); - - - ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] ); -} diff --git a/i965/intel_tex.c b/i965/intel_tex.c deleted file mode 100644 index 4523969..0000000 --- a/i965/intel_tex.c +++ /dev/null @@ -1,315 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "mtypes.h" -#include "image.h" -#include "texstore.h" -#include "texformat.h" -#include "teximage.h" -#include "texobj.h" -#include "swrast/swrast.h" - - -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_mipmap_tree.h" - - -static GLuint target_to_face( GLenum target ) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - -static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_teximage1d( ctx, target, level, internalFormat, - width, border, format, type, - pixels, packing, texObj, texImage ); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1; -} - -static void intelTexSubImage1D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1; -} - - -/* Handles 2D, CUBE, RECT: - */ -static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_teximage2d( ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, packing, texObj, texImage ); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - -static void intelTexSubImage2D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - -static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, - height, border, imageSize, data, texObj, texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - - -static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, imageSize, data, texObj, texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - - -static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1 << 0; -} - - -static void -intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1 << 0; -} - - - - -static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, - GLuint name, - GLenum target ) -{ - struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object); - - _mesa_initialize_texture_object(&obj->base, name, target); - - return &obj->base; -} - -static GLboolean intelIsTextureResident(GLcontext *ctx, - struct gl_texture_object *texObj) -{ -#if 0 - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - return - intelObj->mt && - intelObj->mt->region && - intel_is_region_resident(intel, intelObj->mt->region); -#endif - return 1; -} - - - -static void intelTexParameter( GLcontext *ctx, - GLenum target, - struct gl_texture_object *texObj, - GLenum pname, - const GLfloat *params ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - switch (pname) { - /* Anything which can affect the calculation of firstLevel and - * lastLevel, as changes to these may invalidate the miptree. - */ - case GL_TEXTURE_MIN_FILTER: - case GL_TEXTURE_MAG_FILTER: - case GL_TEXTURE_BASE_LEVEL: - case GL_TEXTURE_MAX_LEVEL: - case GL_TEXTURE_MIN_LOD: - case GL_TEXTURE_MAX_LOD: - intelObj->dirty |= 1; - break; - - default: - break; - } -} - - -static void -intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - if (intelObj->mt) - intel_miptree_destroy(intel, intelObj->mt); - - _mesa_delete_texture_object( ctx, texObj ); -} - -void intelInitTextureFuncs( struct dd_function_table *functions ) -{ - functions->NewTextureObject = intelNewTextureObject; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = _swrast_copy_teximage1d; - functions->CopyTexImage2D = _swrast_copy_teximage2d; - functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d; - functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; - functions->CopyTexSubImage3D = _swrast_copy_texsubimage3d; - functions->DeleteTexture = intel_delete_texture_object; - functions->UpdateTexturePalette = NULL; - functions->IsTextureResident = intelIsTextureResident; - functions->TestProxyTexImage = _mesa_test_proxy_teximage; - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; - functions->TexParameter = intelTexParameter; -} - - - - - diff --git a/i965/intel_tex_validate.c b/i965/intel_tex_validate.c deleted file mode 100644 index cb23b9d..0000000 --- a/i965/intel_tex_validate.c +++ /dev/null @@ -1,256 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "mtypes.h" -#include "macros.h" - -#include "intel_context.h" -#include "intel_mipmap_tree.h" -#include "intel_tex.h" -#include "bufmgr.h" - -/** - * Compute which mipmap levels that really need to be sent to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. - */ -static void intel_calculate_first_last_level( struct intel_texture_object *intelObj ) -{ - struct gl_texture_object *tObj = &intelObj->base; - const struct gl_texture_image * const baseImage = - tObj->Image[0][tObj->BaseLevel]; - - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - - /* Yes, this looks overly complicated, but it's all needed. - */ - switch (tObj->Target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP: - if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { - /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. - */ - firstLevel = lastLevel = tObj->BaseLevel; - } - else { - /* Currently not taking min/max lod into account here, those - * values are programmed as sampler state elsewhere and we - * upload the same mipmap levels regardless. Not sure if - * this makes sense as it means it isn't possible for the app - * to use min/max lod to reduce texture memory pressure: - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } - break; - case GL_TEXTURE_RECTANGLE_NV: - case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; - break; - default: - return; - } - - /* save these values */ - intelObj->firstLevel = firstLevel; - intelObj->lastLevel = lastLevel; -} - -static GLboolean copy_image_data_to_tree( struct intel_context *intel, - struct intel_texture_object *intelObj, - struct gl_texture_image *texImage, - GLuint face, - GLuint level) -{ - return intel_miptree_image_data(intel, - intelObj->mt, - face, - level, - texImage->Data, - texImage->RowStride, - (texImage->RowStride * - texImage->Height * - texImage->TexFormat->TexelBytes)); -} - -static void intel_texture_invalidate( struct intel_texture_object *intelObj ) -{ - GLint nr_faces, face; - intelObj->dirty = ~0; - - nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - for (face = 0; face < nr_faces; face++) - intelObj->dirty_images[face] = ~0; -} - -static void intel_texture_invalidate_cb( struct intel_context *intel, - void *ptr ) -{ - intel_texture_invalidate( (struct intel_texture_object *) ptr ); -} - - -/* - */ -GLuint intel_finalize_mipmap_tree( struct intel_context *intel, - struct gl_texture_object *tObj ) -{ - struct intel_texture_object *intelObj = intel_texture_object(tObj); - GLuint face, i; - GLuint nr_faces = 0; - struct gl_texture_image *firstImage; - - if( tObj == intel->frame_buffer_texobj ) - return GL_FALSE; - - /* We know/require this is true by now: - */ - assert(intelObj->base.Complete); - - /* What levels must the tree include at a minimum? - */ - if (intelObj->dirty) { - intel_calculate_first_last_level( intelObj ); -/* intel_miptree_destroy(intel, intelObj->mt); */ -/* intelObj->mt = NULL; */ - } - - firstImage = intelObj->base.Image[0][intelObj->firstLevel]; - - /* Fallback case: - */ - if (firstImage->Border) { - if (intelObj->mt) { - intel_miptree_destroy(intel, intelObj->mt); - intelObj->mt = NULL; - /* Set all images dirty: - */ - intel_texture_invalidate(intelObj); - } - return GL_FALSE; - } - - - - /* Check tree can hold all active levels. Check tree matches - * target, imageFormat, etc. - */ - if (intelObj->mt && - (intelObj->mt->target != intelObj->base.Target || - intelObj->mt->internal_format != firstImage->InternalFormat || - intelObj->mt->first_level != intelObj->firstLevel || - intelObj->mt->last_level != intelObj->lastLevel || - intelObj->mt->width0 != firstImage->Width || - intelObj->mt->height0 != firstImage->Height || - intelObj->mt->depth0 != firstImage->Depth || - intelObj->mt->cpp != firstImage->TexFormat->TexelBytes || - intelObj->mt->compressed != firstImage->IsCompressed)) - { - intel_miptree_destroy(intel, intelObj->mt); - intelObj->mt = NULL; - - /* Set all images dirty: - */ - intel_texture_invalidate(intelObj); - } - - - /* May need to create a new tree: - */ - if (!intelObj->mt) { - intelObj->mt = intel_miptree_create(intel, - intelObj->base.Target, - firstImage->InternalFormat, - intelObj->firstLevel, - intelObj->lastLevel, - firstImage->Width, - firstImage->Height, - firstImage->Depth, - firstImage->TexFormat->TexelBytes, - firstImage->IsCompressed); - - /* Tell the buffer manager that we will manage the backing - * store, but we still want it to do fencing for us. - */ - bmBufferSetInvalidateCB(intel, - intelObj->mt->region->buffer, - intel_texture_invalidate_cb, - intelObj, - GL_FALSE); - } - - /* Pull in any images not in the object's tree: - */ - if (intelObj->dirty) { - nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - for (face = 0; face < nr_faces; face++) { - if (intelObj->dirty_images[face]) { - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { - struct gl_texture_image *texImage = intelObj->base.Image[face][i]; - - /* Need to import images in main memory or held in other trees. - */ - if (intelObj->dirty_images[face] & (1<<i) && - texImage) { - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("copy data from image %d (%p) into object miptree\n", - i, - texImage->Data); - - if (!copy_image_data_to_tree(intel, - intelObj, - texImage, - face, - i)) - return GL_FALSE; - - } - } - } - } - - /* Only clear the dirty flags if everything went ok: - */ - for (face = 0; face < nr_faces; face++) { - intelObj->dirty_images[face] = 0; - } - - intelObj->dirty = 0; - } - - return GL_TRUE; -} diff --git a/i965/server/i830_common.h b/i965/server/i830_common.h deleted file mode 100644 index fe2b8e8..0000000 --- a/i965/server/i830_common.h +++ /dev/null @@ -1,232 +0,0 @@ -/************************************************************************** - -Copyright 2001 VA Linux Systems Inc., Fremont, California. -Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */ - -#ifndef _I830_COMMON_H_ -#define _I830_COMMON_H_ - - -#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ -#define I830_LOG_MIN_TEX_REGION_SIZE 14 - - -/* Driver specific DRM command indices - * NOTE: these are not OS specific, but they are driver specific - */ -#define DRM_I830_INIT 0x00 -#define DRM_I830_FLUSH 0x01 -#define DRM_I830_FLIP 0x02 -#define DRM_I830_BATCHBUFFER 0x03 -#define DRM_I830_IRQ_EMIT 0x04 -#define DRM_I830_IRQ_WAIT 0x05 -#define DRM_I830_GETPARAM 0x06 -#define DRM_I830_SETPARAM 0x07 -#define DRM_I830_ALLOC 0x08 -#define DRM_I830_FREE 0x09 -#define DRM_I830_INIT_HEAP 0x0a -#define DRM_I830_CMDBUFFER 0x0b -#define DRM_I830_DESTROY_HEAP 0x0c -#define DRM_I830_MMIO 0x10 - -typedef struct { - enum { - I830_INIT_DMA = 0x01, - I830_CLEANUP_DMA = 0x02, - I830_RESUME_DMA = 0x03 - } func; - unsigned int mmio_offset; - int sarea_priv_offset; - unsigned int ring_start; - unsigned int ring_end; - unsigned int ring_size; - unsigned int front_offset; - unsigned int back_offset; - unsigned int depth_offset; - unsigned int w; - unsigned int h; - unsigned int pitch; - unsigned int pitch_bits; - unsigned int back_pitch; - unsigned int depth_pitch; - unsigned int cpp; - unsigned int chipset; -} drmI830Init; - -typedef struct { - drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; - int last_upload; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - volatile int last_dispatch; /* age of the most recently dispatched buffer */ - int ctxOwner; /* last context to upload state */ - int texAge; - int pf_enabled; /* is pageflipping allowed? */ - int pf_active; - int pf_current_page; /* which buffer is being displayed? */ - int perf_boxes; /* performance boxes to be displayed */ - int width, height; /* screen size in pixels */ - - drm_handle_t front_handle; - int front_offset; - int front_size; - - drm_handle_t back_handle; - int back_offset; - int back_size; - - drm_handle_t depth_handle; - int depth_offset; - int depth_size; - - drm_handle_t tex_handle; - int tex_offset; - int tex_size; - int log_tex_granularity; - int pitch; - int rotation; /* 0, 90, 180 or 270 */ - int rotated_offset; - int rotated_size; - int rotated_pitch; - int virtualX, virtualY; - - unsigned int front_tiled; - unsigned int back_tiled; - unsigned int depth_tiled; - unsigned int rotated_tiled; - unsigned int rotated2_tiled; - - int pipeA_x; - int pipeA_y; - int pipeA_w; - int pipeA_h; - int pipeB_x; - int pipeB_y; - int pipeB_w; - int pipeB_h; - -} drmI830Sarea; - -/* Flags for perf_boxes - */ -#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ -#define I830_BOX_FLIP 0x2 /* populated by kernel */ -#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ -#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ -#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ - - -typedef struct { - int start; /* agp offset */ - int used; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830BatchBuffer; - -typedef struct { - char *buf; /* agp offset */ - int sz; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830CmdBuffer; - -typedef struct { - int *irq_seq; -} drmI830IrqEmit; - -typedef struct { - int irq_seq; -} drmI830IrqWait; - -typedef struct { - int param; - int *value; -} drmI830GetParam; - -#define I830_PARAM_IRQ_ACTIVE 1 -#define I830_PARAM_ALLOW_BATCHBUFFER 2 - -typedef struct { - int param; - int value; -} drmI830SetParam; - -#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 -#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 -#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 - - -/* A memory manager for regions of shared memory: - */ -#define I830_MEM_REGION_AGP 1 - -typedef struct { - int region; - int alignment; - int size; - int *region_offset; /* offset from start of fb or agp */ -} drmI830MemAlloc; - -typedef struct { - int region; - int region_offset; -} drmI830MemFree; - -typedef struct { - int region; - int size; - int start; -} drmI830MemInitHeap; - -typedef struct { - int region; -} drmI830MemDestroyHeap; - -#define MMIO_READ 0 -#define MMIO_WRITE 1 - -#define MMIO_REGS_IA_PRIMATIVES_COUNT 0 -#define MMIO_REGS_IA_VERTICES_COUNT 1 -#define MMIO_REGS_VS_INVOCATION_COUNT 2 -#define MMIO_REGS_GS_PRIMITIVES_COUNT 3 -#define MMIO_REGS_GS_INVOCATION_COUNT 4 -#define MMIO_REGS_CL_PRIMITIVES_COUNT 5 -#define MMIO_REGS_CL_INVOCATION_COUNT 6 -#define MMIO_REGS_PS_INVOCATION_COUNT 7 -#define MMIO_REGS_PS_DEPTH_COUNT 8 - -typedef struct { - unsigned int read_write:1; - unsigned int reg:31; - void __user *data; -} drmI830MMIO; - -#endif /* _I830_DRM_H_ */ diff --git a/i965/server/intel.h b/i965/server/intel.h deleted file mode 100644 index d7858a2..0000000 --- a/i965/server/intel.h +++ /dev/null @@ -1,328 +0,0 @@ -#ifndef _INTEL_H_ -#define _INTEL_H_ - -#include "xf86drm.h" /* drm_handle_t, etc */ - -/* Intel */ -#ifndef PCI_CHIP_I810 -#define PCI_CHIP_I810 0x7121 -#define PCI_CHIP_I810_DC100 0x7123 -#define PCI_CHIP_I810_E 0x7125 -#define PCI_CHIP_I815 0x1132 -#define PCI_CHIP_I810_BRIDGE 0x7120 -#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 -#define PCI_CHIP_I810_E_BRIDGE 0x7124 -#define PCI_CHIP_I815_BRIDGE 0x1130 -#endif - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 - -#ifndef PCI_CHIP_I855_GM -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I855_GM_BRIDGE 0x3580 -#endif - -#ifndef PCI_CHIP_I865_G -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I865_G_BRIDGE 0x2570 -#endif - -#ifndef PCI_CHIP_I915_G -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I915_GM -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I915_GM_BRIDGE 0x2590 -#endif - -#ifndef PCI_CHIP_E7221_G -#define PCI_CHIP_E7221_G 0x258A -/* Same as I915_G_BRIDGE */ -#define PCI_CHIP_E7221_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I945_G -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_G_BRIDGE 0x2770 -#endif - -#ifndef PCI_CHIP_I945_GM -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 -#endif - -#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \ - pI810->Chipset == PCI_CHIP_I810_DC100 || \ - pI810->Chipset == PCI_CHIP_I810_E) -#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815) -#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M) -#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G) -#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM) -#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME)) -#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME)) -#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G) - -#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G) -#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM) -#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G) -#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM) -#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810)) - -#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810)) - -#define I830_GMCH_CTRL 0x52 - - -#define I830_GMCH_GMS_MASK 0x70 -#define I830_GMCH_GMS_DISABLED 0x00 -#define I830_GMCH_GMS_LOCAL 0x10 -#define I830_GMCH_GMS_STOLEN_512 0x20 -#define I830_GMCH_GMS_STOLEN_1024 0x30 -#define I830_GMCH_GMS_STOLEN_8192 0x40 - -#define I855_GMCH_GMS_MASK (0x7 << 4) -#define I855_GMCH_GMS_DISABLED 0x00 -#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) -#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) -#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) -#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) -#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) -#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4) -#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4) - -typedef unsigned char Bool; -#define TRUE 1 -#define FALSE 0 - -#define PIPE_NONE 0<<0 -#define PIPE_CRT 1<<0 -#define PIPE_TV 1<<1 -#define PIPE_DFP 1<<2 -#define PIPE_LFP 1<<3 -#define PIPE_CRT2 1<<4 -#define PIPE_TV2 1<<5 -#define PIPE_DFP2 1<<6 -#define PIPE_LFP2 1<<7 - -typedef struct _I830MemPool *I830MemPoolPtr; -typedef struct _I830MemRange *I830MemRangePtr; -typedef struct _I830MemRange { - long Start; - long End; - long Size; - unsigned long Physical; - unsigned long Offset; /* Offset of AGP-allocated portion */ - unsigned long Alignment; - drm_handle_t Key; - unsigned long Pitch; // add pitch - I830MemPoolPtr Pool; -} I830MemRange; - -typedef struct _I830MemPool { - I830MemRange Total; - I830MemRange Free; - I830MemRange Fixed; - I830MemRange Allocated; -} I830MemPool; - -typedef struct { - int tail_mask; - I830MemRange mem; - unsigned char *virtual_start; - int head; - int tail; - int space; -} I830RingBuffer; - -typedef struct _I830Rec { - unsigned char *MMIOBase; - unsigned char *FbBase; - int cpp; - - unsigned int bios_version; - - /* These are set in PreInit and never changed. */ - long FbMapSize; - long TotalVideoRam; - I830MemRange StolenMemory; /* pre-allocated memory */ - long BIOSMemorySize; /* min stolen pool size */ - int BIOSMemSizeLoc; - - /* These change according to what has been allocated. */ - long FreeMemory; - I830MemRange MemoryAperture; - I830MemPool StolenPool; - long allocatedMemory; - - /* Regions allocated either from the above pools, or from agpgart. */ - /* for single and dual head configurations */ - I830MemRange FrontBuffer; - I830MemRange FrontBuffer2; - I830MemRange Scratch; - I830MemRange Scratch2; - - I830RingBuffer *LpRing; - - I830MemRange BackBuffer; - I830MemRange DepthBuffer; - I830MemRange TexMem; - int TexGranularity; - I830MemRange ContextMem; - int drmMinor; - Bool have3DWindows; - - Bool NeedRingBufferLow; - Bool allowPageFlip; - Bool disableTiling; - - int Chipset; - unsigned long LinearAddr; - unsigned long MMIOAddr; - - drmSize registerSize; /**< \brief MMIO register map size */ - drm_handle_t registerHandle; /**< \brief MMIO register map handle */ - // IOADDRESS ioBase; - int irq; /**< \brief IRQ number */ - int GttBound; - - drm_handle_t ring_map; - unsigned int Fence[8]; - -} I830Rec; - -/* - * 12288 is set as the maximum, chosen because it is enough for - * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare. - */ -#define I830_MAXIMUM_VBIOS_MEM 12288 -#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024) -#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024) - -/* Flags for memory allocation function */ -#define FROM_ANYWHERE 0x00000000 -#define FROM_POOL_ONLY 0x00000001 -#define FROM_NEW_ONLY 0x00000002 -#define FROM_MASK 0x0000000f - -#define ALLOCATE_AT_TOP 0x00000010 -#define ALLOCATE_AT_BOTTOM 0x00000020 -#define FORCE_GAPS 0x00000040 - -#define NEED_PHYSICAL_ADDR 0x00000100 -#define ALIGN_BOTH_ENDS 0x00000200 -#define FORCE_LOW 0x00000400 - -#define ALLOC_NO_TILING 0x00001000 -#define ALLOC_INITIAL 0x00002000 - -#define ALLOCATE_DRY_RUN 0x80000000 - -/* Chipset registers for VIDEO BIOS memory RW access */ -#define _855_DRAM_RW_CONTROL 0x58 -#define _845_DRAM_RW_CONTROL 0x90 -#define DRAM_WRITE 0x33330000 - -#define KB(x) ((x) * 1024) -#define MB(x) ((x) * KB(1024)) - -#define GTT_PAGE_SIZE KB(4) -#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y)) -#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y)) -#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE) -#define ROUND_TO_MB(x) ROUND_TO((x), MB(1)) -#define PRIMARY_RINGBUFFER_SIZE KB(128) - - -/* Ring buffer registers, p277, overview p19 - */ -#define LP_RING 0x2030 -#define HP_RING 0x2040 - -#define RING_TAIL 0x00 -#define TAIL_ADDR 0x000FFFF8 -#define I830_TAIL_MASK 0x001FFFF8 - -#define RING_HEAD 0x04 -#define HEAD_WRAP_COUNT 0xFFE00000 -#define HEAD_WRAP_ONE 0x00200000 -#define HEAD_ADDR 0x001FFFFC -#define I830_HEAD_MASK 0x001FFFFC - -#define RING_START 0x08 -#define START_ADDR 0x03FFFFF8 -#define I830_RING_START_MASK 0xFFFFF000 - -#define RING_LEN 0x0C -#define RING_NR_PAGES 0x001FF000 -#define I830_RING_NR_PAGES 0x001FF000 -#define RING_REPORT_MASK 0x00000006 -#define RING_REPORT_64K 0x00000002 -#define RING_REPORT_128K 0x00000004 -#define RING_NO_REPORT 0x00000000 -#define RING_VALID_MASK 0x00000001 -#define RING_VALID 0x00000001 -#define RING_INVALID 0x00000000 - - -/* Fence/Tiling ranges [0..7] - */ -#define FENCE 0x2000 -#define FENCE_NR 8 - -#define I915G_FENCE_START_MASK 0x0ff00000 - -#define I830_FENCE_START_MASK 0x07f80000 - -#define FENCE_START_MASK 0x03F80000 -#define FENCE_X_MAJOR 0x00000000 -#define FENCE_Y_MAJOR 0x00001000 -#define FENCE_SIZE_MASK 0x00000700 -#define FENCE_SIZE_512K 0x00000000 -#define FENCE_SIZE_1M 0x00000100 -#define FENCE_SIZE_2M 0x00000200 -#define FENCE_SIZE_4M 0x00000300 -#define FENCE_SIZE_8M 0x00000400 -#define FENCE_SIZE_16M 0x00000500 -#define FENCE_SIZE_32M 0x00000600 -#define FENCE_SIZE_64M 0x00000700 -#define I915G_FENCE_SIZE_1M 0x00000000 -#define I915G_FENCE_SIZE_2M 0x00000100 -#define I915G_FENCE_SIZE_4M 0x00000200 -#define I915G_FENCE_SIZE_8M 0x00000300 -#define I915G_FENCE_SIZE_16M 0x00000400 -#define I915G_FENCE_SIZE_32M 0x00000500 -#define I915G_FENCE_SIZE_64M 0x00000600 -#define I915G_FENCE_SIZE_128M 0x00000700 -#define FENCE_PITCH_1 0x00000000 -#define FENCE_PITCH_2 0x00000010 -#define FENCE_PITCH_4 0x00000020 -#define FENCE_PITCH_8 0x00000030 -#define FENCE_PITCH_16 0x00000040 -#define FENCE_PITCH_32 0x00000050 -#define FENCE_PITCH_64 0x00000060 -#define FENCE_VALID 0x00000001 - -#include <mmio.h> - -# define MMIO_IN8(base, offset) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) -# define MMIO_IN32(base, offset) \ - read_MMIO_LE32(base, offset) -# define MMIO_OUT8(base, offset, val) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) -# define MMIO_OUT32(base, offset, val) \ - *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) - - - /* Memory mapped register access macros */ -#define INREG8(addr) MMIO_IN8(MMIO, addr) -#define INREG(addr) MMIO_IN32(MMIO, addr) -#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val) -#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val) - -#define DSPABASE 0x70184 - -#endif diff --git a/i965/server/intel_dri.c b/i965/server/intel_dri.c deleted file mode 100644 index 169fdbe..0000000 --- a/i965/server/intel_dri.c +++ /dev/null @@ -1,1282 +0,0 @@ -/** - * \file server/intel_dri.c - * \brief File to perform the device-specific initialization tasks typically - * done in the X server. - * - * Here they are converted to run in the client (or perhaps a standalone - * process), and to work with the frame buffer device rather than the X - * server infrastructure. - * - * Copyright (C) 2006 Dave Airlie (airlied@linux.ie) - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sub license, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial portions - of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <unistd.h> - -#include "driver.h" -#include "drm.h" - -#include "intel.h" -#include "i830_dri.h" - -#include "memops.h" -#include "pciaccess.h" - -static size_t drm_page_size; -static int nextTile = 0; -#define xf86DrvMsg(...) do {} while(0) - -static const int pitches[] = { - 128 * 8, - 128 * 16, - 128 * 32, - 128 * 64, - 0 -}; - -static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea); - -static unsigned long -GetBestTileAlignment(unsigned long size) -{ - unsigned long i; - - for (i = KB(512); i < size; i <<= 1) - ; - - if (i > MB(64)) - i = MB(64); - - return i; -} - -static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830) -{ - int i; - unsigned char *MMIO = ctx->MMIOAddress; - - for (i = 0; i < 8; i++) { - OUTREG(FENCE + i * 4, pI830->Fence[i]); - // if (I810_DEBUG & DEBUG_VERBOSE_VGA) - fprintf(stderr,"Fence Register : %x\n", pI830->Fence[i]); - } -} - -/* Tiled memory is good... really, really good... - * - * Need to make it less likely that we miss out on this - probably - * need to move the frontbuffer away from the 'guarenteed' alignment - * of the first memory segment, or perhaps allocate a discontigous - * framebuffer to get more alignment 'sweet spots'. - */ -static void -SetFence(const DRIDriverContext *ctx, I830Rec *pI830, - int nr, unsigned int start, unsigned int pitch, - unsigned int size) -{ - unsigned int val; - unsigned int fence_mask = 0; - unsigned int fence_pitch; - - if (nr < 0 || nr > 7) { - fprintf(stderr, - "SetFence: fence %d out of range\n",nr); - return; - } - - pI830->Fence[nr] = 0; - - if (IS_I9XX(pI830)) - fence_mask = ~I915G_FENCE_START_MASK; - else - fence_mask = ~I830_FENCE_START_MASK; - - if (start & fence_mask) { - fprintf(stderr, - "SetFence: %d: start (0x%08x) is not %s aligned\n", - nr, start, (IS_I9XX(pI830)) ? "1MB" : "512k"); - return; - } - - if (start % size) { - fprintf(stderr, - "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n", - nr, start, size / 1024); - return; - } - - if (pitch & 127) { - fprintf(stderr, - "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n", - nr, pitch); - return; - } - - val = (start | FENCE_X_MAJOR | FENCE_VALID); - - if (IS_I9XX(pI830)) { - switch (size) { - case MB(1): - val |= I915G_FENCE_SIZE_1M; - break; - case MB(2): - val |= I915G_FENCE_SIZE_2M; - break; - case MB(4): - val |= I915G_FENCE_SIZE_4M; - break; - case MB(8): - val |= I915G_FENCE_SIZE_8M; - break; - case MB(16): - val |= I915G_FENCE_SIZE_16M; - break; - case MB(32): - val |= I915G_FENCE_SIZE_32M; - break; - case MB(64): - val |= I915G_FENCE_SIZE_64M; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); - return; - } - } else { - switch (size) { - case KB(512): - val |= FENCE_SIZE_512K; - break; - case MB(1): - val |= FENCE_SIZE_1M; - break; - case MB(2): - val |= FENCE_SIZE_2M; - break; - case MB(4): - val |= FENCE_SIZE_4M; - break; - case MB(8): - val |= FENCE_SIZE_8M; - break; - case MB(16): - val |= FENCE_SIZE_16M; - break; - case MB(32): - val |= FENCE_SIZE_32M; - break; - case MB(64): - val |= FENCE_SIZE_64M; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); - return; - } - } - - if (IS_I9XX(pI830)) - fence_pitch = pitch / 512; - else - fence_pitch = pitch / 128; - - switch (fence_pitch) { - case 1: - val |= FENCE_PITCH_1; - break; - case 2: - val |= FENCE_PITCH_2; - break; - case 4: - val |= FENCE_PITCH_4; - break; - case 8: - val |= FENCE_PITCH_8; - break; - case 16: - val |= FENCE_PITCH_16; - break; - case 32: - val |= FENCE_PITCH_32; - break; - case 64: - val |= FENCE_PITCH_64; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal pitch (%d)\n", nr, pitch); - return; - } - - pI830->Fence[nr] = val; -} - -static Bool -MakeTiles(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem) -{ - int pitch, ntiles, i; - - pitch = pMem->Pitch * ctx->cpp; - /* - * Simply try to break the region up into at most four pieces of size - * equal to the alignment. - */ - ntiles = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment; - if (ntiles >= 4) { - return FALSE; - } - - for (i = 0; i < ntiles; i++, nextTile++) { - SetFence(ctx, pI830, nextTile, pMem->Start + i * pMem->Alignment, - pitch, pMem->Alignment); - } - return TRUE; -} - -static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830) -{ - int i; - - /* Clear out */ - for (i = 0; i < 8; i++) - pI830->Fence[i] = 0; - - nextTile = 0; - - if (pI830->BackBuffer.Alignment >= KB(512)) { - if (MakeTiles(ctx, pI830, &(pI830->BackBuffer))) { - fprintf(stderr, - "Activating tiled memory for the back buffer.\n"); - } else { - fprintf(stderr, - "MakeTiles failed for the back buffer.\n"); - pI830->allowPageFlip = FALSE; - } - } - - if (pI830->DepthBuffer.Alignment >= KB(512)) { - if (MakeTiles(ctx, pI830, &(pI830->DepthBuffer))) { - fprintf(stderr, - "Activating tiled memory for the depth buffer.\n"); - } else { - fprintf(stderr, - "MakeTiles failed for the depth buffer.\n"); - } - } - - return; -} - -static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - struct pci_device host_bridge; - uint32_t gmch_ctrl; - int memsize = 0; - int range; - - memset(&host_bridge, 0, sizeof(host_bridge)); - - pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL); - - /* We need to reduce the stolen size, by the GTT and the popup. - * The GTT varying according the the FbMapSize and the popup is 4KB */ - range = (ctx->shared.fbSize / (1024*1024)) + 4; - - if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) { - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { - case I855_GMCH_GMS_STOLEN_1M: - memsize = MB(1) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_4M: - memsize = MB(4) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_8M: - memsize = MB(8) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_16M: - memsize = MB(16) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_32M: - memsize = MB(32) - KB(range); - break; - case I915G_GMCH_GMS_STOLEN_48M: - if (IS_I9XX(pI830)) - memsize = MB(48) - KB(range); - break; - case I915G_GMCH_GMS_STOLEN_64M: - if (IS_I9XX(pI830)) - memsize = MB(64) - KB(range); - break; - } - } else { - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { - case I830_GMCH_GMS_STOLEN_512: - memsize = KB(512) - KB(range); - break; - case I830_GMCH_GMS_STOLEN_1024: - memsize = MB(1) - KB(range); - break; - case I830_GMCH_GMS_STOLEN_8192: - memsize = MB(8) - KB(range); - break; - case I830_GMCH_GMS_LOCAL: - memsize = 0; - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, - "Local memory found, but won't be used.\n"); - break; - } - } - if (memsize > 0) { - fprintf(stderr, - "detected %d kB stolen memory.\n", memsize / 1024); - } else { - fprintf(stderr, - "no video memory detected.\n"); - } - return memsize; -} - -static int AgpInit(const DRIDriverContext *ctx, I830Rec *info) -{ - unsigned long mode = 0x4; - - if (drmAgpAcquire(ctx->drmFD) < 0) { - fprintf(stderr, "[gart] AGP not available\n"); - return 0; - } - - if (drmAgpEnable(ctx->drmFD, mode) < 0) { - fprintf(stderr, "[gart] AGP not enabled\n"); - drmAgpRelease(ctx->drmFD); - return 0; - } - else - fprintf(stderr, "[gart] AGP enabled at %dx\n", ctx->agpmode); - - return 1; -} - -/* - * Allocate memory from the given pool. Grow the pool if needed and if - * possible. - */ -static unsigned long -AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, - I830MemRange *result, I830MemPool *pool, - long size, unsigned long alignment, int flags) -{ - long needed, start, end; - - if (!result || !pool || !size) - return 0; - - /* Calculate how much space is needed. */ - if (alignment <= GTT_PAGE_SIZE) - needed = size; - else { - start = ROUND_TO(pool->Free.Start, alignment); - end = ROUND_TO(start + size, alignment); - needed = end - pool->Free.Start; - } - if (needed > pool->Free.Size) { - return 0; - } - - result->Start = ROUND_TO(pool->Free.Start, alignment); - pool->Free.Start += needed; - result->End = pool->Free.Start; - - pool->Free.Size = pool->Free.End - pool->Free.Start; - result->Size = result->End - result->Start; - result->Pool = pool; - result->Alignment = alignment; - return needed; -} - -static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange *result) -{ - unsigned long start, end; - unsigned long newApStart, newApEnd; - int ret; - if (!result || !size) - return 0; - - if (!alignment) - alignment = 4; - - start = ROUND_TO(pI830->MemoryAperture.Start, alignment); - end = ROUND_TO(start + size, alignment); - newApStart = end; - newApEnd = pI830->MemoryAperture.End; - - ret=drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key)); - - if (ret) - { - fprintf(stderr,"drmAgpAlloc failed %d\n", ret); - return 0; - } - pI830->allocatedMemory += size; - pI830->MemoryAperture.Start = newApStart; - pI830->MemoryAperture.End = newApEnd; - pI830->MemoryAperture.Size = newApEnd - newApStart; - // pI830->FreeMemory -= size; - result->Start = start; - result->End = start + size; - result->Size = size; - result->Offset = start; - result->Alignment = alignment; - result->Pool = NULL; - - return size; -} - -unsigned long -I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *result, I830MemPool *pool, long size, unsigned long alignment, int flags) -{ - int ret; - - if (!result) - return 0; - - /* Make sure these are initialised. */ - result->Size = 0; - result->Key = -1; - - if (!size) { - return 0; - } - - if (pool->Free.Size < size) - return AllocFromAGP(ctx, pI830, size, alignment, result); - else - { - ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags); - - if (ret==0) - return AllocFromAGP(ctx, pI830, size, alignment, result); - return ret; - } -} - -static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem) -{ - if (!mem) - return FALSE; - - if (mem->Key == -1) - return TRUE; - - return !drmAgpBind(ctx->drmFD, mem->Key, mem->Offset); -} - -/* simple memory allocation routines needed */ -/* put ring buffer in low memory */ -/* need to allocate front, back, depth buffers aligned correctly, - allocate ring buffer, -*/ - -/* */ -static Bool -I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - unsigned long size, ret; - unsigned long lines, lineSize, align; - - /* allocate ring buffer */ - memset(pI830->LpRing, 0, sizeof(I830RingBuffer)); - pI830->LpRing->mem.Key = -1; - - size = PRIMARY_RINGBUFFER_SIZE; - - ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0); - - if (ret != size) - { - fprintf(stderr,"unable to allocate ring buffer %ld\n", ret); - return FALSE; - } - - pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1; - - - /* allocate front buffer */ - memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer)); - pI830->FrontBuffer.Key = -1; - pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth; - - align = KB(512); - - lineSize = ctx->shared.virtualWidth * ctx->cpp; - lines = (ctx->shared.virtualHeight + 15) / 16 * 16; - size = lineSize * lines; - size = ROUND_TO_PAGE(size); - - align = GetBestTileAlignment(size); - - ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate front buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer)); - pI830->BackBuffer.Key = -1; - pI830->BackBuffer.Pitch = ctx->shared.virtualWidth; - - ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate back buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer)); - pI830->DepthBuffer.Key = -1; - pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth; - - ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate depth buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem)); - pI830->ContextMem.Key = -1; - size = KB(32); - - ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate context buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem)); - pI830->TexMem.Key = -1; - - size = 32768 * 1024; - ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem); - if (ret < size) - { - fprintf(stderr,"unable to allocate texture memory %ld\n", ret); - return FALSE; - } - - return TRUE; -} - -static Bool -I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - if (!BindAgpRange(ctx, &pI830->LpRing->mem)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->FrontBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->BackBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->DepthBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->ContextMem)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->TexMem)) - return FALSE; - - return TRUE; -} - -static Bool -I830CleanupDma(const DRIDriverContext *ctx) -{ - drmI830Init info; - - memset(&info, 0, sizeof(drmI830Init)); - info.func = I830_CLEANUP_DMA; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, - &info, sizeof(drmI830Init))) { - fprintf(stderr, "I830 Dma Cleanup Failed\n"); - return FALSE; - } - - return TRUE; -} - -static Bool -I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830) -{ - I830RingBuffer *ring = pI830->LpRing; - drmI830Init info; - - memset(&info, 0, sizeof(drmI830Init)); - info.func = I830_INIT_DMA; - - info.ring_start = ring->mem.Start + pI830->LinearAddr; - info.ring_end = ring->mem.End + pI830->LinearAddr; - info.ring_size = ring->mem.Size; - - info.mmio_offset = (unsigned int)ctx->MMIOStart; - - info.sarea_priv_offset = sizeof(drm_sarea_t); - - info.front_offset = pI830->FrontBuffer.Start; - info.back_offset = pI830->BackBuffer.Start; - info.depth_offset = pI830->DepthBuffer.Start; - info.w = ctx->shared.virtualWidth; - info.h = ctx->shared.virtualHeight; - info.pitch = ctx->shared.virtualWidth; - info.back_pitch = pI830->BackBuffer.Pitch; - info.depth_pitch = pI830->DepthBuffer.Pitch; - info.cpp = ctx->cpp; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, - &info, sizeof(drmI830Init))) { - fprintf(stderr, - "I830 Dma Initialization Failed\n"); - return FALSE; - } - - return TRUE; -} - -static int I830CheckDRMVersion( const DRIDriverContext *ctx, - I830Rec *pI830 ) -{ - drmVersionPtr version; - - version = drmGetVersion(ctx->drmFD); - - if (version) { - int req_minor, req_patch; - - req_minor = 4; - req_patch = 0; - - if (version->version_major != 1 || - version->version_minor < req_minor || - (version->version_minor == req_minor && - version->version_patchlevel < req_patch)) { - /* Incompatible drm version */ - fprintf(stderr, - "[dri] I830DRIScreenInit failed because of a version " - "mismatch.\n" - "[dri] i915.o kernel module version is %d.%d.%d " - "but version 1.%d.%d or newer is needed.\n" - "[dri] Disabling DRI.\n", - version->version_major, - version->version_minor, - version->version_patchlevel, - req_minor, - req_patch); - drmFreeVersion(version); - return 0; - } - - pI830->drmMinor = version->version_minor; - drmFreeVersion(version); - } - return 1; -} - -static void -I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830) -{ - unsigned int itemp; - unsigned char *MMIO = ctx->MMIOAddress; - - OUTREG(LP_RING + RING_LEN, 0); - OUTREG(LP_RING + RING_TAIL, 0); - OUTREG(LP_RING + RING_HEAD, 0); - - if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) != - pI830->LpRing->mem.Start) { - fprintf(stderr, - "I830SetRingRegs: Ring buffer start (%lx) violates its " - "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK); - } - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK; - OUTREG(LP_RING + RING_START, itemp); - - if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) != - pI830->LpRing->mem.Size - 4096) { - fprintf(stderr, - "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its " - "mask (%x)\n", pI830->LpRing->mem.Size - 4096, - I830_RING_NR_PAGES); - } - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES; - itemp |= (RING_NO_REPORT | RING_VALID); - OUTREG(LP_RING + RING_LEN, itemp); - - pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK; - pI830->LpRing->tail = INREG(LP_RING + RING_TAIL); - pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8); - if (pI830->LpRing->space < 0) - pI830->LpRing->space += pI830->LpRing->mem.Size; - - SetFenceRegs(ctx, pI830); - - /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky - hacky hacky */ - OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr); - -} - -static Bool -I830SetParam(const DRIDriverContext *ctx, int param, int value) -{ - drmI830SetParam sp; - - memset(&sp, 0, sizeof(sp)); - sp.param = param; - sp.value = value; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &sp, sizeof(sp))) { - fprintf(stderr, "I830 SetParam Failed\n"); - return FALSE; - } - - return TRUE; -} - -static Bool -I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - fprintf(stderr, - "[drm] Mapping front buffer\n"); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)(sarea->front_offset + pI830->LinearAddr), - sarea->front_size, - DRM_FRAME_BUFFER, /*DRM_AGP,*/ - 0, - &sarea->front_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(front_handle) failed. Disabling DRI\n"); - return FALSE; - } - ctx->shared.hFrameBuffer = sarea->front_handle; - ctx->shared.fbSize = sarea->front_size; - fprintf(stderr, "[drm] Front Buffer = 0x%08x\n", - sarea->front_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)(sarea->back_offset), - sarea->back_size, DRM_AGP, 0, - &sarea->back_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(back_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] Back Buffer = 0x%08x\n", - sarea->back_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)sarea->depth_offset, - sarea->depth_size, DRM_AGP, 0, - &sarea->depth_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n", - sarea->depth_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)sarea->tex_offset, - sarea->tex_size, DRM_AGP, 0, - &sarea->tex_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] textures = 0x%08x\n", - sarea->tex_handle); - - return TRUE; -} - - -static void -I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ -#if 1 - if (sarea->front_handle) { - drmRmMap(ctx->drmFD, sarea->front_handle); - sarea->front_handle = 0; - } -#endif - if (sarea->back_handle) { - drmRmMap(ctx->drmFD, sarea->back_handle); - sarea->back_handle = 0; - } - if (sarea->depth_handle) { - drmRmMap(ctx->drmFD, sarea->depth_handle); - sarea->depth_handle = 0; - } - if (sarea->tex_handle) { - drmRmMap(ctx->drmFD, sarea->tex_handle); - sarea->tex_handle = 0; - } -} - -static void -I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - /* Start up the simple memory manager for agp space */ - drmI830MemInitHeap drmHeap; - drmHeap.region = I830_MEM_REGION_AGP; - drmHeap.start = 0; - drmHeap.size = sarea->tex_size; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, - &drmHeap, sizeof(drmHeap))) { - fprintf(stderr, - "[drm] Failed to initialized agp heap manager\n"); - } else { - fprintf(stderr, - "[drm] Initialized kernel agp heap manager, %d\n", - sarea->tex_size); - - I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, - sarea->log_tex_granularity); - } -} - -static Bool -I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - if (drmAddMap(ctx->drmFD, - (drm_handle_t)pI830->LpRing->mem.Start, - pI830->LpRing->mem.Size, DRM_AGP, 0, - &pI830->ring_map) < 0) { - fprintf(stderr, - "[drm] drmAddMap(ring_map) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] ring buffer = 0x%08x\n", - pI830->ring_map); - - if (I830InitDma(ctx, pI830) == FALSE) { - return FALSE; - } - - /* init to zero to be safe */ - - I830DRIMapScreenRegions(ctx, pI830, sarea); - I830InitTextureHeap(ctx, pI830, sarea); - - if (ctx->pciDevice != PCI_CHIP_845_G && - ctx->pciDevice != PCI_CHIP_I830_M) { - I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1 ); - } - - /* Okay now initialize the dma engine */ - { - pI830->irq = drmGetInterruptFromBusID(ctx->drmFD, - ctx->pciBus, - ctx->pciDevice, - ctx->pciFunc); - - if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) { - fprintf(stderr, - "[drm] failure adding irq handler\n"); - pI830->irq = 0; - return FALSE; - } - else - fprintf(stderr, - "[drm] dma control initialized, using IRQ %d\n", - pI830->irq); - } - - fprintf(stderr, "[dri] visual configs initialized\n"); - - return TRUE; -} - -static Bool -I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - /* need to drmMap front and back buffers and zero them */ - drmAddress map_addr; - int ret; - - ret = drmMap(ctx->drmFD, - sarea->front_handle, - sarea->front_size, - &map_addr); - - if (ret) - { - fprintf(stderr, "Unable to map front buffer\n"); - return FALSE; - } - - drimemsetio((char *)map_addr, - 0, - sarea->front_size); - drmUnmap(map_addr, sarea->front_size); - - - ret = drmMap(ctx->drmFD, - sarea->back_handle, - sarea->back_size, - &map_addr); - - if (ret) - { - fprintf(stderr, "Unable to map back buffer\n"); - return FALSE; - } - - drimemsetio((char *)map_addr, - 0, - sarea->back_size); - drmUnmap(map_addr, sarea->back_size); - - return TRUE; -} - -static Bool -I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) - -{ - I830DRIPtr pI830DRI; - drmI830Sarea *pSAREAPriv; - int err; - - drm_page_size = getpagesize(); - - pI830->registerSize = ctx->MMIOSize; - /* This is a hack for now. We have to have more than a 4k page here - * because of the size of the state. However, the state should be - * in a per-context mapping. This will be added in the Mesa 3.5 port - * of the I830 driver. - */ - ctx->shared.SAREASize = SAREA_MAX; - - /* Note that drmOpen will try to load the kernel module, if needed. */ - ctx->drmFD = drmOpen("i915", NULL ); - if (ctx->drmFD < 0) { - fprintf(stderr, "[drm] drmOpen failed\n"); - return 0; - } - - if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) { - fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n", - ctx->drmFD, ctx->pciBusID, strerror(-err)); - return 0; - } - - if (drmAddMap( ctx->drmFD, - 0, - ctx->shared.SAREASize, - DRM_SHM, - DRM_CONTAINS_LOCK, - &ctx->shared.hSAREA) < 0) - { - fprintf(stderr, "[drm] drmAddMap failed\n"); - return 0; - } - - fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n", - ctx->shared.SAREASize, ctx->shared.hSAREA); - - if (drmMap( ctx->drmFD, - ctx->shared.hSAREA, - ctx->shared.SAREASize, - (drmAddressPtr)(&ctx->pSAREA)) < 0) - { - fprintf(stderr, "[drm] drmMap failed\n"); - return 0; - - } - - memset(ctx->pSAREA, 0, ctx->shared.SAREASize); - fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n", - ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize); - - - if (drmAddMap(ctx->drmFD, - ctx->MMIOStart, - ctx->MMIOSize, - DRM_REGISTERS, - DRM_READ_ONLY, - &pI830->registerHandle) < 0) { - fprintf(stderr, "[drm] drmAddMap mmio failed\n"); - return 0; - } - fprintf(stderr, - "[drm] register handle = 0x%08x\n", pI830->registerHandle); - - - if (!I830CheckDRMVersion(ctx, pI830)) { - return FALSE; - } - - /* Create a 'server' context so we can grab the lock for - * initialization ioctls. - */ - if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) { - fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err); - return 0; - } - - DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); - - /* Initialize the SAREA private data structure */ - pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + - sizeof(drm_sarea_t)); - memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); - - pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830); - pI830->StolenMemory.Start = 0; - pI830->StolenMemory.End = pI830->StolenMemory.Size; - - pI830->MemoryAperture.Start = pI830->StolenMemory.End; - pI830->MemoryAperture.End = KB(40000); - pI830->MemoryAperture.Size = pI830->MemoryAperture.End - pI830->MemoryAperture.Start; - - pI830->StolenPool.Fixed = pI830->StolenMemory; - pI830->StolenPool.Total = pI830->StolenMemory; - pI830->StolenPool.Free = pI830->StolenPool.Total; - pI830->FreeMemory = pI830->StolenPool.Total.Size; - - if (!AgpInit(ctx, pI830)) - return FALSE; - - if (I830AllocateMemory(ctx, pI830) == FALSE) - { - return FALSE; - } - - if (I830BindMemory(ctx, pI830) == FALSE) - { - return FALSE; - } - - pSAREAPriv->front_offset = pI830->FrontBuffer.Start; - pSAREAPriv->front_size = pI830->FrontBuffer.Size; - pSAREAPriv->width = ctx->shared.virtualWidth; - pSAREAPriv->height = ctx->shared.virtualHeight; - pSAREAPriv->pitch = ctx->shared.virtualWidth; - pSAREAPriv->virtualX = ctx->shared.virtualWidth; - pSAREAPriv->virtualY = ctx->shared.virtualHeight; - pSAREAPriv->back_offset = pI830->BackBuffer.Start; - pSAREAPriv->back_size = pI830->BackBuffer.Size; - pSAREAPriv->depth_offset = pI830->DepthBuffer.Start; - pSAREAPriv->depth_size = pI830->DepthBuffer.Size; - pSAREAPriv->tex_offset = pI830->TexMem.Start; - pSAREAPriv->tex_size = pI830->TexMem.Size; - pSAREAPriv->log_tex_granularity = pI830->TexGranularity; - - ctx->driverClientMsg = malloc(sizeof(I830DRIRec)); - ctx->driverClientMsgSize = sizeof(I830DRIRec); - pI830DRI = (I830DRIPtr)ctx->driverClientMsg; - pI830DRI->deviceID = pI830->Chipset; - pI830DRI->regsSize = I830_REG_SIZE; - pI830DRI->width = ctx->shared.virtualWidth; - pI830DRI->height = ctx->shared.virtualHeight; - pI830DRI->mem = ctx->shared.fbSize; - pI830DRI->cpp = ctx->cpp; - pI830DRI->backOffset = pI830->BackBuffer.Start; - pI830DRI->backPitch = pI830->BackBuffer.Pitch; - - pI830DRI->depthOffset = pI830->DepthBuffer.Start; - pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; - - pI830DRI->fbOffset = pI830->FrontBuffer.Start; - pI830DRI->fbStride = pI830->FrontBuffer.Pitch; - - pI830DRI->bitsPerPixel = ctx->bpp; - pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t); - - err = I830DRIDoMappings(ctx, pI830, pSAREAPriv); - if (err == FALSE) - return FALSE; - - I830SetupMemoryTiling(ctx, pI830); - - /* Quick hack to clear the front & back buffers. Could also use - * the clear ioctl to do this, but would need to setup hw state - * first. - */ - I830ClearScreen(ctx, pI830, pSAREAPriv); - - I830SetRingRegs(ctx, pI830); - - return TRUE; -} - - -/** - * \brief Validate the fbdev mode. - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Saves some registers and returns 1. - * - * \sa radeonValidateMode(). - */ -static int i830ValidateMode( const DRIDriverContext *ctx ) -{ - return 1; -} - -/** - * \brief Examine mode returned by fbdev. - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Restores registers that fbdev has clobbered and returns 1. - * - * \sa i810ValidateMode(). - */ -static int i830PostValidateMode( const DRIDriverContext *ctx ) -{ - I830Rec *pI830 = ctx->driverPrivate; - - I830SetRingRegs(ctx, pI830); - return 1; -} - - -/** - * \brief Initialize the framebuffer device mode - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Fills in \p info with some default values and some information from \p ctx - * and then calls I810ScreenInit() for the screen initialization. - * - * Before exiting clears the framebuffer memory accessing it directly. - */ -static int i830InitFBDev( DRIDriverContext *ctx ) -{ - I830Rec *pI830 = calloc(1, sizeof(I830Rec)); - int i; - - { - int dummy = ctx->shared.virtualWidth; - - switch (ctx->bpp / 8) { - case 1: dummy = (ctx->shared.virtualWidth + 127) & ~127; break; - case 2: dummy = (ctx->shared.virtualWidth + 31) & ~31; break; - case 3: - case 4: dummy = (ctx->shared.virtualWidth + 15) & ~15; break; - } - - ctx->shared.virtualWidth = dummy; - ctx->shared.Width = ctx->shared.virtualWidth; - } - - - for (i = 0; pitches[i] != 0; i++) { - if (pitches[i] >= ctx->shared.virtualWidth) { - ctx->shared.virtualWidth = pitches[i]; - break; - } - } - - ctx->driverPrivate = (void *)pI830; - - pI830->LpRing = calloc(1, sizeof(I830RingBuffer)); - pI830->Chipset = ctx->chipset; - pI830->LinearAddr = ctx->FBStart; - - if (!I830ScreenInit( ctx, pI830 )) - return 0; - - - return 1; -} - - -/** - * \brief The screen is being closed, so clean up any state and free any - * resources used by the DRI. - * - * \param ctx display handle. - * - * Unmaps the SAREA, closes the DRM device file descriptor and frees the driver - * private data. - */ -static void i830HaltFBDev( DRIDriverContext *ctx ) -{ - drmI830Sarea *pSAREAPriv; - I830Rec *pI830 = ctx->driverPrivate; - - if (pI830->irq) { - drmCtlUninstHandler(ctx->drmFD); - pI830->irq = 0; } - - I830CleanupDma(ctx); - - pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + - sizeof(drm_sarea_t)); - - I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv); - drmUnmap( ctx->pSAREA, ctx->shared.SAREASize ); - drmClose(ctx->drmFD); - - if (ctx->driverPrivate) { - free(ctx->driverPrivate); - ctx->driverPrivate = 0; - } -} - - -extern void i810NotifyFocus( int ); - -/** - * \brief Exported driver interface for Mini GLX. - * - * \sa DRIDriverRec. - */ -const struct DRIDriverRec __driDriver = { - i830ValidateMode, - i830PostValidateMode, - i830InitFBDev, - i830HaltFBDev, - NULL,//I830EngineShutdown, - NULL, //I830EngineRestore, -#ifndef _EMBEDDED - 0, -#else - i810NotifyFocus, -#endif -}; diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c new file mode 100644 index 0000000..a594fb6 --- /dev/null +++ b/shared/intel_batchbuffer.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_batchbuffer.h" +#include "intel_ioctl.h" +#include "intel_decode.h" +#include "intel_reg.h" + +/* Relocations in kernel space: + * - pass dma buffer seperately + * - memory manager knows how to patch + * - pass list of dependent buffers + * - pass relocation list + * + * Either: + * - get back an offset for buffer to fire + * - memory manager knows how to fire buffer + * + * Really want the buffer to be AGP and pinned. + * + */ + +/* Cliprect fence: The highest fence protecting a dma buffer + * containing explicit cliprect information. Like the old drawable + * lock but irq-driven. X server must wait for this fence to expire + * before changing cliprects [and then doing sw rendering?]. For + * other dma buffers, the scheduler will grab current cliprect info + * and mix into buffer. X server must hold the lock while changing + * cliprects??? Make per-drawable. Need cliprects in shared memory + * -- beats storing them with every cmd buffer in the queue. + * + * ==> X server must wait for this fence to expire before touching the + * framebuffer with new cliprects. + * + * ==> Cliprect-dependent buffers associated with a + * cliprect-timestamp. All of the buffers associated with a timestamp + * must go to hardware before any buffer with a newer timestamp. + * + * ==> Dma should be queued per-drawable for correct X/GL + * synchronization. Or can fences be used for this? + * + * Applies to: Blit operations, metaops, X server operations -- X + * server automatically waits on its own dma to complete before + * modifying cliprects ??? + */ + +void +intel_batchbuffer_reset(struct intel_batchbuffer *batch) +{ + struct intel_context *intel = batch->intel; + + if (batch->buf != NULL) { + dri_bo_unreference(batch->buf); + batch->buf = NULL; + } + + batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", + intel->maxBatchSize, 4096, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + batch->size = intel->maxBatchSize; + batch->ptr = batch->map; + batch->dirty_state = ~0; + batch->cliprect_mode = IGNORE_CLIPRECTS; + + /* account batchbuffer in aperture */ + dri_bufmgr_check_aperture_space(batch->buf); + +} + +struct intel_batchbuffer * +intel_batchbuffer_alloc(struct intel_context *intel) +{ + struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); + + batch->intel = intel; + batch->last_fence = NULL; + intel_batchbuffer_reset(batch); + + return batch; +} + +void +intel_batchbuffer_free(struct intel_batchbuffer *batch) +{ + if (batch->last_fence) { + dri_fence_wait(batch->last_fence); + dri_fence_unreference(batch->last_fence); + batch->last_fence = NULL; + } + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } + dri_bo_unreference(batch->buf); + batch->buf = NULL; + free(batch); +} + + + +/* TODO: Push this whole function into bufmgr. + */ +static void +do_flush_locked(struct intel_batchbuffer *batch, + GLuint used, GLboolean allow_unlock) +{ + struct intel_context *intel = batch->intel; + void *start; + GLuint count; + + dri_bo_unmap(batch->buf); + start = dri_process_relocs(batch->buf, &count); + + batch->map = NULL; + batch->ptr = NULL; + + /* Throw away non-effective packets. Won't work once we have + * hardware contexts which would preserve statechanges beyond a + * single buffer. + */ + + if (!(intel->numClipRects == 0 && + batch->cliprect_mode == LOOP_CLIPRECTS)) { + if (intel->ttm == GL_TRUE) { + intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + start, count, &batch->last_fence); + } else { + intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); + } + } + + dri_post_submit(batch->buf, &batch->last_fence); + + if (intel->numClipRects == 0 && + batch->cliprect_mode == LOOP_CLIPRECTS) { + if (allow_unlock) { + /* If we are not doing any actual user-visible rendering, + * do a sched_yield to keep the app from pegging the cpu while + * achieving nothing. + */ + UNLOCK_HARDWARE(intel); + sched_yield(); + LOCK_HARDWARE(intel); + } + } + + if (INTEL_DEBUG & DEBUG_BATCH) { + dri_bo_map(batch->buf, GL_FALSE); + intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, + intel->intelScreen->deviceID); + dri_bo_unmap(batch->buf); + + if (intel->vtbl.debug_batch != NULL) + intel->vtbl.debug_batch(intel); + } + + intel->vtbl.new_batch(intel); +} + +void +_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, + int line) +{ + struct intel_context *intel = batch->intel; + GLuint used = batch->ptr - batch->map; + GLboolean was_locked = intel->locked; + + if (used == 0) + return; + + if (INTEL_DEBUG & DEBUG_BATCH) + fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, + used); + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ + if (used & 4) { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = 0; + ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; + used += 12; + } + else { + ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; + used += 8; + } + + /* Workaround for recursive batchbuffer flushing: If the window is + * moved, we can get into a case where we try to flush during a + * flush. What happens is that when we try to grab the lock for + * the first flush, we detect that the window moved which then + * causes another flush (from the intel_draw_buffer() call in + * intelUpdatePageFlipping()). To work around this we reset the + * batchbuffer tail pointer before trying to get the lock. This + * prevent the nested buffer flush, but a better fix would be to + * avoid that in the first place. */ + batch->ptr = batch->map; + + /* TODO: Just pass the relocation list and dma buffer up to the + * kernel. + */ + if (!was_locked) + LOCK_HARDWARE(intel); + + do_flush_locked(batch, used, GL_FALSE); + + if (!was_locked) + UNLOCK_HARDWARE(intel); + + if (INTEL_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "waiting for idle\n"); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); + } + + /* Reset the buffer: + */ + intel_batchbuffer_reset(batch); +} + +void +intel_batchbuffer_finish(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_flush(batch); + if (batch->last_fence != NULL) + dri_fence_wait(batch->last_fence); +} + + +/* This is the only way buffers get added to the validate list. + */ +GLboolean +intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + GLuint flags, GLuint delta) +{ + int ret; + + ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + +void +intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode) +{ + assert((bytes & 3) == 0); + intel_batchbuffer_require_space(batch, bytes, cliprect_mode); + __memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; +} diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h new file mode 100644 index 0000000..0da6020 --- /dev/null +++ b/shared/intel_batchbuffer.h @@ -0,0 +1,147 @@ +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "mtypes.h" + +#include "dri_bufmgr.h" + +struct intel_context; + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +enum cliprect_mode { + /** + * Batchbuffer contents may be looped over per cliprect, but do not + * require it. + */ + IGNORE_CLIPRECTS, + /** + * Batchbuffer contents require looping over per cliprect at batch submit + * time. + */ + LOOP_CLIPRECTS, + /** + * Batchbuffer contents contain drawing that should not be executed multiple + * times. + */ + NO_LOOP_CLIPRECTS, + /** + * Batchbuffer contents contain drawing that already handles cliprects, such + * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. + * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch + * outside of LOCK/UNLOCK. + */ + REFERENCES_CLIPRECTS +}; + +struct intel_batchbuffer +{ + struct intel_context *intel; + + dri_bo *buf; + dri_fence *last_fence; + + GLubyte *map; + GLubyte *ptr; + + enum cliprect_mode cliprect_mode; + + GLuint size; + + GLuint dirty_state; +}; + +struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context + *intel); + +void intel_batchbuffer_free(struct intel_batchbuffer *batch); + + +void intel_batchbuffer_finish(struct intel_batchbuffer *batch); + +void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, + const char *file, int line); + +#define intel_batchbuffer_flush(batch) \ + _intel_batchbuffer_flush(batch, __FILE__, __LINE__) + +void intel_batchbuffer_reset(struct intel_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +void intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + +void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, + GLuint bytes); + +GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + GLuint flags, GLuint offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... + */ +static INLINE GLuint +intel_batchbuffer_space(struct intel_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(intel_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE void +intel_batchbuffer_require_space(struct intel_batchbuffer *batch, + GLuint sz, + enum cliprect_mode cliprect_mode) +{ + assert(sz < batch->size - 8); + if (intel_batchbuffer_space(batch) < sz) + intel_batchbuffer_flush(batch); + + if (cliprect_mode != IGNORE_CLIPRECTS) { + if (batch->cliprect_mode == IGNORE_CLIPRECTS) { + batch->cliprect_mode = cliprect_mode; + } else { + if (batch->cliprect_mode != cliprect_mode) { + intel_batchbuffer_flush(batch); + batch->cliprect_mode = cliprect_mode; + } + } + } +} + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n, cliprect_mode) do { \ + intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ +} while (0) + +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) + +#define OUT_RELOC(buf, cliprect_mode, delta) do { \ + assert((delta) >= 0); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ +} while (0) + +#define ADVANCE_BATCH() do { } while(0) + + +#endif diff --git a/shared/intel_blit.c b/shared/intel_blit.c new file mode 100644 index 0000000..25ac609 --- /dev/null +++ b/shared/intel_blit.c @@ -0,0 +1,631 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "mtypes.h" +#include "context.h" +#include "enums.h" + +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_reg.h" +#include "intel_regions.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +/** + * Copy the back color buffer to the front color buffer. + * Used for SwapBuffers(). + */ +void +intelCopyBuffer(const __DRIdrawablePrivate * dPriv, + const drm_clip_rect_t * rect) +{ + + struct intel_context *intel; + const intelScreenPrivate *intelScreen; + int ret; + + DBG("%s\n", __FUNCTION__); + + assert(dPriv); + + intel = intelScreenContext(dPriv->driScreenPriv->private); + if (!intel) + return; + + intelScreen = intel->intelScreen; + + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets + * should work regardless. + */ + LOCK_HARDWARE(intel); + + if (dPriv && dPriv->numClipRects) { + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + struct intel_region *src, *dst; + int nbox = dPriv->numClipRects; + drm_clip_rect_t *pbox = dPriv->pClipRects; + int cpp; + int src_pitch, dst_pitch; + unsigned short src_x, src_y; + int BR13, CMD; + int i; + + src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); + dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); + + src_pitch = src->pitch * src->cpp; + dst_pitch = dst->pitch * dst->cpp; + + cpp = src->cpp; + + ASSERT(intel_fb); + ASSERT(intel_fb->Base.Name == 0); /* Not a user-created FBO */ + ASSERT(src); + ASSERT(dst); + ASSERT(src->cpp == dst->cpp); + + if (cpp == 2) { + BR13 = (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + } + else { + BR13 = (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + +#ifndef I915 + if (src->tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + if (dst->tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } +#endif + /* do space/cliprects check before going any further */ + intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS); + again: + ret = dri_bufmgr_check_aperture_space(dst->buffer); + ret |= dri_bufmgr_check_aperture_space(src->buffer); + + if (ret) { + intel_batchbuffer_flush(intel->batch); + goto again; + } + + for (i = 0; i < nbox; i++, pbox++) { + drm_clip_rect_t box = *pbox; + + if (rect) { + if (!intel_intersect_cliprects(&box, &box, rect)) + continue; + } + + if (box.x1 >= box.x2 || + box.y1 >= box.y2) + continue; + + assert(box.x1 < box.x2); + assert(box.y1 < box.y2); + src_x = box.x1 - dPriv->x + dPriv->backX; + src_y = box.y1 - dPriv->y + dPriv->backY; + + BEGIN_BATCH(8, REFERENCES_CLIPRECTS); + OUT_BATCH(CMD); + OUT_BATCH(BR13 | dst_pitch); + OUT_BATCH((box.y1 << 16) | box.x1); + OUT_BATCH((box.y2 << 16) | box.x2); + + OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(src_pitch); + OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + ADVANCE_BATCH(); + } + + if (intel->first_swap_fence) + dri_fence_unreference(intel->first_swap_fence); + intel_batchbuffer_flush(intel->batch); + intel->first_swap_fence = intel->batch->last_fence; + if (intel->first_swap_fence) + dri_fence_reference(intel->first_swap_fence); + } + + UNLOCK_HARDWARE(intel); +} + + + + +void +intelEmitFillBlit(struct intel_context *intel, + GLuint cpp, + GLshort dst_pitch, + dri_bo *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLuint color) +{ + GLuint BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } +#ifndef I915 + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } +#endif + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + assert(w > 0); + assert(h > 0); + + BEGIN_BATCH(6, NO_LOOP_CLIPRECTS); + OUT_BATCH(CMD); + OUT_BATCH(BR13 | dst_pitch); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_BATCH(color); + ADVANCE_BATCH(); +} + +static GLuint translate_raster_op(GLenum logicop) +{ + switch(logicop) { + case GL_CLEAR: return 0x00; + case GL_AND: return 0x88; + case GL_AND_REVERSE: return 0x44; + case GL_COPY: return 0xCC; + case GL_AND_INVERTED: return 0x22; + case GL_NOOP: return 0xAA; + case GL_XOR: return 0x66; + case GL_OR: return 0xEE; + case GL_NOR: return 0x11; + case GL_EQUIV: return 0x99; + case GL_INVERT: return 0x55; + case GL_OR_REVERSE: return 0xDD; + case GL_COPY_INVERTED: return 0x33; + case GL_OR_INVERTED: return 0xBB; + case GL_NAND: return 0x77; + case GL_SET: return 0xFF; + default: return 0; + } +} + + +/* Copy BitBlt + */ +void +intelEmitCopyBlit(struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + dri_bo *src_buffer, + GLuint src_offset, + GLboolean src_tiled, + GLshort dst_pitch, + dri_bo *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort src_x, GLshort src_y, + GLshort dst_x, GLshort dst_y, + GLshort w, GLshort h, + GLenum logic_op) +{ + GLuint CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + int ret; + BATCH_LOCALS; + + /* do space/cliprects check before going any further */ + intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); + again: + ret = dri_bufmgr_check_aperture_space(dst_buffer); + ret |= dri_bufmgr_check_aperture_space(src_buffer); + if (ret) { + intel_batchbuffer_flush(intel->batch); + goto again; + } + + DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + src_pitch *= cpp; + dst_pitch *= cpp; + + BR13 = translate_raster_op(logic_op) << 16; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 |= (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 |= (1 << 24) | (1 << 25); + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + +#ifndef I915 + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } +#endif + + if (dst_y2 <= dst_y || dst_x2 <= dst_x) { + return; + } + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. + * + * On the other hand, if we always adjust, the hardware doesn't + * know which blit directions to use, so overlapping copypixels get + * the wrong result. + */ + if (dst_pitch > 0 && src_pitch > 0) { + assert(dst_x < dst_x2); + assert(dst_y < dst_y2); + + BEGIN_BATCH(8, NO_LOOP_CLIPRECTS); + OUT_BATCH(CMD); + OUT_BATCH(BR13 | dst_pitch); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(src_pitch); + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + src_offset); + ADVANCE_BATCH(); + } + else { + assert(dst_x < dst_x2); + assert(h > 0); + + BEGIN_BATCH(8, NO_LOOP_CLIPRECTS); + OUT_BATCH(CMD); + OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); + OUT_BATCH((0 << 16) | dst_x); + OUT_BATCH((h << 16) | dst_x2); + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + dst_offset + dst_y * dst_pitch); + OUT_BATCH((0 << 16) | src_x); + OUT_BATCH(src_pitch); + OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + src_offset + src_y * src_pitch); + ADVANCE_BATCH(); + } +} + + +/** + * Use blitting to clear the renderbuffers named by 'flags'. + * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field + * since that might include software renderbuffers or renderbuffers + * which we're clearing with triangles. + * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear + */ +void +intelClearWithBlit(GLcontext *ctx, GLbitfield mask) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint clear_depth; + GLbitfield skipBuffers = 0; + BATCH_LOCALS; + + /* + * Compute values for clearing the buffers. + */ + clear_depth = 0; + if (mask & BUFFER_BIT_DEPTH) { + clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear); + } + if (mask & BUFFER_BIT_STENCIL) { + clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; + } + + /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in + * the loop below. + */ + if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) { + skipBuffers = BUFFER_BIT_STENCIL; + } + + /* XXX Move this flush/lock into the following conditional? */ + intelFlush(&intel->ctx); + LOCK_HARDWARE(intel); + + if (intel->numClipRects) { + GLint cx, cy, cw, ch; + drm_clip_rect_t clear; + int i; + + /* Get clear bounds after locking */ + cx = fb->_Xmin; + cy = fb->_Ymin; + cw = fb->_Xmax - cx; + ch = fb->_Ymax - cy; + + if (fb->Name == 0) { + /* clearing a window */ + + /* flip top to bottom */ + clear.x1 = cx + intel->drawX; + clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch; + clear.x2 = clear.x1 + cw; + clear.y2 = clear.y1 + ch; + } + else { + /* clearing FBO */ + assert(intel->numClipRects == 1); + assert(intel->pClipRects == &intel->fboRect); + clear.x1 = cx; + clear.y1 = cy; + clear.x2 = clear.x1 + cw; + clear.y2 = clear.y1 + ch; + /* no change to mask */ + } + + for (i = 0; i < intel->numClipRects; i++) { + const drm_clip_rect_t *box = &intel->pClipRects[i]; + drm_clip_rect_t b; + GLuint buf; + GLuint clearMask = mask; /* use copy, since we modify it below */ + GLboolean all = (cw == fb->Width && ch == fb->Height); + + if (!all) { + intel_intersect_cliprects(&b, &clear, box); + } + else { + b = *box; + } + + if (b.x1 >= b.x2 || b.y1 >= b.y2) + continue; + + if (0) + _mesa_printf("clear %d,%d..%d,%d, mask %x\n", + b.x1, b.y1, b.x2, b.y2, mask); + + /* Loop over all renderbuffers */ + for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) { + const GLbitfield bufBit = 1 << buf; + if ((clearMask & bufBit) && !(bufBit & skipBuffers)) { + /* OK, clear this renderbuffer */ + struct intel_region *irb_region = + intel_get_rb_region(fb, buf); + dri_bo *write_buffer = + intel_region_buffer(intel, irb_region, + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); + + GLuint clearVal; + GLint pitch, cpp; + GLuint BR13, CMD; + + ASSERT(irb_region); + + pitch = irb_region->pitch; + cpp = irb_region->cpp; + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + irb_region->buffer, (pitch * cpp), + irb_region->draw_offset, + b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + + /* Setup the blit command */ + if (cpp == 4) { + BR13 |= (1 << 24) | (1 << 25); + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { + if (clearMask & BUFFER_BIT_DEPTH) + CMD |= XY_BLT_WRITE_RGB; + if (clearMask & BUFFER_BIT_STENCIL) + CMD |= XY_BLT_WRITE_ALPHA; + } + else { + /* clearing RGBA */ + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + } + else { + ASSERT(cpp == 2 || cpp == 0); + BR13 |= (1 << 24); + } + +#ifndef I915 + if (irb_region->tiled) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= (pitch * cpp); + + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { + clearVal = clear_depth; + } + else { + clearVal = (cpp == 4) + ? intel->ClearColor8888 : intel->ClearColor565; + } + /* + _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n", + buf, irb->Base.Name); + */ + intel_wait_flips(intel); + + assert(b.x1 < b.x2); + assert(b.y1 < b.y2); + + BEGIN_BATCH(6, REFERENCES_CLIPRECTS); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((b.y1 << 16) | b.x1); + OUT_BATCH((b.y2 << 16) | b.x2); + OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + irb_region->draw_offset); + OUT_BATCH(clearVal); + ADVANCE_BATCH(); + clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ + } + } + } + intel_batchbuffer_flush(intel->batch); + } + + UNLOCK_HARDWARE(intel); +} + +void +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + dri_bo *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLenum logic_op) +{ + int dwords = ALIGN(src_size, 8) / 4; + uint32_t opcode, br13, blit_cmd; + + assert( logic_op - GL_CLEAR >= 0 ); + assert( logic_op - GL_CLEAR < 0x10 ); + + if (w < 0 || h < 0) + return; + + dst_pitch *= cpp; + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); + + intel_batchbuffer_require_space( intel->batch, + (8 * 4) + + (3 * 4) + + dwords, + NO_LOOP_CLIPRECTS ); + + opcode = XY_SETUP_BLT_CMD; + if (cpp == 4) + opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; +#ifndef I915 + if (dst_tiled) { + opcode |= XY_DST_TILED; + dst_pitch /= 4; + } +#endif + + br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); + if (cpp == 2) + br13 |= BR13_565; + else + br13 |= BR13_8888; + + blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ + if (dst_tiled) + blit_cmd |= XY_DST_TILED; + + BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS); + OUT_BATCH(opcode); + OUT_BATCH(br13); + OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ + OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ + OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_BATCH(0); /* bg */ + OUT_BATCH(fg_color); /* fg */ + OUT_BATCH(0); /* pattern base addr */ + + OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + ADVANCE_BATCH(); + + intel_batchbuffer_data( intel->batch, + src_bits, + dwords * 4, + NO_LOOP_CLIPRECTS ); +} diff --git a/i965/intel_blit.h b/shared/intel_blit.h index e361545..fc0620c 100644 --- a/i965/intel_blit.h +++ b/shared/intel_blit.h @@ -30,37 +30,36 @@ #include "intel_context.h" #include "intel_ioctl.h" +#include "dri_bufmgr.h" -struct buffer; +extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv, + const drm_clip_rect_t * rect); -extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv, - const drm_clip_rect_t *rect ); -extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask); +extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask); -extern void intelEmitCopyBlit( struct intel_context *intel, - GLuint cpp, - GLshort src_pitch, - struct buffer *src_buffer, - GLuint src_offset, - GLboolean src_tiled, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort srcx, GLshort srcy, - GLshort dstx, GLshort dsty, - GLshort w, GLshort h, - GLenum logic_op ); +extern void intelEmitCopyBlit(struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + dri_bo *src_buffer, + GLuint src_offset, + GLboolean src_tiled, + GLshort dst_pitch, + dri_bo *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort srcx, GLshort srcy, + GLshort dstx, GLshort dsty, + GLshort w, GLshort h, + GLenum logicop ); -extern void intelEmitFillBlit( struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ); +extern void intelEmitFillBlit(struct intel_context *intel, + GLuint cpp, + GLshort dst_pitch, + dri_bo *dst_buffer, + GLuint dst_offset, + GLboolean dst_tiled, + GLshort x, GLshort y, + GLshort w, GLshort h, GLuint color); void intelEmitImmediateColorExpandBlit(struct intel_context *intel, @@ -68,11 +67,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLubyte *src_bits, GLuint src_size, GLuint fg_color, GLshort dst_pitch, - struct buffer *dst_buffer, + dri_bo *dst_buffer, GLuint dst_offset, GLboolean dst_tiled, - GLshort dst_x, GLshort dst_y, + GLshort x, GLshort y, GLshort w, GLshort h, - GLenum logic_op ); + GLenum logic_op); #endif diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c new file mode 100644 index 0000000..951b8cb --- /dev/null +++ b/shared/intel_buffer_objects.c @@ -0,0 +1,285 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "imports.h" +#include "mtypes.h" +#include "bufferobj.h" + +#include "intel_context.h" +#include "intel_buffer_objects.h" +#include "intel_regions.h" +#include "dri_bufmgr.h" + +static GLboolean intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, + struct gl_buffer_object *obj); + +/** Allocates a new dri_bo to store the data for the buffer object. */ +static void +intel_bufferobj_alloc_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", + intel_obj->Base.Size, 64, + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); +} + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +intel_bufferobj_alloc(GLcontext * ctx, GLuint name, GLenum target) +{ + struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); + + _mesa_initialize_buffer_object(&obj->Base, name, target); + + obj->buffer = NULL; + + return &obj->Base; +} + +/* Break the COW tie to the region. The region gets to keep the data. + */ +void +intel_bufferobj_release_region(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + assert(intel_obj->region->buffer == intel_obj->buffer); + intel_obj->region->pbo = NULL; + intel_obj->region = NULL; + + dri_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; +} + +/* Break the COW tie to the region. Both the pbo and the region end + * up with a copy of the data. + */ +void +intel_bufferobj_cow(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + assert(intel_obj->region); + intel_region_cow(intel, intel_obj->region); +} + + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). + */ +static void +intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* Buffer objects are automatically unmapped when deleting according + * to the spec. + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); + + if (intel_obj->region) { + intel_bufferobj_release_region(intel, intel_obj); + } + else if (intel_obj->buffer) { + dri_bo_unreference(intel_obj->buffer); + } + + _mesa_free(intel_obj); +} + + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via glBufferDataARB(). + */ +static void +intel_bufferobj_data(GLcontext * ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + intel_obj->Base.Size = size; + intel_obj->Base.Usage = usage; + + /* Buffer objects are automatically unmapped when creating new data buffers + * according to the spec. + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); + + if (intel_obj->region) + intel_bufferobj_release_region(intel, intel_obj); + + if (intel_obj->buffer != NULL) { + dri_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + } + if (size != 0) { + intel_bufferobj_alloc_buffer(intel, intel_obj); + + if (data != NULL) + dri_bo_subdata(intel_obj->buffer, 0, size, data); + } +} + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +intel_bufferobj_subdata(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + + dri_bo_subdata(intel_obj->buffer, offset, size, data); +} + + +/** + * Called via glGetBufferSubDataARB(). + */ +static void +intel_bufferobj_get_subdata(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + dri_bo_get_subdata(intel_obj->buffer, offset, size, data); +} + + + +/** + * Called via glMapBufferARB(). + */ +static void * +intel_bufferobj_map(GLcontext * ctx, + GLenum target, + GLenum access, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* XXX: Translate access to flags arg below: + */ + assert(intel_obj); + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + + if (intel_obj->buffer == NULL) { + obj->Pointer = NULL; + return NULL; + } + + dri_bo_map(intel_obj->buffer, GL_TRUE); + obj->Pointer = intel_obj->buffer->virtual; + return obj->Pointer; +} + + +/** + * Called via glMapBufferARB(). + */ +static GLboolean +intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + if (intel_obj->buffer != NULL) { + assert(obj->Pointer); + dri_bo_unmap(intel_obj->buffer); + obj->Pointer = NULL; + } + return GL_TRUE; +} + +dri_bo * +intel_bufferobj_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj, GLuint flag) +{ + if (intel_obj->region) { + if (flag == INTEL_WRITE_PART) + intel_bufferobj_cow(intel, intel_obj); + else if (flag == INTEL_WRITE_FULL) { + intel_bufferobj_release_region(intel, intel_obj); + intel_bufferobj_alloc_buffer(intel, intel_obj); + } + } + + return intel_obj->buffer; +} + +void +intel_bufferobj_init(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + ctx->Driver.NewBufferObject = intel_bufferobj_alloc; + ctx->Driver.DeleteBuffer = intel_bufferobj_free; + ctx->Driver.BufferData = intel_bufferobj_data; + ctx->Driver.BufferSubData = intel_bufferobj_subdata; + ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; + ctx->Driver.MapBuffer = intel_bufferobj_map; + ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; +} diff --git a/i965/intel_buffer_objects.h b/shared/intel_buffer_objects.h index 4b38803..7cecc32 100644 --- a/i965/intel_buffer_objects.h +++ b/shared/intel_buffer_objects.h @@ -1,6 +1,6 @@ - /************************************************************************** +/************************************************************************** * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -31,25 +31,33 @@ #include "mtypes.h" struct intel_context; +struct intel_region; struct gl_buffer_object; /** * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. */ -struct intel_buffer_object { +struct intel_buffer_object +{ struct gl_buffer_object Base; - struct buffer *buffer; /* the low-level buffer manager's buffer handle */ + dri_bo *buffer; /* the low-level buffer manager's buffer handle */ + + struct intel_region *region; /* Is there a zero-copy texture + associated with this (pixel) + buffer object? */ }; /* Get the bm buffer associated with a GL bufferobject: */ -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); +dri_bo *intel_bufferobj_buffer(struct intel_context *intel, + struct intel_buffer_object + *obj, GLuint flag); /* Hook the bufferobject implementation into mesa: */ -void intel_bufferobj_init( struct intel_context *intel ); +void intel_bufferobj_init(struct intel_context *intel); @@ -58,13 +66,21 @@ void intel_bufferobj_init( struct intel_context *intel ); * the Name == 0 test is the only way to identify them and avoid * casting them erroneously to our structs. */ -static inline struct intel_buffer_object * -intel_buffer_object( struct gl_buffer_object *obj ) +static INLINE struct intel_buffer_object * +intel_buffer_object(struct gl_buffer_object *obj) { if (obj->Name) - return (struct intel_buffer_object *)obj; + return (struct intel_buffer_object *) obj; else return NULL; } +/* Helpers for zerocopy image uploads. See also intel_regions.h: + */ +void intel_bufferobj_cow(struct intel_context *intel, + struct intel_buffer_object *intel_obj); +void intel_bufferobj_release_region(struct intel_context *intel, + struct intel_buffer_object *intel_obj); + + #endif diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c new file mode 100644 index 0000000..75542a9 --- /dev/null +++ b/shared/intel_buffers.c @@ -0,0 +1,1094 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_chipset.h" +#include "intel_depthstencil.h" +#include "intel_fbo.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" +#include "context.h" +#include "utils.h" +#include "drirenderbuffer.h" +#include "framebuffer.h" +#include "swrast/swrast.h" +#include "vblank.h" +#include "i915_drm.h" + +/* This block can be removed when libdrm >= 2.3.1 is required */ + +#ifndef DRM_IOCTL_I915_FLIP + +#define DRM_VBLANK_FLIP 0x8000000 + +typedef struct drm_i915_flip { + int pipes; +} drm_i915_flip_t; + +#undef DRM_IOCTL_I915_FLIP +#define DRM_IOCTL_I915_FLIP DRM_IOW(DRM_COMMAND_BASE + DRM_I915_FLIP, \ + drm_i915_flip_t) + +#endif + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +/** + * XXX move this into a new dri/common/cliprects.c file. + */ +GLboolean +intel_intersect_cliprects(drm_clip_rect_t * dst, + const drm_clip_rect_t * a, + const drm_clip_rect_t * b) +{ + GLint bx = b->x1; + GLint by = b->y1; + GLint bw = b->x2 - bx; + GLint bh = b->y2 - by; + + if (bx < a->x1) + bw -= a->x1 - bx, bx = a->x1; + if (by < a->y1) + bh -= a->y1 - by, by = a->y1; + if (bx + bw > a->x2) + bw = a->x2 - bx; + if (by + bh > a->y2) + bh = a->y2 - by; + if (bw <= 0) + return GL_FALSE; + if (bh <= 0) + return GL_FALSE; + + dst->x1 = bx; + dst->y1 = by; + dst->x2 = bx + bw; + dst->y2 = by + bh; + + return GL_TRUE; +} + +/** + * Return pointer to current color drawing region, or NULL. + */ +struct intel_region * +intel_drawbuf_region(struct intel_context *intel) +{ + struct intel_renderbuffer *irbColor = + intel_renderbuffer(intel->ctx.DrawBuffer->_ColorDrawBuffers[0]); + if (irbColor) + return irbColor->region; + else + return NULL; +} + +/** + * Return pointer to current color reading region, or NULL. + */ +struct intel_region * +intel_readbuf_region(struct intel_context *intel) +{ + struct intel_renderbuffer *irb + = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); + if (irb) + return irb->region; + else + return NULL; +} + + + +/** + * Update the following fields for rendering to a user-created FBO: + * intel->numClipRects + * intel->pClipRects + * intel->drawX + * intel->drawY + */ +static void +intelSetRenderbufferClipRects(struct intel_context *intel) +{ + assert(intel->ctx.DrawBuffer->Width > 0); + assert(intel->ctx.DrawBuffer->Height > 0); + intel->fboRect.x1 = 0; + intel->fboRect.y1 = 0; + intel->fboRect.x2 = intel->ctx.DrawBuffer->Width; + intel->fboRect.y2 = intel->ctx.DrawBuffer->Height; + intel->numClipRects = 1; + intel->pClipRects = &intel->fboRect; + intel->drawX = 0; + intel->drawY = 0; +} + + +/** + * As above, but for rendering to front buffer of a window. + * \sa intelSetRenderbufferClipRects + */ +static void +intelSetFrontClipRects(struct intel_context *intel) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + if (!dPriv) + return; + + intel->numClipRects = dPriv->numClipRects; + intel->pClipRects = dPriv->pClipRects; + intel->drawX = dPriv->x; + intel->drawY = dPriv->y; +} + + +/** + * As above, but for rendering to back buffer of a window. + */ +static void +intelSetBackClipRects(struct intel_context *intel) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb; + + if (!dPriv) + return; + + intel_fb = dPriv->driverPrivate; + + if (intel_fb->pf_active || dPriv->numBackClipRects == 0) { + /* use the front clip rects */ + intel->numClipRects = dPriv->numClipRects; + intel->pClipRects = dPriv->pClipRects; + intel->drawX = dPriv->x; + intel->drawY = dPriv->y; + } + else { + /* use the back clip rects */ + intel->numClipRects = dPriv->numBackClipRects; + intel->pClipRects = dPriv->pBackClipRects; + intel->drawX = dPriv->backX; + intel->drawY = dPriv->backY; + } +} + +static void +intelUpdatePageFlipping(struct intel_context *intel, + GLint areaA, GLint areaB) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + GLboolean pf_active; + GLint pf_planes; + + /* Update page flipping info */ + pf_planes = 0; + + if (areaA > 0) + pf_planes |= 1; + + if (areaB > 0) + pf_planes |= 2; + + intel_fb->pf_current_page = (intel->sarea->pf_current_page >> + (intel_fb->pf_planes & 0x2)) & 0x3; + + intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2; + + pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes; + + if (INTEL_DEBUG & DEBUG_LOCK) + if (pf_active != intel_fb->pf_active) + _mesa_printf("%s - Page flipping %sactive\n", __progname, + pf_active ? "" : "in"); + + if (pf_active) { + /* Sync pages between planes if flipping on both at the same time */ + if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes && + (intel->sarea->pf_current_page & 0x3) != + (((intel->sarea->pf_current_page) >> 2) & 0x3)) { + drm_i915_flip_t flip; + + if (intel_fb->pf_current_page == + (intel->sarea->pf_current_page & 0x3)) { + /* XXX: This is ugly, but emitting two flips 'in a row' can cause + * lockups for unknown reasons. + */ + intel->sarea->pf_current_page = + intel->sarea->pf_current_page & 0x3; + intel->sarea->pf_current_page |= + ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) % + intel_fb->pf_num_pages) << 2; + + flip.pipes = 0x2; + } else { + intel->sarea->pf_current_page = + intel->sarea->pf_current_page & (0x3 << 2); + intel->sarea->pf_current_page |= + (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) % + intel_fb->pf_num_pages; + + flip.pipes = 0x1; + } + + drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip)); + } + + intel_fb->pf_planes = pf_planes; + } + + intel_fb->pf_active = pf_active; + intel_flip_renderbuffers(intel_fb); + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); +} + +/** + * This will be called whenever the currently bound window is moved/resized. + * XXX: actually, it seems to NOT be called when the window is only moved (BP). + */ +void +intelWindowMoved(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + + if (!intel->ctx.DrawBuffer) { + /* when would this happen? -BP */ + intelSetFrontClipRects(intel); + } + else if (intel->ctx.DrawBuffer->Name != 0) { + /* drawing to user-created FBO - do nothing */ + /* Cliprects would be set from intelDrawBuffer() */ + } + else { + /* drawing to a window */ + switch (intel_fb->Base._ColorDrawBufferIndexes[0]) { + case BUFFER_FRONT_LEFT: + intelSetFrontClipRects(intel); + break; + case BUFFER_BACK_LEFT: + intelSetBackClipRects(intel); + break; + default: + intelSetFrontClipRects(intel); + } + + } + + if (!intel->intelScreen->driScrnPriv->dri2.enabled && + intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) { + volatile struct drm_i915_sarea *sarea = intel->sarea; + drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w, + .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h }; + drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y, + .x2 = sarea->planeA_x + sarea->planeA_w, + .y2 = sarea->planeA_y + sarea->planeA_h }; + drm_clip_rect_t planeB_rect = { .x1 = sarea->planeB_x, .y1 = sarea->planeB_y, + .x2 = sarea->planeB_x + sarea->planeB_w, + .y2 = sarea->planeB_y + sarea->planeB_h }; + GLint areaA = driIntersectArea( drw_rect, planeA_rect ); + GLint areaB = driIntersectArea( drw_rect, planeB_rect ); + GLuint flags = dPriv->vblFlags; + + intelUpdatePageFlipping(intel, areaA, areaB); + + /* Update vblank info + */ + if (areaB > areaA || (areaA == areaB && areaB > 0)) { + flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY; + } else { + flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY; + } + + /* Check to see if we changed pipes */ + if (flags != dPriv->vblFlags && dPriv->vblFlags && + !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ)) { + int64_t count; + drmVBlank vbl; + int i; + + /* + * Deal with page flipping + */ + vbl.request.type = DRM_VBLANK_ABSOLUTE; + + if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) { + vbl.request.type |= DRM_VBLANK_SECONDARY; + } + + for (i = 0; i < intel_fb->pf_num_pages; i++) { + if (!intel_fb->color_rb[i] || + (intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <= + (1<<23)) + continue; + + vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending; + drmWaitVBlank(intel->driFd, &vbl); + } + + /* + * Update msc_base from old pipe + */ + driDrawableGetMSC32(dPriv->driScreenPriv, dPriv, &count); + dPriv->msc_base = count; + /* + * Then get new vblank_base and vblSeq values + */ + dPriv->vblFlags = flags; + driGetCurrentVBlank(dPriv); + dPriv->vblank_base = dPriv->vblSeq; + + intel_fb->vbl_waited = dPriv->vblSeq; + + for (i = 0; i < intel_fb->pf_num_pages; i++) { + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; + } + } + } else { + dPriv->vblFlags &= ~VBLANK_FLAG_SECONDARY; + } + + /* Update Mesa's notion of window size */ + driUpdateFramebufferSize(ctx, dPriv); + intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */ + + /* Update hardware scissor */ + if (ctx->Driver.Scissor != NULL) { + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); + } + + /* Re-calculate viewport related state */ + if (ctx->Driver.DepthRange != NULL) + ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far ); +} + + + +/* A true meta version of this would be very simple and additionally + * machine independent. Maybe we'll get there one day. + */ +static void +intelClearWithTris(struct intel_context *intel, GLbitfield mask) +{ + GLcontext *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint buf; + + intel->vtbl.install_meta_state(intel); + + /* Back and stencil cliprects are the same. Try and do both + * buffers at once: + */ + if (mask & (BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH)) { + struct intel_region *backRegion = + intel_get_rb_region(fb, BUFFER_BACK_LEFT); + struct intel_region *depthRegion = + intel_get_rb_region(fb, BUFFER_DEPTH); + + intel->vtbl.meta_draw_region(intel, backRegion, depthRegion); + + if (mask & BUFFER_BIT_BACK_LEFT) + intel->vtbl.meta_color_mask(intel, GL_TRUE); + else + intel->vtbl.meta_color_mask(intel, GL_FALSE); + + if (mask & BUFFER_BIT_STENCIL) + intel->vtbl.meta_stencil_replace(intel, + intel->ctx.Stencil.WriteMask[0], + intel->ctx.Stencil.Clear); + else + intel->vtbl.meta_no_stencil_write(intel); + + if (mask & BUFFER_BIT_DEPTH) + intel->vtbl.meta_depth_replace(intel); + else + intel->vtbl.meta_no_depth_write(intel); + + intel->vtbl.meta_draw_quad(intel, + fb->_Xmin, + fb->_Xmax, + fb->_Ymin, + fb->_Ymax, + intel->ctx.Depth.Clear, + intel->ClearColor8888, + 0, 0, 0, 0); /* texcoords */ + + mask &= ~(BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH); + } + + /* clear the remaining (color) renderbuffers */ + for (buf = 0; buf < BUFFER_COUNT && mask; buf++) { + const GLuint bufBit = 1 << buf; + if (mask & bufBit) { + struct intel_renderbuffer *irbColor = + intel_renderbuffer(fb->Attachment[buf].Renderbuffer); + + ASSERT(irbColor); + + intel->vtbl.meta_no_depth_write(intel); + intel->vtbl.meta_no_stencil_write(intel); + intel->vtbl.meta_color_mask(intel, GL_TRUE); + intel->vtbl.meta_draw_region(intel, irbColor->region, NULL); + + intel->vtbl.meta_draw_quad(intel, + fb->_Xmin, + fb->_Xmax, + fb->_Ymin, + fb->_Ymax, + 0, intel->ClearColor8888, + 0, 0, 0, 0); /* texcoords */ + + mask &= ~bufBit; + } + } + + intel->vtbl.leave_meta_state(intel); +} + +static const char *buffer_names[] = { + [BUFFER_FRONT_LEFT] = "front", + [BUFFER_BACK_LEFT] = "back", + [BUFFER_FRONT_RIGHT] = "front right", + [BUFFER_BACK_RIGHT] = "back right", + [BUFFER_AUX0] = "aux0", + [BUFFER_AUX1] = "aux1", + [BUFFER_AUX2] = "aux2", + [BUFFER_AUX3] = "aux3", + [BUFFER_DEPTH] = "depth", + [BUFFER_STENCIL] = "stencil", + [BUFFER_ACCUM] = "accum", + [BUFFER_COLOR0] = "color0", + [BUFFER_COLOR1] = "color1", + [BUFFER_COLOR2] = "color2", + [BUFFER_COLOR3] = "color3", + [BUFFER_COLOR4] = "color4", + [BUFFER_COLOR5] = "color5", + [BUFFER_COLOR6] = "color6", + [BUFFER_COLOR7] = "color7", +}; + +/** + * Called by ctx->Driver.Clear. + */ +static void +intelClear(GLcontext *ctx, GLbitfield mask) +{ + struct intel_context *intel = intel_context(ctx); + const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); + GLbitfield tri_mask = 0; + GLbitfield blit_mask = 0; + GLbitfield swrast_mask = 0; + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint i; + + if (0) + fprintf(stderr, "%s\n", __FUNCTION__); + + /* HW color buffers (front, back, aux, generic FBO, etc) */ + if (colorMask == ~0) { + /* clear all R,G,B,A */ + /* XXX FBO: need to check if colorbuffers are software RBOs! */ + blit_mask |= (mask & BUFFER_BITS_COLOR); + } + else { + /* glColorMask in effect */ + tri_mask |= (mask & BUFFER_BITS_COLOR); + } + + /* HW stencil */ + if (mask & BUFFER_BIT_STENCIL) { + const struct intel_region *stencilRegion + = intel_get_rb_region(fb, BUFFER_STENCIL); + if (stencilRegion) { + /* have hw stencil */ + if (IS_965(intel->intelScreen->deviceID) || + (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { + /* We have to use the 3D engine if we're clearing a partial mask + * of the stencil buffer, or if we're on a 965 which has a tiled + * depth/stencil buffer in a layout we can't blit to. + */ + tri_mask |= BUFFER_BIT_STENCIL; + } + else { + /* clearing all stencil bits, use blitting */ + blit_mask |= BUFFER_BIT_STENCIL; + } + } + } + + /* HW depth */ + if (mask & BUFFER_BIT_DEPTH) { + /* clear depth with whatever method is used for stencil (see above) */ + if (IS_965(intel->intelScreen->deviceID) || + tri_mask & BUFFER_BIT_STENCIL) + tri_mask |= BUFFER_BIT_DEPTH; + else + blit_mask |= BUFFER_BIT_DEPTH; + } + + /* SW fallback clearing */ + swrast_mask = mask & ~tri_mask & ~blit_mask; + + for (i = 0; i < BUFFER_COUNT; i++) { + GLuint bufBit = 1 << i; + if ((blit_mask | tri_mask) & bufBit) { + if (!fb->Attachment[i].Renderbuffer->ClassID) { + blit_mask &= ~bufBit; + tri_mask &= ~bufBit; + swrast_mask |= bufBit; + } + } + } + + if (blit_mask) { + if (INTEL_DEBUG & DEBUG_BLIT) { + DBG("blit clear:"); + for (i = 0; i < BUFFER_COUNT; i++) { + if (blit_mask & (1 << i)) + DBG(" %s", buffer_names[i]); + } + DBG("\n"); + } + intelClearWithBlit(ctx, blit_mask); + } + + if (tri_mask) { + if (INTEL_DEBUG & DEBUG_BLIT) { + DBG("tri clear:"); + for (i = 0; i < BUFFER_COUNT; i++) { + if (tri_mask & (1 << i)) + DBG(" %s", buffer_names[i]); + } + DBG("\n"); + } + intelClearWithTris(intel, tri_mask); + } + + if (swrast_mask) { + if (INTEL_DEBUG & DEBUG_BLIT) { + DBG("swrast clear:"); + for (i = 0; i < BUFFER_COUNT; i++) { + if (swrast_mask & (1 << i)) + DBG(" %s", buffer_names[i]); + } + DBG("\n"); + } + _swrast_Clear(ctx, swrast_mask); + } +} + + +/* Emit wait for pending flips */ +void +intel_wait_flips(struct intel_context *intel) +{ + struct intel_framebuffer *intel_fb = + (struct intel_framebuffer *) intel->ctx.DrawBuffer; + struct intel_renderbuffer *intel_rb = + intel_get_renderbuffer(&intel_fb->Base, + intel_fb->Base._ColorDrawBufferIndexes[0] == + BUFFER_FRONT_LEFT ? BUFFER_FRONT_LEFT : + BUFFER_BACK_LEFT); + + if (intel_fb->Base.Name == 0 && intel_rb && + intel_rb->pf_pending == intel_fb->pf_seq) { + GLint pf_planes = intel_fb->pf_planes; + BATCH_LOCALS; + + /* Wait for pending flips to take effect */ + BEGIN_BATCH(2, NO_LOOP_CLIPRECTS); + OUT_BATCH(pf_planes & 0x1 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_A_FLIP) + : 0); + OUT_BATCH(pf_planes & 0x2 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_B_FLIP) + : 0); + ADVANCE_BATCH(); + + intel_rb->pf_pending--; + } +} + + +/* Flip the front & back buffers + */ +static GLboolean +intelPageFlip(const __DRIdrawablePrivate * dPriv) +{ + struct intel_context *intel; + int ret; + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + + if (INTEL_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; + + if (intel->intelScreen->drmMinor < 9) + return GL_FALSE; + + intelFlush(&intel->ctx); + + ret = 0; + + LOCK_HARDWARE(intel); + + if (dPriv->numClipRects && intel_fb->pf_active) { + drm_i915_flip_t flip; + + flip.pipes = intel_fb->pf_planes; + + ret = drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip)); + } + + UNLOCK_HARDWARE(intel); + + if (ret || !intel_fb->pf_active) + return GL_FALSE; + + if (!dPriv->numClipRects) { + usleep(10000); /* throttle invisible client 10ms */ + } + + intel_fb->pf_current_page = (intel->sarea->pf_current_page >> + (intel_fb->pf_planes & 0x2)) & 0x3; + + if (dPriv->numClipRects != 0) { + intel_get_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT)->pf_pending = + intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->pf_pending = + ++intel_fb->pf_seq; + } + + intel_flip_renderbuffers(intel_fb); + intel_draw_buffer(&intel->ctx, &intel_fb->Base); + + return GL_TRUE; +} + +static GLboolean +intelScheduleSwap(__DRIdrawablePrivate * dPriv, GLboolean *missed_target) +{ + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + unsigned int interval; + struct intel_context *intel = + intelScreenContext(dPriv->driScreenPriv->private); + const intelScreenPrivate *intelScreen = intel->intelScreen; + unsigned int target; + drm_i915_vblank_swap_t swap; + GLboolean ret; + + if (!dPriv->vblFlags || + (dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) || + intelScreen->drmMinor < (intel_fb->pf_active ? 9 : 6)) + return GL_FALSE; + + interval = driGetVBlankInterval(dPriv); + + swap.seqtype = DRM_VBLANK_ABSOLUTE; + + if (dPriv->vblFlags & VBLANK_FLAG_SYNC) { + swap.seqtype |= DRM_VBLANK_NEXTONMISS; + } else if (interval == 0) + return GL_FALSE; + + swap.drawable = dPriv->hHWDrawable; + target = swap.sequence = dPriv->vblSeq + interval; + + if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) { + swap.seqtype |= DRM_VBLANK_SECONDARY; + } + + LOCK_HARDWARE(intel); + + intel_batchbuffer_flush(intel->batch); + + if ( intel_fb->pf_active ) { + swap.seqtype |= DRM_VBLANK_FLIP; + + intel_fb->pf_current_page = (((intel->sarea->pf_current_page >> + (intel_fb->pf_planes & 0x2)) & 0x3) + 1) % + intel_fb->pf_num_pages; + } + + if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap, + sizeof(swap))) { + dPriv->vblSeq = swap.sequence; + swap.sequence -= target; + *missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23); + + intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->vbl_pending = + intel_get_renderbuffer(&intel_fb->Base, + BUFFER_FRONT_LEFT)->vbl_pending = + dPriv->vblSeq; + + if (swap.seqtype & DRM_VBLANK_FLIP) { + intel_flip_renderbuffers(intel_fb); + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); + } + + ret = GL_TRUE; + } else { + if (swap.seqtype & DRM_VBLANK_FLIP) { + intel_fb->pf_current_page = ((intel->sarea->pf_current_page >> + (intel_fb->pf_planes & 0x2)) & 0x3) % + intel_fb->pf_num_pages; + } + + ret = GL_FALSE; + } + + UNLOCK_HARDWARE(intel); + + return ret; +} + +void +intelSwapBuffers(__DRIdrawablePrivate * dPriv) +{ + __DRIscreenPrivate *psp = dPriv->driScreenPriv; + + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel; + + if (ctx == NULL) + return; + + intel = intel_context(ctx); + + if (ctx->Visual.doubleBufferMode) { + GLboolean missed_target; + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + int64_t ust; + + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + + if (!intelScheduleSwap(dPriv, &missed_target)) { + driWaitForVBlank(dPriv, &missed_target); + + /* + * Update each buffer's vbl_pending so we don't get too out of + * sync + */ + intel_get_renderbuffer(&intel_fb->Base, + BUFFER_BACK_LEFT)->vbl_pending = + intel_get_renderbuffer(&intel_fb->Base, + BUFFER_FRONT_LEFT)->vbl_pending = + dPriv->vblSeq; + if (!intelPageFlip(dPriv)) { + intelCopyBuffer(dPriv, NULL); + } + } + + intel_fb->swap_count++; + (*psp->systemTime->getUST) (&ust); + if (missed_target) { + intel_fb->swap_missed_count++; + intel_fb->swap_missed_ust = ust - intel_fb->swap_ust; + } + + intel_fb->swap_ust = ust; + } + } + else { + /* XXX this shouldn't be an error but we can't handle it for now */ + fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); + } +} + +void +intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + struct intel_context *intel = + (struct intel_context *) dPriv->driContextPriv->driverPrivate; + GLcontext *ctx = &intel->ctx; + + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + intelCopyBuffer(dPriv, &rect); + } + } + else { + /* XXX this shouldn't be an error but we can't handle it for now */ + fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); + } +} + + +/** + * Update the hardware state for drawing into a window or framebuffer object. + * + * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other + * places within the driver. + * + * Basically, this needs to be called any time the current framebuffer + * changes, the renderbuffers change, or we need to draw into different + * color buffers. + */ +void +intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL; + struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + int front = 0; /* drawing to front color buffer? */ + + if (!fb) { + /* this can happen during the initial context initialization */ + return; + } + + /* Do this here, note core Mesa, since this function is called from + * many places within the driver. + */ + if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + } + + if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { + /* this may occur when we're called by glBindFrameBuffer() during + * the process of someone setting up renderbuffers, etc. + */ + /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ + return; + } + + if (fb->Name) + intel_validate_paired_depth_stencil(ctx, fb); + + /* If the batch contents require looping over cliprects, flush them before + * we go changing which cliprects get referenced when that happens. + */ + if (intel->batch->cliprect_mode == LOOP_CLIPRECTS) + intel_batchbuffer_flush(intel->batch); + + /* + * How many color buffers are we drawing into? + */ + if (fb->_NumColorDrawBuffers == 0) { + /* writing to 0 */ + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); + colorRegions[0] = NULL; + + if (fb->Name != 0) + intelSetRenderbufferClipRects(intel); + } else if (fb->_NumColorDrawBuffers > 1) { + int i; + struct intel_renderbuffer *irb; + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); + + if (fb->Name != 0) + intelSetRenderbufferClipRects(intel); + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); + colorRegions[i] = (irb && irb->region) ? irb->region : NULL; + } + } + else { + /* draw to exactly one color buffer */ + /*_mesa_debug(ctx, "Hardware rendering\n");*/ + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + front = 1; + } + + /* + * Get the intel_renderbuffer for the colorbuffer we're drawing into. + * And set up cliprects. + */ + if (fb->Name == 0) { + /* drawing to window system buffer */ + if (front) { + intelSetFrontClipRects(intel); + colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); + } + else { + intelSetBackClipRects(intel); + colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT); + } + } + else { + /* drawing to user-created FBO */ + struct intel_renderbuffer *irb; + intelSetRenderbufferClipRects(intel); + irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + colorRegions[0] = (irb && irb->region) ? irb->region : NULL; + } + } + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + if (ctx->Driver.FrontFace) + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); + else + ctx->NewState |= _NEW_POLYGON; + + if (!colorRegions[0]) { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); + } + else { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); + } + + /*** + *** Get depth buffer region and check if we need a software fallback. + *** Note that the depth buffer is usually a DEPTH_STENCIL buffer. + ***/ + if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) { + irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped); + if (irbDepth && irbDepth->region) { + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = irbDepth->region; + } + else { + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); + depthRegion = NULL; + } + } + else { + /* not using depth buffer */ + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = NULL; + } + + /*** + *** Stencil buffer + *** This can only be hardware accelerated if we're using a + *** combined DEPTH_STENCIL buffer (for now anyway). + ***/ + if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { + irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); + if (irbStencil && irbStencil->region) { + ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + /* need to re-compute stencil hw state */ + if (ctx->Driver.Enable != NULL) + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + else + ctx->NewState |= _NEW_STENCIL; + if (!depthRegion) + depthRegion = irbStencil->region; + } + else { + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); + } + } + else { + /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */ + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + /* need to re-compute stencil hw state */ + if (ctx->Driver.Enable != NULL) + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + else + ctx->NewState |= _NEW_STENCIL; + } + + /* + * Update depth test state + */ + if (ctx->Driver.Enable) { + if (ctx->Depth.Test && fb->Visual.depthBits > 0) { + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE); + } else { + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE); + } + } else { + ctx->NewState |= _NEW_DEPTH; + } + + intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, + fb->_NumColorDrawBuffers); + + /* update viewport since it depends on window size */ + if (ctx->Driver.Viewport) { + ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y, + ctx->Viewport.Width, ctx->Viewport.Height); + } else { + ctx->NewState |= _NEW_VIEWPORT; + } + + /* Set state we know depends on drawable parameters: + */ + if (ctx->Driver.Scissor) + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); + intel->NewGLState |= _NEW_SCISSOR; + + if (ctx->Driver.DepthRange) + ctx->Driver.DepthRange(ctx, + ctx->Viewport.Near, + ctx->Viewport.Far); +} + + +static void +intelDrawBuffer(GLcontext * ctx, GLenum mode) +{ + intel_draw_buffer(ctx, ctx->DrawBuffer); +} + + +static void +intelReadBuffer(GLcontext * ctx, GLenum mode) +{ + if (ctx->ReadBuffer == ctx->DrawBuffer) { + /* This will update FBO completeness status. + * A framebuffer will be incomplete if the GL_READ_BUFFER setting + * refers to a missing renderbuffer. Calling glReadBuffer can set + * that straight and can make the drawing buffer complete. + */ + intel_draw_buffer(ctx, ctx->DrawBuffer); + } + /* Generally, functions which read pixels (glReadPixels, glCopyPixels, etc) + * reference ctx->ReadBuffer and do appropriate state checks. + */ +} + + +void +intelInitBufferFuncs(struct dd_function_table *functions) +{ + functions->Clear = intelClear; + functions->DrawBuffer = intelDrawBuffer; + functions->ReadBuffer = intelReadBuffer; +} diff --git a/i915/intel_span.h b/shared/intel_buffers.h index 2d4f858..a669a85 100644 --- a/i915/intel_span.h +++ b/shared/intel_buffers.h @@ -1,6 +1,7 @@ + /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,17 +26,31 @@ * **************************************************************************/ -#ifndef _INTEL_SPAN_H -#define _INTEL_SPAN_H +#ifndef INTEL_BUFFERS_H +#define INTEL_BUFFERS_H + + +struct intel_context; +struct intel_framebuffer; + + +extern GLboolean +intel_intersect_cliprects(drm_clip_rect_t * dest, + const drm_clip_rect_t * a, + const drm_clip_rect_t * b); + +extern struct intel_region *intel_readbuf_region(struct intel_context *intel); + +extern struct intel_region *intel_drawbuf_region(struct intel_context *intel); + +extern void intel_wait_flips(struct intel_context *intel); -#include "drirenderbuffer.h" +extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); -extern void intelInitSpanFuncs( GLcontext *ctx ); +extern void intelWindowMoved(struct intel_context *intel); -extern void intelSpanRenderFinish( GLcontext *ctx ); -extern void intelSpanRenderStart( GLcontext *ctx ); +extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb); -extern void -intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); +extern void intelInitBufferFuncs(struct dd_function_table *functions); -#endif +#endif /* INTEL_BUFFERS_H */ diff --git a/shared/intel_bufmgr_ttm.c b/shared/intel_bufmgr_ttm.c new file mode 100644 index 0000000..194814e --- /dev/null +++ b/shared/intel_bufmgr_ttm.c @@ -0,0 +1,1122 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +#include <xf86drm.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> + +#include "errno.h" +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "string.h" +#include "imports.h" + +#include "i915_drm.h" + +#include "intel_bufmgr_ttm.h" +#ifdef TTM_API + +#define DBG(...) do { \ + if (bufmgr_ttm->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +/* + * These bits are always specified in each validation + * request. Other bits are not supported at this point + * as it would require a bit of investigation to figure + * out what mask value should be used. + */ +#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ + DRM_BO_FLAG_READ | \ + DRM_BO_FLAG_WRITE | \ + DRM_BO_FLAG_EXE) + +struct intel_validate_entry { + dri_bo *bo; + struct drm_i915_op_arg bo_arg; +}; + +struct dri_ttm_bo_bucket_entry { + drmBO drm_bo; + struct dri_ttm_bo_bucket_entry *next; +}; + +struct dri_ttm_bo_bucket { + struct dri_ttm_bo_bucket_entry *head; + struct dri_ttm_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_TTM_BO_BUCKETS 16 +typedef struct _dri_bufmgr_ttm { + dri_bufmgr bufmgr; + + int fd; + unsigned int fence_type; + unsigned int fence_type_flush; + + uint32_t max_relocs; + + struct intel_validate_entry *validate_array; + int validate_array_size; + int validate_count; + + /** Array of lists of cached drmBOs of power-of-two sizes */ + struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; +} dri_bufmgr_ttm; + +/** + * Private information associated with a relocation that isn't already stored + * in the relocation buffer to be passed to the kernel. + */ +struct dri_ttm_reloc { + dri_bo *target_buf; + uint64_t validate_flags; + /** Offset of target_buf after last execution of this relocation entry. */ + unsigned int last_target_offset; +}; + +typedef struct _dri_bo_ttm { + dri_bo bo; + + int refcount; + unsigned int map_count; + drmBO drm_bo; + const char *name; + + uint64_t last_flags; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** DRM buffer object containing relocation list */ + uint32_t *reloc_buf_data; + struct dri_ttm_reloc *relocs; + + /** + * Indicates that the buffer may be shared with other processes, so we + * can't hold maps beyond when the user does. + */ + GLboolean shared; + + GLboolean delayed_unmap; + /* Virtual address from the dri_bo_map whose unmap was delayed. */ + void *saved_virtual; +} dri_bo_ttm; + +typedef struct _dri_fence_ttm +{ + dri_fence fence; + + int refcount; + const char *name; + drmFence drm_fence; +} dri_fence_ttm; + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_ttm_bo_bucket * +dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_TTM_BO_BUCKETS) + return NULL; + + return &bufmgr_ttm->cache_bucket[i]; +} + + +static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) +{ + int i, j; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + if (bo_ttm->reloc_buf_data != NULL) { + for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { + uint32_t *reloc_entry = bo_ttm->reloc_buf_data + + I915_RELOC_HEADER + + j * I915_RELOC0_STRIDE; + dri_bo *target_bo = bo_ttm->relocs[j].target_buf; + dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; + + DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", + i, + bo_ttm->name, reloc_entry[0], + target_ttm->name, target_bo->offset, + reloc_entry[1]); + } + } else { + DBG("%2d: %s\n", i, bo_ttm->name); + } + } +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +intel_add_validate_buffer(dri_bo *buf, + uint64_t flags) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, + * do that now. + */ + if (ttm_buf->delayed_unmap) { + drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + ttm_buf->delayed_unmap = GL_FALSE; + } + + if (ttm_buf->validate_index == -1) { + struct intel_validate_entry *entry; + struct drm_i915_op_arg *arg; + struct drm_bo_op_req *req; + int index; + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { + int i, new_size = bufmgr_ttm->validate_array_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_ttm->validate_array = + realloc(bufmgr_ttm->validate_array, + sizeof(struct intel_validate_entry) * new_size); + bufmgr_ttm->validate_array_size = new_size; + + /* Update pointers for realloced mem. */ + for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { + bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) + &bufmgr_ttm->validate_array[i + 1].bo_arg; + } + } + + /* Pick out the new array entry for ourselves */ + index = bufmgr_ttm->validate_count; + ttm_buf->validate_index = index; + entry = &bufmgr_ttm->validate_array[index]; + bufmgr_ttm->validate_count++; + + /* Fill in array entry */ + entry->bo = buf; + dri_bo_reference(buf); + + /* Fill in kernel arg */ + arg = &entry->bo_arg; + req = &arg->d.req; + + memset(arg, 0, sizeof(*arg)); + req->bo_req.handle = ttm_buf->drm_bo.handle; + req->op = drm_bo_validate; + req->bo_req.flags = flags; + req->bo_req.hint = 0; +#ifdef DRM_BO_HINT_PRESUMED_OFFSET + /* PRESUMED_OFFSET indicates that all relocations pointing at this + * buffer have the correct offset. If any of our relocations don't, + * this flag will be cleared off the buffer later in the relocation + * processing. + */ + req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; + req->bo_req.presumed_offset = buf->offset; +#endif + req->bo_req.mask = INTEL_BO_MASK; + req->bo_req.fence_class = 0; /* Backwards compat. */ + + if (ttm_buf->reloc_buf_data != NULL) + arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; + else + arg->reloc_ptr = 0; + + /* Hook up the linked list of args for the kernel */ + arg->next = 0; + if (index != 0) { + bufmgr_ttm->validate_array[index - 1].bo_arg.next = + (unsigned long)arg; + } + } else { + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[ttm_buf->validate_index]; + struct drm_i915_op_arg *arg = &entry->bo_arg; + struct drm_bo_op_req *req = &arg->d.req; + uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; + uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; + + /* Buffer was already in the validate list. Extend its flags as + * necessary. + */ + + if (memFlags == 0) { + fprintf(stderr, + "%s: No shared memory types between " + "0x%16llx and 0x%16llx\n", + __FUNCTION__, req->bo_req.flags, flags); + abort(); + } + if (flags & ~INTEL_BO_MASK) { + fprintf(stderr, + "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", + __FUNCTION__, flags & ~INTEL_BO_MASK); + abort(); + } + req->bo_req.flags = memFlags | modeFlags; + } +} + + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static int +intel_setup_reloc_list(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + + bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, + sizeof(struct dri_ttm_reloc)); + bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); + + /* Initialize the relocation list with the header: + * DWORD 0: relocation count + * DWORD 1: relocation type + * DWORD 2+3: handle to next relocation list (currently none) 64-bits + */ + bo_ttm->reloc_buf_data[0] = 0; + bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; + bo_ttm->reloc_buf_data[2] = 0; + bo_ttm->reloc_buf_data[3] = 0; + + return 0; +} + +#if 0 +int +driFenceSignaled(DriFenceObject * fence, unsigned type) +{ + int signaled; + int ret; + + if (fence == NULL) + return GL_TRUE; + + ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); + BM_CKFATAL(ret); + return signaled; +} +#endif + +static dri_bo * +dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + unsigned int pageSize = getpagesize(); + int ret; + uint64_t flags; + unsigned int hint; + unsigned long alloc_size; + struct dri_ttm_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + /* The mask argument doesn't do anything for us that we want other than + * determine which pool (TTM or local) the buffer is allocated into, so + * just pass all of the allocation class flags. + */ + flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_EXE; + /* No hints we want to use. */ + hint = 0; + + /* Round the allocated size up to a power of two number of pages. */ + alloc_size = 1 << logbase2(size); + if (alloc_size < pageSize) + alloc_size = pageSize; + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) + alloc_size = size; + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_ttm_bo_bucket_entry *entry = bucket->head; + int busy; + + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + ttm_buf->drm_bo = entry->drm_bo; + free(entry); + } + } + + if (!alloc_from_cache) { + ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, + NULL, flags, hint, &ttm_buf->drm_bo); + if (ret != 0) { + free(ttm_buf); + return NULL; + } + } + + ttm_buf->bo.size = size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_FALSE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); + + return &ttm_buf->bo; +} + +/* Our TTM backend doesn't allow creation of static buffers, as that requires + * privelege for the non-fake case, and the lock in the fake case where we were + * working around the X Server not creating buffers and passing handles to us. + */ +static dri_bo * +dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) +{ + return NULL; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_bo_ttm *ttm_buf; + int ret; + + ttm_buf = calloc(1, sizeof(*ttm_buf)); + if (!ttm_buf) + return NULL; + + ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(-ret)); + free(ttm_buf); + return NULL; + } + ttm_buf->bo.size = ttm_buf->drm_bo.size; + ttm_buf->bo.offset = ttm_buf->drm_bo.offset; + ttm_buf->bo.virtual = NULL; + ttm_buf->bo.bufmgr = bufmgr; + ttm_buf->name = name; + ttm_buf->refcount = 1; + ttm_buf->reloc_buf_data = NULL; + ttm_buf->relocs = NULL; + ttm_buf->last_flags = ttm_buf->drm_bo.flags; + ttm_buf->shared = GL_TRUE; + ttm_buf->delayed_unmap = GL_FALSE; + ttm_buf->validate_index = -1; + + DBG("bo_create_from_handle: %p %08x (%s)\n", + &ttm_buf->bo, handle, ttm_buf->name); + + return &ttm_buf->bo; +} + +static void +dri_ttm_bo_reference(dri_bo *buf) +{ + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + ttm_buf->refcount++; +} + +static void +dri_ttm_bo_unreference(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + + if (!buf) + return; + + if (--ttm_buf->refcount == 0) { + struct dri_ttm_bo_bucket *bucket; + int ret; + + assert(ttm_buf->map_count == 0); + + if (ttm_buf->reloc_buf_data) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) + dri_bo_unreference(ttm_buf->relocs[i].target_buf); + free(ttm_buf->relocs); + + /* Free the kernel BO containing relocation entries */ + free(ttm_buf->reloc_buf_data); + ttm_buf->reloc_buf_data = NULL; + } + + if (ttm_buf->delayed_unmap) { + int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + } + + bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (!ttm_buf->shared && + bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_ttm_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->drm_bo = ttm_buf->drm_bo; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed (%s): %s\n", + ttm_buf->name, strerror(-ret)); + } + } + + DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + free(buf); + return; + } +} + +static int +dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + uint64_t flags; + int ret; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + flags = DRM_BO_FLAG_READ; + if (write_enable) + flags |= DRM_BO_FLAG_WRITE; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops. + */ + if (ttm_buf->map_count++ != 0) + return 0; + + assert(buf->virtual == NULL); + + DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + /* XXX: What about if we're upgrading from READ to WRITE? */ + if (ttm_buf->delayed_unmap) { + buf->virtual = ttm_buf->saved_virtual; + return 0; + } + + ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +static int +dri_ttm_bo_unmap(dri_bo *buf) +{ + dri_bufmgr_ttm *bufmgr_ttm; + dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; + int ret; + + if (buf == NULL) + return 0; + + assert(ttm_buf->map_count != 0); + if (--ttm_buf->map_count != 0) + return 0; + + bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; + + assert(buf->virtual != NULL); + + DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); + + if (!ttm_buf->shared) { + ttm_buf->saved_virtual = buf->virtual; + ttm_buf->delayed_unmap = GL_TRUE; + buf->virtual = NULL; + + return 0; + } + + buf->virtual = NULL; + + ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", + __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); + } + + return ret; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_fence * +intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + dri_fence_ttm *ttm_fence; + + ttm_fence = malloc(sizeof(*ttm_fence)); + if (!ttm_fence) + return NULL; + + ttm_fence->drm_fence.handle = arg->handle; + ttm_fence->drm_fence.fence_class = arg->fence_class; + ttm_fence->drm_fence.type = arg->type; + ttm_fence->drm_fence.flags = arg->flags; + ttm_fence->drm_fence.signaled = 0; + ttm_fence->drm_fence.sequence = arg->sequence; + + ttm_fence->fence.bufmgr = bufmgr; + ttm_fence->name = name; + ttm_fence->refcount = 1; + + DBG("fence_create_from_handle: %p (%s)\n", + &ttm_fence->fence, ttm_fence->name); + + return &ttm_fence->fence; +} + + +static void +dri_ttm_fence_reference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + ++fence_ttm->refcount; + DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_ttm_fence_unreference(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + + if (!fence) + return; + + DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); + + if (--fence_ttm->refcount == 0) { + int ret; + + ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); + if (ret != 0) { + fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", + fence_ttm->name, strerror(-ret)); + } + + free(fence); + return; + } +} + +static void +dri_ttm_fence_wait(dri_fence *fence) +{ + dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; + int ret; + + ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", + __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); + abort(); + } + + DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); +} + +static void +dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + free(bufmgr_ttm->validate_array); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; + struct dri_ttm_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); + if (ret != 0) { + fprintf(stderr, "drmBOUnreference failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_buf. + */ +static int +dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_buf) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; + dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; + int num_relocs; + uint32_t *this_reloc; + + /* Create a new relocation list if needed */ + if (reloc_buf_ttm->reloc_buf_data == NULL) + intel_setup_reloc_list(reloc_buf); + + num_relocs = reloc_buf_ttm->reloc_buf_data[0]; + + /* Check overflow */ + assert(num_relocs < bufmgr_ttm->max_relocs); + + this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + + num_relocs * I915_RELOC0_STRIDE; + + this_reloc[0] = offset; + this_reloc[1] = delta; + this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ + this_reloc[3] = 0; + + reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; + reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; + dri_bo_reference(target_buf); + + reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ + /* Check wraparound */ + assert(reloc_buf_ttm->reloc_buf_data[0] != 0); + return 0; +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_ttm_bo_process_reloc(dri_bo *bo) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_process_reloc(r->target_buf); + + /* Add the target to the validate list */ + intel_add_validate_buffer(r->target_buf, r->validate_flags); + + /* Clear the PRESUMED_OFFSET flag from the validate list entry of the + * target if this buffer has a stale relocated pointer at it. + */ + if (r->last_target_offset != r->target_buf->offset) { + dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; + struct intel_validate_entry *entry = + &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; + + entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; + } + } +} + +static void * +dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + + /* Update indices and set up the validate list. */ + dri_ttm_bo_process_reloc(batch_buf); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + intel_add_validate_buffer(batch_buf, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); + + *count = bufmgr_ttm->validate_count; + return &bufmgr_ttm->validate_array[0].bo_arg; +} + +static const char * +intel_get_flags_mem_type_string(uint64_t flags) +{ + switch (flags & DRM_BO_MASK_MEM) { + case DRM_BO_FLAG_MEM_LOCAL: return "local"; + case DRM_BO_FLAG_MEM_TT: return "ttm"; + case DRM_BO_FLAG_MEM_VRAM: return "vram"; + case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; + case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; + case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; + case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; + case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; + default: return NULL; + } +} + +static const char * +intel_get_flags_caching_string(uint64_t flags) +{ + switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { + case 0: return "UU"; + case DRM_BO_FLAG_CACHED: return "CU"; + case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; + case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; + default: return NULL; + } +} + +static void +intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) +{ + int i; + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; + struct drm_bo_arg_rep *rep = &arg->d.rep; + + /* Update the flags */ + if (rep->bo_info.flags != bo_ttm->last_flags) { + DBG("BO %s migrated: %s/%s -> %s/%s\n", + bo_ttm->name, + intel_get_flags_mem_type_string(bo_ttm->last_flags), + intel_get_flags_caching_string(bo_ttm->last_flags), + intel_get_flags_mem_type_string(rep->bo_info.flags), + intel_get_flags_caching_string(rep->bo_info.flags)); + + bo_ttm->last_flags = rep->bo_info.flags; + } + /* Update the buffer offset */ + if (rep->bo_info.offset != bo->offset) { + DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", + bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); + bo->offset = rep->bo_info.offset; + } + } +} + +/** + * Update the last target offset field of relocation entries for PRESUMED_OFFSET + * computation. + */ +static void +dri_ttm_bo_post_submit(dri_bo *bo) +{ + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + unsigned int nr_relocs; + int i; + + if (bo_ttm->reloc_buf_data == NULL) + return; + + nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; + + for (i = 0; i < nr_relocs; i++) { + struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; + + /* Continue walking the tree depth-first. */ + dri_ttm_bo_post_submit(r->target_buf); + + r->last_target_offset = r->target_buf->offset; + } +} + +static void +dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; + int i; + + intel_update_buffer_offsets (bufmgr_ttm); + + dri_ttm_bo_post_submit(batch_buf); + + if (bufmgr_ttm->bufmgr.debug) + dri_ttm_dump_validation_list(bufmgr_ttm); + + for (i = 0; i < bufmgr_ttm->validate_count; i++) { + dri_bo *bo = bufmgr_ttm->validate_array[i].bo; + dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; + + /* Disconnect the buffer from the validate list */ + bo_ttm->validate_index = -1; + dri_bo_unreference(bo); + bufmgr_ttm->validate_array[i].bo = NULL; + } + bufmgr_ttm->validate_count = 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; + int i; + + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { + bufmgr_ttm->cache_bucket[i].max_entries = -1; + } +} + +/* + * + */ +static int +dri_ttm_check_aperture_space(dri_bo *bo) +{ + return 0; +} + +/** + * Initializes the TTM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + * \param fence_type Driver-specific fence type used for fences with no flush. + * \param fence_type_flush Driver-specific fence type used for fences with a + * flush. + */ +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + dri_bufmgr_ttm *bufmgr_ttm; + int i; + + bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); + bufmgr_ttm->fd = fd; + bufmgr_ttm->fence_type = fence_type; + bufmgr_ttm->fence_type_flush = fence_type_flush; + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; + bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; + bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; + bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; + bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; + bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; + bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; + bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; + bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; + bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; + bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; + bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; + bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; + bufmgr_ttm->bufmgr.debug = GL_FALSE; + bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) + bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; + + return &bufmgr_ttm->bufmgr; +} +#else +dri_bufmgr * +intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size) +{ + return NULL; +} + +dri_bo * +intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + return NULL; +} + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) +{ +} +#endif diff --git a/shared/intel_bufmgr_ttm.h b/shared/intel_bufmgr_ttm.h new file mode 100644 index 0000000..f5bd64c --- /dev/null +++ b/shared/intel_bufmgr_ttm.h @@ -0,0 +1,28 @@ + +#ifndef INTEL_BUFMGR_TTM_H +#define INTEL_BUFMGR_TTM_H + +#include "dri_bufmgr.h" + +extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle); + +#ifdef TTM_API +dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg); +#endif + + +dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, + unsigned int fence_type_flush, int batch_size); + +void +intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); + +#ifndef TTM_API +#define DRM_I915_FENCE_CLASS_ACCEL 0 +#define DRM_I915_FENCE_TYPE_RW 2 +#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000 +#endif + +#endif diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h new file mode 100644 index 0000000..15b9ef4 --- /dev/null +++ b/shared/intel_chipset.h @@ -0,0 +1,97 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 + +#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_GM45_GM) + +#define IS_GM45_GM(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G) + +#define IS_915(devid) (devid == PCI_CHIP_I915_G || \ + devid == PCI_CHIP_E7221_G || \ + devid == PCI_CHIP_I915_GM) + +#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_G33_G || \ + devid == PCI_CHIP_Q33_G || \ + devid == PCI_CHIP_Q35_G) + +#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_GM45_GM(devid) || \ + IS_G4X(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) diff --git a/shared/intel_context.c b/shared/intel_context.c new file mode 100644 index 0000000..5fa9d95 --- /dev/null +++ b/shared/intel_context.c @@ -0,0 +1,1022 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "glheader.h" +#include "context.h" +#include "matrix.h" +#include "simple_list.h" +#include "extensions.h" +#include "framebuffer.h" +#include "imports.h" +#include "points.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" + +#include "tnl/t_pipeline.h" +#include "tnl/t_vertex.h" + +#include "drivers/common/driverfuncs.h" + +#include "intel_screen.h" + +#include "i830_dri.h" + +#include "intel_chipset.h" +#include "intel_buffers.h" +#include "intel_tex.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_pixel.h" +#include "intel_regions.h" +#include "intel_buffer_objects.h" +#include "intel_fbo.h" +#include "intel_decode.h" +#include "intel_bufmgr_ttm.h" + +#include "drirenderbuffer.h" +#include "vblank.h" +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ +#ifndef INTEL_DEBUG +int INTEL_DEBUG = (0); +#endif + +#define need_GL_NV_point_sprite +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ARB_window_pos +#define need_GL_ARB_occlusion_query +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#define need_GL_NV_vertex_program +#define need_GL_ATI_separate_stencil +#define need_GL_EXT_point_parameters +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_shader + +#include "extension_helper.h" + +#define DRIVER_DATE "20061102" + +static const GLubyte * +intelGetString(GLcontext * ctx, GLenum name) +{ + const char *chipset; + static char buffer[128]; + + switch (name) { + case GL_VENDOR: + return (GLubyte *) "Tungsten Graphics, Inc"; + break; + + case GL_RENDERER: + switch (intel_context(ctx)->intelScreen->deviceID) { + case PCI_CHIP_845_G: + chipset = "Intel(R) 845G"; + break; + case PCI_CHIP_I830_M: + chipset = "Intel(R) 830M"; + break; + case PCI_CHIP_I855_GM: + chipset = "Intel(R) 852GM/855GM"; + break; + case PCI_CHIP_I865_G: + chipset = "Intel(R) 865G"; + break; + case PCI_CHIP_I915_G: + chipset = "Intel(R) 915G"; + break; + case PCI_CHIP_E7221_G: + chipset = "Intel (R) E7221G (i915)"; + break; + case PCI_CHIP_I915_GM: + chipset = "Intel(R) 915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "Intel(R) 945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "Intel(R) 945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "Intel(R) 945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "Intel(R) G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Intel(R) Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Intel(R) Q33"; + break; + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I946_GZ: + chipset = "Intel(R) 946GZ"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + case PCI_CHIP_GM45_GM: + chipset = "Mobile Intel® GM45 Express Chipset"; + break; + case PCI_CHIP_IGD_E_G: + chipset = "Intel(R) Integrated Graphics Device"; + break; + case PCI_CHIP_G45_G: + chipset = "Intel(R) G45/G43"; + break; + case PCI_CHIP_Q45_G: + chipset = "Intel(R) Q45/Q43"; + break; + default: + chipset = "Unknown Intel Chipset"; + break; + } + + (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0); + return (GLubyte *) buffer; + + default: + return NULL; + } +} + +/** + * Extension strings exported by the intel driver. + * + * Extensions supported by all chips supported by i830_dri, i915_dri, or + * i965_dri. + */ +static const struct dri_extension card_extensions[] = { + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_NV_point_sprite", GL_NV_point_sprite_functions}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_non_power_of_two", NULL }, + {"GL_ARB_texture_rectangle", NULL}, + {"GL_NV_texture_rectangle", NULL}, + {"GL_EXT_texture_rectangle", NULL}, + {"GL_ARB_point_parameters", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, + {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, + {"GL_EXT_blend_equation_separate", + GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_logic_op", NULL}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, +#if 1 /* XXX FBO temporary? */ + {"GL_EXT_packed_depth_stencil", NULL}, +#endif + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_3DFX_texture_compression_FXT1", NULL}, + {"GL_APPLE_client_storage", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_NV_vertex_program1_1", NULL}, + { "GL_SGIS_generate_mipmap", NULL }, + {NULL, NULL} +}; + +static const struct dri_extension brw_extensions[] = { + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions}, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions}, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions}, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions}, + { "GL_ARB_point_sprite", NULL}, + { "GL_ARB_fragment_shader", NULL }, + { "GL_ARB_draw_buffers", NULL }, + { "GL_ARB_depth_texture", NULL }, + { "GL_ARB_fragment_program", NULL }, + { "GL_ARB_shadow", NULL }, + { "GL_EXT_shadow_funcs", NULL }, + { "GL_ARB_fragment_program_shadow", NULL }, + /* ARB extn won't work if not enabled */ + { "GL_SGIX_depth_texture", NULL }, + { "GL_EXT_texture_sRGB", NULL}, + { NULL, NULL } +}; + +static const struct dri_extension arb_oc_extensions[] = { + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, + {NULL, NULL} +}; + +static const struct dri_extension ttm_extensions[] = { + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, + {NULL, NULL} +}; + +/** + * Initializes potential list of extensions if ctx == NULL, or actually enables + * extensions for a context. + */ +void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) +{ + struct intel_context *intel = ctx?intel_context(ctx):NULL; + + /* Disable imaging extension until convolution is working in teximage paths. + */ + enable_imaging = GL_FALSE; + + driInitExtensions(ctx, card_extensions, enable_imaging); + + if (intel == NULL || intel->ttm) + driInitExtensions(ctx, ttm_extensions, GL_FALSE); + + if (intel == NULL || + (IS_965(intel->intelScreen->deviceID) && + intel->intelScreen->drmMinor >= 8)) + driInitExtensions(ctx, arb_oc_extensions, GL_FALSE); + + if (intel == NULL || IS_965(intel->intelScreen->deviceID)) + driInitExtensions(ctx, brw_extensions, GL_FALSE); +} + +static const struct dri_debug_control debug_control[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "mip", DEBUG_MIPTREE}, + { "fall", DEBUG_FALLBACKS}, + { "verb", DEBUG_VERBOSE}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "buf", DEBUG_BUFMGR}, + { "reg", DEBUG_REGION}, + { "fbo", DEBUG_FBO}, + { "lock", DEBUG_LOCK}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dri", DEBUG_DRI }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sleep", DEBUG_SLEEP }, + { "stats", DEBUG_STATS }, + { "tile", DEBUG_TILE }, + { "sing", DEBUG_SINGLE_THREAD }, + { "thre", DEBUG_SINGLE_THREAD }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { NULL, 0 } +}; + + +static void +intelInvalidateState(GLcontext * ctx, GLuint new_state) +{ + struct intel_context *intel = intel_context(ctx); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); + + intel->NewGLState |= new_state; + + if (intel->vtbl.invalidate_state) + intel->vtbl.invalidate_state( intel, new_state ); +} + + +void +intelFlush(GLcontext * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + if (intel->Fallback) + _swrast_flush(ctx); + + if (!IS_965(intel->intelScreen->deviceID)) + INTEL_FIREVERTICES(intel); + + if (intel->batch->map != intel->batch->ptr) + intel_batchbuffer_flush(intel->batch); + + /* XXX: Need to do an MI_FLUSH here. + */ +} + +void +intelFinish(GLcontext * ctx) +{ + struct intel_context *intel = intel_context(ctx); + intelFlush(ctx); + if (intel->batch->last_fence) { + dri_fence_wait(intel->batch->last_fence); + dri_fence_unreference(intel->batch->last_fence); + intel->batch->last_fence = NULL; + } +} + +static void +intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) +{ + struct intel_context *intel = intel_context( ctx ); + struct drm_i915_mmio io = { + .read_write = I915_MMIO_READ, + .reg = MMIO_REGS_PS_DEPTH_COUNT, + .data = &q->Result + }; + intel->stats_wm++; + intelFinish(&intel->ctx); + drmCommandWrite(intel->driFd, DRM_I915_MMIO, &io, sizeof(io)); +} + +static void +intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) +{ + struct intel_context *intel = intel_context( ctx ); + GLuint64EXT tmp; + struct drm_i915_mmio io = { + .read_write = I915_MMIO_READ, + .reg = MMIO_REGS_PS_DEPTH_COUNT, + .data = &tmp + }; + intelFinish(&intel->ctx); + drmCommandWrite(intel->driFd, DRM_I915_MMIO, &io, sizeof(io)); + q->Result = tmp - q->Result; + q->Ready = GL_TRUE; + intel->stats_wm--; +} + +/** Driver-specific fence emit implementation for the fake memory manager. */ +static unsigned int +intel_fence_emit(void *private) +{ + struct intel_context *intel = (struct intel_context *)private; + unsigned int fence; + + /* XXX: Need to emit a flush, if we haven't already (at least with the + * current batchbuffer implementation, we have). + */ + + fence = intelEmitIrqLocked(intel); + + return fence; +} + +/** Driver-specific fence wait implementation for the fake memory manager. */ +static int +intel_fence_wait(void *private, unsigned int cookie) +{ + struct intel_context *intel = (struct intel_context *)private; + + intelWaitIrq(intel, cookie); + + return 0; +} + +static GLboolean +intel_init_bufmgr(struct intel_context *intel) +{ + intelScreenPrivate *intelScreen = intel->intelScreen; + GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; + GLboolean ttm_supported; + + /* If we've got a new enough DDX that's initializing TTM and giving us + * object handles for the shared buffers, use that. + */ + intel->ttm = GL_FALSE; + if (intel->intelScreen->driScrnPriv->dri2.enabled) + ttm_supported = GL_TRUE; + else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && + intel->intelScreen->drmMinor >= 11 && + intel->intelScreen->front.bo_handle != -1) + ttm_supported = GL_TRUE; + else + ttm_supported = GL_FALSE; + + if (!ttm_disable && ttm_supported) { + int bo_reuse_mode; + intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, + DRM_FENCE_TYPE_EXE, + DRM_FENCE_TYPE_EXE | + DRM_I915_FENCE_TYPE_RW, + BATCH_SZ); + if (intel->bufmgr != NULL) + intel->ttm = GL_TRUE; + + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_ttm_enable_bo_reuse(intel->bufmgr); + break; + } + } + /* Otherwise, use the classic buffer manager. */ + if (intel->bufmgr == NULL) { + if (ttm_disable) { + fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); + } else { + fprintf(stderr, "Failed to initialize TTM buffer manager. " + "Falling back to classic.\n"); + } + + if (intelScreen->tex.size == 0) { + fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", + __func__, __LINE__); + return GL_FALSE; + } + + intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); + } + + return GL_TRUE; +} + +void +intelInitDriverFunctions(struct dd_function_table *functions) +{ + _mesa_init_driver_functions(functions); + + functions->Flush = intelFlush; + functions->Finish = intelFinish; + functions->GetString = intelGetString; + functions->UpdateState = intelInvalidateState; + + functions->CopyColorTable = _swrast_CopyColorTable; + functions->CopyColorSubTable = _swrast_CopyColorSubTable; + functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D; + functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; + + functions->BeginQuery = intelBeginQuery; + functions->EndQuery = intelEndQuery; + + intelInitTextureFuncs(functions); + intelInitStateFuncs(functions); + intelInitBufferFuncs(functions); + intelInitPixelFuncs(functions); +} + + +GLboolean +intelInitContext(struct intel_context *intel, + const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions) +{ + GLcontext *ctx = &intel->ctx; + GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + volatile struct drm_i915_sarea *saPriv = (struct drm_i915_sarea *) + (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset); + int fthrottle_mode; + + if (!_mesa_initialize_context(&intel->ctx, mesaVis, shareCtx, + functions, (void *) intel)) { + _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__); + return GL_FALSE; + } + + driContextPriv->driverPrivate = intel; + intel->intelScreen = intelScreen; + intel->driScreen = sPriv; + intel->sarea = saPriv; + + /* Dri stuff */ + intel->hHWContext = driContextPriv->hHWContext; + intel->driFd = sPriv->fd; + intel->driHwLock = sPriv->lock; + + intel->width = intelScreen->width; + intel->height = intelScreen->height; + + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + intel->driScreen->myNum, + IS_965(intelScreen->deviceID) ? "i965" : "i915"); + if (intelScreen->deviceID == PCI_CHIP_I865_G) + intel->maxBatchSize = 4096; + else + intel->maxBatchSize = BATCH_SZ; + + if (!intel_init_bufmgr(intel)) + return GL_FALSE; + + ctx->Const.MaxTextureMaxAnisotropy = 2.0; + + /* This doesn't yet catch all non-conformant rendering, but it's a + * start. + */ + if (getenv("INTEL_STRICT_CONFORMANCE")) { + intel->strict_conformance = 1; + } + + if (intel->strict_conformance) { + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 1.0; + ctx->Const.MaxLineWidthAA = 1.0; + ctx->Const.LineWidthGranularity = 1.0; + } + else { + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 5.0; + ctx->Const.MaxLineWidthAA = 5.0; + ctx->Const.LineWidthGranularity = 0.5; + } + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = 255.0; + ctx->Const.MaxPointSizeAA = 3.0; + ctx->Const.PointSizeGranularity = 1.0; + + /* reinitialize the context point state. + * It depend on constants in __GLcontextRec::Const + */ + _mesa_init_point(ctx); + + ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ + + /* Initialize the software rasterizer and helper modules. */ + _swrast_CreateContext(ctx); + _vbo_CreateContext(ctx); + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + + /* Configure swrast to match hardware characteristics: */ + _swrast_allow_pixel_fog(ctx, GL_FALSE); + _swrast_allow_vertex_fog(ctx, GL_TRUE); + + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; + intel->hw_stipple = 1; + + /* XXX FBO: this doesn't seem to be used anywhere */ + switch (mesaVis->depthBits) { + case 0: /* what to do in this case? */ + case 16: + intel->polygon_offset_scale = 1.0; + break; + case 24: + intel->polygon_offset_scale = 2.0; /* req'd to pass glean */ + break; + default: + assert(0); + break; + } + + if (IS_965(intelScreen->deviceID)) + intel->polygon_offset_scale /= 0xffff; + + intel->RenderIndex = ~0; + + fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode"); + intel->irqsEmitted = 0; + + intel->do_irqs = (intel->intelScreen->irq_active && + fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); + + intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + + _math_matrix_ctr(&intel->ViewportMatrix); + + if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) { + _mesa_printf("IRQs not active. Exiting\n"); + exit(1); + } + + intelInitExtensions(ctx, GL_FALSE); + + INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); + if (INTEL_DEBUG & DEBUG_BUFMGR) + dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); + + if (!sPriv->dri2.enabled) + intel_recreate_static_regions(intel); + + intel->batch = intel_batchbuffer_alloc(intel); + intel->last_swap_fence = NULL; + intel->first_swap_fence = NULL; + + intel_bufferobj_init(intel); + intel_fbo_init(intel); + + if (intel->ctx.Mesa_DXTn) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } + else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + intel->prim.primitive = ~0; + + /* Force all software fallbacks */ + if (driQueryOptionb(&intel->optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D rasterization\n"); + FALLBACK(intel, INTEL_FALLBACK_USER, 1); + intel->no_rast = 1; + } + + /* Disable all hardware rendering (skip emitting batches and fences/waits + * to the kernel) + */ + intel->no_hw = getenv("INTEL_NO_HW") != NULL; + + return GL_TRUE; +} + +void +intelDestroyContext(__DRIcontextPrivate * driContextPriv) +{ + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + + assert(intel); /* should never be null */ + if (intel) { + GLboolean release_texture_heaps; + + INTEL_FIREVERTICES(intel); + + intel->vtbl.destroy(intel); + + release_texture_heaps = (intel->ctx.Shared->RefCount == 1); + _swsetup_DestroyContext(&intel->ctx); + _tnl_DestroyContext(&intel->ctx); + _vbo_DestroyContext(&intel->ctx); + + _swrast_DestroyContext(&intel->ctx); + intel->Fallback = 0; /* don't call _swrast_Flush later */ + + intel_batchbuffer_free(intel->batch); + + if (intel->last_swap_fence) { + dri_fence_wait(intel->last_swap_fence); + dri_fence_unreference(intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + if (intel->first_swap_fence) { + dri_fence_wait(intel->first_swap_fence); + dri_fence_unreference(intel->first_swap_fence); + intel->first_swap_fence = NULL; + } + + if (release_texture_heaps) { + /* This share group is about to go away, free our private + * texture object data. + */ + if (INTEL_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "do something to free texture heaps\n"); + } + + /* free the Mesa context */ + _mesa_free_context_data(&intel->ctx); + + dri_bufmgr_destroy(intel->bufmgr); + } +} + +GLboolean +intelUnbindContext(__DRIcontextPrivate * driContextPriv) +{ + return GL_TRUE; +} + +GLboolean +intelMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv) +{ + __DRIscreenPrivate *psp = driDrawPriv->driScreenPriv; + + if (driContextPriv) { + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + struct intel_framebuffer *intel_fb = + (struct intel_framebuffer *) driDrawPriv->driverPrivate; + GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate; + + + /* XXX FBO temporary fix-ups! */ + /* if the renderbuffers don't have regions, init them from the context */ + if (!driContextPriv->driScreenPriv->dri2.enabled) { + struct intel_renderbuffer *irbDepth + = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + struct intel_renderbuffer *irbStencil + = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + if (intel_fb->color_rb[0]) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], + intel->front_region); + } + if (intel_fb->color_rb[1]) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], + intel->back_region); + } +#if 0 + if (intel_fb->color_rb[2]) { + intel_renderbuffer_set_region(intel_fb->color_rb[2], + intel->third_region); + } +#endif + if (irbDepth) { + intel_renderbuffer_set_region(irbDepth, intel->depth_region); + } + if (irbStencil) { + intel_renderbuffer_set_region(irbStencil, intel->depth_region); + } + } + + /* set GLframebuffer size to match window, if needed */ + driUpdateFramebufferSize(&intel->ctx, driDrawPriv); + + if (driReadPriv != driDrawPriv) { + driUpdateFramebufferSize(&intel->ctx, driReadPriv); + } + + _mesa_make_current(&intel->ctx, &intel_fb->Base, readFb); + + /* The drawbuffer won't always be updated by _mesa_make_current: + */ + if (intel->ctx.DrawBuffer == &intel_fb->Base) { + + if (intel->driReadDrawable != driReadPriv) + intel->driReadDrawable = driReadPriv; + + if (intel->driDrawable != driDrawPriv) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + int i; + + driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0) + ? driGetDefaultVBlankFlags(&intel->optionCache) + : VBLANK_FLAG_NO_IRQ; + + (*psp->systemTime->getUST) (&intel_fb->swap_ust); + driDrawableInitVBlank(driDrawPriv); + intel_fb->vbl_waited = driDrawPriv->vblSeq; + + for (i = 0; i < (intel->intelScreen->third.handle ? 3 : 2); i++) { + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; + } + } + intel->driDrawable = driDrawPriv; + intelWindowMoved(intel); + } + + intel_draw_buffer(&intel->ctx, &intel_fb->Base); + } + } + else { + _mesa_make_current(NULL, NULL, NULL); + } + + return GL_TRUE; +} + +static void +intelContendedLock(struct intel_context *intel, GLuint flags) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + __DRIscreenPrivate *sPriv = intel->driScreen; + volatile struct drm_i915_sarea *sarea = intel->sarea; + int me = intel->hHWContext; + + drmGetLock(intel->driFd, intel->hHWContext, flags); + intel->locked = 1; + + if (INTEL_DEBUG & DEBUG_LOCK) + _mesa_printf("%s - got contended lock\n", __progname); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); + + if (sarea && sarea->ctxOwner != me) { + if (INTEL_DEBUG & DEBUG_BUFMGR) { + fprintf(stderr, "Lost Context: sarea->ctxOwner %x me %x\n", + sarea->ctxOwner, me); + } + sarea->ctxOwner = me; + } + + /* If the last consumer of the texture memory wasn't us, notify the fake + * bufmgr and record the new owner. We should have the memory shared + * between contexts of a single fake bufmgr, but this will at least make + * things correct for now. + */ + if (!intel->ttm && sarea->texAge != intel->hHWContext) { + sarea->texAge = intel->hHWContext; + dri_bufmgr_fake_contended_lock_take(intel->bufmgr); + if (INTEL_DEBUG & DEBUG_BATCH) + intel_decode_context_reset(); + if (INTEL_DEBUG & DEBUG_BUFMGR) + fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n", + sarea->ctxOwner, intel->hHWContext); + } + + if (sarea->width != intel->width || sarea->height != intel->height) { + int numClipRects = intel->numClipRects; + + /* + * FIXME: Really only need to do this when drawing to a + * common back- or front buffer. + */ + + /* + * This will essentially drop the outstanding batchbuffer on + * the floor. + */ + intel->numClipRects = 0; + + if (intel->Fallback) + _swrast_flush(&intel->ctx); + + if (!IS_965(intel->intelScreen->deviceID)) + INTEL_FIREVERTICES(intel); + + if (intel->batch->map != intel->batch->ptr) + intel_batchbuffer_flush(intel->batch); + + intel->numClipRects = numClipRects; + + /* force window update */ + intel->lastStamp = 0; + + intel->width = sarea->width; + intel->height = sarea->height; + } + + /* Drawable changed? + */ + if (dPriv && intel->lastStamp != dPriv->lastStamp) { + intelWindowMoved(intel); + intel->lastStamp = dPriv->lastStamp; + } +} + + +_glthread_DECLARE_STATIC_MUTEX(lockMutex); + +/* Lock the hardware and validate our state. + */ +void LOCK_HARDWARE( struct intel_context *intel ) +{ + __DRIdrawable *dPriv = intel->driDrawable; + __DRIscreen *sPriv = intel->driScreen; + char __ret = 0; + struct intel_framebuffer *intel_fb = NULL; + struct intel_renderbuffer *intel_rb = NULL; + + _glthread_LOCK_MUTEX(lockMutex); + assert(!intel->locked); + intel->locked = 1; + + if (intel->driDrawable) { + intel_fb = intel->driDrawable->driverPrivate; + + if (intel_fb) + intel_rb = + intel_get_renderbuffer(&intel_fb->Base, + intel_fb->Base._ColorDrawBufferIndexes[0]); + } + + if (intel_rb && dPriv->vblFlags && + !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) && + (intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) { + drmVBlank vbl; + + vbl.request.type = DRM_VBLANK_ABSOLUTE; + + if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) { + vbl.request.type |= DRM_VBLANK_SECONDARY; + } + + vbl.request.sequence = intel_rb->vbl_pending; + drmWaitVBlank(intel->driFd, &vbl); + intel_fb->vbl_waited = vbl.reply.sequence; + } + + DRM_CAS(intel->driHwLock, intel->hHWContext, + (DRM_LOCK_HELD|intel->hHWContext), __ret); + + if (sPriv->dri2.enabled) { + if (__ret) + drmGetLock(intel->driFd, intel->hHWContext, 0); + if (__driParseEvents(dPriv->driContextPriv, dPriv)) { + intelWindowMoved(intel); + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); + } + } else if (__ret) { + intelContendedLock( intel, 0 ); + } + + + if (INTEL_DEBUG & DEBUG_LOCK) + _mesa_printf("%s - locked\n", __progname); +} + + +/* Unlock the hardware using the global current context + */ +void UNLOCK_HARDWARE( struct intel_context *intel ) +{ + intel->vtbl.note_unlock( intel ); + intel->locked = 0; + + DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); + + _glthread_UNLOCK_MUTEX(lockMutex); + + if (INTEL_DEBUG & DEBUG_LOCK) + _mesa_printf("%s - unlocked\n", __progname); + + /** + * Nothing should be left in batch outside of LOCK/UNLOCK which references + * cliprects. + */ + assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS); +} + diff --git a/shared/intel_context.h b/shared/intel_context.h new file mode 100644 index 0000000..df79ab8 --- /dev/null +++ b/shared/intel_context.h @@ -0,0 +1,502 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELCONTEXT_INC +#define INTELCONTEXT_INC + + + +#include "mtypes.h" +#include "drm.h" +#include "mm.h" +#include "texmem.h" +#include "dri_bufmgr.h" + +#include "intel_screen.h" +#include "intel_tex_obj.h" +#include "i915_drm.h" +#include "tnl/t_vertex.h" + +#define TAG(x) intel##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +#define DV_PF_555 (1<<8) +#define DV_PF_565 (2<<8) +#define DV_PF_8888 (3<<8) + +struct intel_region; +struct intel_context; + +typedef void (*intel_tri_func) (struct intel_context *, intelVertex *, + intelVertex *, intelVertex *); +typedef void (*intel_line_func) (struct intel_context *, intelVertex *, + intelVertex *); +typedef void (*intel_point_func) (struct intel_context *, intelVertex *); + +#define INTEL_FALLBACK_DRAW_BUFFER 0x1 +#define INTEL_FALLBACK_READ_BUFFER 0x2 +#define INTEL_FALLBACK_DEPTH_BUFFER 0x4 +#define INTEL_FALLBACK_STENCIL_BUFFER 0x8 +#define INTEL_FALLBACK_USER 0x10 +#define INTEL_FALLBACK_RENDERMODE 0x20 +#define INTEL_FALLBACK_TEXTURE 0x40 + +extern void intelFallback(struct intel_context *intel, GLuint bit, + GLboolean mode); +#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) + + +#define INTEL_WRITE_PART 0x1 +#define INTEL_WRITE_FULL 0x2 +#define INTEL_READ 0x4 + +#define INTEL_MAX_FIXUP 64 + +struct intel_context +{ + GLcontext ctx; /* the parent class */ + + struct + { + void (*destroy) (struct intel_context * intel); + void (*emit_state) (struct intel_context * intel); + void (*new_batch) (struct intel_context * intel); + void (*emit_invarient_state) (struct intel_context * intel); + void (*note_fence) (struct intel_context *intel, GLuint fence); + void (*note_unlock) (struct intel_context *intel); + void (*update_texture_state) (struct intel_context * intel); + + void (*render_start) (struct intel_context * intel); + void (*render_prevalidate) (struct intel_context * intel); + void (*set_draw_region) (struct intel_context * intel, + struct intel_region * draw_regions[], + struct intel_region * depth_region, + GLuint num_regions); + + GLuint (*flush_cmd) (void); + void (*emit_flush) (struct intel_context *intel, GLuint unused); + + void (*reduced_primitive_state) (struct intel_context * intel, + GLenum rprim); + + GLboolean (*check_vertex_size) (struct intel_context * intel, + GLuint expected); + void (*invalidate_state) (struct intel_context *intel, + GLuint new_state); + + + /* Metaops: + */ + void (*install_meta_state) (struct intel_context * intel); + void (*leave_meta_state) (struct intel_context * intel); + + void (*meta_draw_region) (struct intel_context * intel, + struct intel_region * draw_region, + struct intel_region * depth_region); + + void (*meta_draw_quad)(struct intel_context *intel, + GLfloat x0, GLfloat x1, + GLfloat y0, GLfloat y1, + GLfloat z, + GLuint color, /* ARGB32 */ + GLfloat s0, GLfloat s1, + GLfloat t0, GLfloat t1); + + void (*meta_color_mask) (struct intel_context * intel, GLboolean); + + void (*meta_stencil_replace) (struct intel_context * intel, + GLuint mask, GLuint clear); + + void (*meta_depth_replace) (struct intel_context * intel); + + void (*meta_texture_blend_replace) (struct intel_context * intel); + + void (*meta_no_stencil_write) (struct intel_context * intel); + void (*meta_no_depth_write) (struct intel_context * intel); + void (*meta_no_texture) (struct intel_context * intel); + + void (*meta_import_pixel_state) (struct intel_context * intel); + void (*meta_frame_buffer_texture) (struct intel_context *intel, + GLint xoff, GLint yoff); + + GLboolean(*meta_tex_rect_source) (struct intel_context * intel, + dri_bo * buffer, + GLuint offset, + GLuint pitch, + GLuint height, + GLenum format, GLenum type); + + void (*assert_not_dirty) (struct intel_context *intel); + + void (*debug_batch)(struct intel_context *intel); + } vtbl; + + GLint refcount; + GLuint Fallback; + GLuint NewGLState; + + dri_bufmgr *bufmgr; + unsigned int maxBatchSize; + + struct intel_region *front_region; + struct intel_region *back_region; + struct intel_region *third_region; + struct intel_region *depth_region; + + /** + * This value indicates that the kernel memory manager is being used + * instead of the fake client-side memory manager. + */ + GLboolean ttm; + + dri_fence *last_swap_fence; + dri_fence *first_swap_fence; + + struct intel_batchbuffer *batch; + GLboolean no_batch_wrap; + unsigned batch_id; + + struct + { + GLuint id; + GLuint primitive; + GLubyte *start_ptr; + void (*flush) (struct intel_context *); + } prim; + + GLuint stats_wm; + GLboolean locked; + char *prevLockFile; + int prevLockLine; + + GLubyte clear_chan[4]; + GLuint ClearColor565; + GLuint ClearColor8888; + + /* Offsets of fields within the current vertex: + */ + GLuint coloroffset; + GLuint specoffset; + GLuint wpos_offset; + GLuint wpos_size; + + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + + GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ + + GLboolean hw_stencil; + GLboolean hw_stipple; + GLboolean depth_buffer_is_float; + GLboolean no_rast; + GLboolean strict_conformance; + + /* State for intelvb.c and inteltris.c. + */ + GLuint RenderIndex; + GLmatrix ViewportMatrix; + GLenum render_primitive; + GLenum reduced_primitive; + GLuint vertex_size; + GLubyte *verts; /* points to tnl->clipspace.vertex_buf */ + struct intel_region *draw_region; + + /* Fallback rasterization functions + */ + intel_point_func draw_point; + intel_line_func draw_line; + intel_tri_func draw_tri; + + /* These refer to the current drawing buffer: + */ + int drawX, drawY; /**< origin of drawing area within region */ + GLuint numClipRects; /**< cliprects for drawing */ + drm_clip_rect_t *pClipRects; + struct gl_texture_object *frame_buffer_texobj; + drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ + + int perf_boxes; + + GLuint do_usleeps; + int do_irqs; + GLuint irqsEmitted; + + GLboolean scissor; + drm_clip_rect_t draw_rect; + drm_clip_rect_t scissor_rect; + + drm_context_t hHWContext; + drmLock *driHwLock; + int driFd; + + __DRIdrawablePrivate *driDrawable; + __DRIdrawablePrivate *driReadDrawable; + __DRIscreenPrivate *driScreen; + intelScreenPrivate *intelScreen; + volatile struct drm_i915_sarea *sarea; + + GLuint lastStamp; + + GLboolean no_hw; + + /** + * Configuration cache + */ + driOptionCache optionCache; + + /* Last seen width/height of the screen */ + int width; + int height; + + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; +}; + +/* These are functions now: + */ +void LOCK_HARDWARE( struct intel_context *intel ); +void UNLOCK_HARDWARE( struct intel_context *intel ); + +extern char *__progname; + + +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) + +#define INTEL_FIREVERTICES(intel) \ +do { \ + if ((intel)->prim.flush) \ + (intel)->prim.flush(intel); \ +} while (0) + +/* ================================================================ + * Color packing: + */ + +#define INTEL_PACKCOLOR4444(r,g,b,a) \ + ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define INTEL_PACKCOLOR1555(r,g,b,a) \ + ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ + ((a) ? 0x8000 : 0)) + +#define INTEL_PACKCOLOR565(r,g,b) \ + ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define INTEL_PACKCOLOR8888(r,g,b,a) \ + ((a<<24) | (r<<16) | (g<<8) | b) + +#define INTEL_PACKCOLOR(format, r, g, b, a) \ +(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) : \ + (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) : \ + (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) : \ + 0))) + +/* ================================================================ + * From linux kernel i386 header files, copes with odd sizes better + * than COPY_DWORDS would: + * XXX Put this in src/mesa/main/imports.h ??? + */ +#if defined(i386) || defined(__i386__) +static INLINE void * __memcpy(void * to, const void * from, size_t n) +{ + int d0, d1, d2; + __asm__ __volatile__( + "rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) + : "memory"); + return (to); +} +#else +#define __memcpy(a,b,c) memcpy(a,b,c) +#endif + + +/* ================================================================ + * Debugging: + */ +extern int INTEL_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BLIT 0x8 +#define DEBUG_MIPTREE 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_BATCH 0x80 +#define DEBUG_PIXEL 0x100 +#define DEBUG_BUFMGR 0x200 +#define DEBUG_REGION 0x400 +#define DEBUG_FBO 0x800 +#define DEBUG_LOCK 0x1000 +#define DEBUG_SYNC 0x2000 +#define DEBUG_PRIMS 0x4000 +#define DEBUG_VERTS 0x8000 +#define DEBUG_DRI 0x10000 +#define DEBUG_DMA 0x20000 +#define DEBUG_SANITY 0x40000 +#define DEBUG_SLEEP 0x80000 +#define DEBUG_STATS 0x100000 +#define DEBUG_TILE 0x200000 +#define DEBUG_SINGLE_THREAD 0x400000 +#define DEBUG_WM 0x800000 +#define DEBUG_URB 0x1000000 +#define DEBUG_VS 0x2000000 + +#define DBG(...) do { \ + if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ + _mesa_printf(__VA_ARGS__); \ +} while(0) + +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +/* ================================================================ + * intel_context.c: + */ + +extern GLboolean intelInitContext(struct intel_context *intel, + const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions); + +extern void intelGetLock(struct intel_context *intel, GLuint flags); + +extern void intelFinish(GLcontext * ctx); +extern void intelFlush(GLcontext * ctx); + +extern void intelInitDriverFunctions(struct dd_function_table *functions); +extern void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); + + +/* ================================================================ + * intel_state.c: + */ +extern void intelInitStateFuncs(struct dd_function_table *functions); + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +enum { + DRI_CONF_BO_REUSE_DISABLED, + DRI_CONF_BO_REUSE_ALL +}; + +extern int intel_translate_shadow_compare_func(GLenum func); +extern int intel_translate_compare_func(GLenum func); +extern int intel_translate_stencil_op(GLenum op); +extern int intel_translate_blend_factor(GLenum factor); +extern int intel_translate_logic_op(GLenum opcode); + + +/*====================================================================== + * Inline conversion functions. + * These are better-typed than the macros used previously: + */ +static INLINE struct intel_context * +intel_context(GLcontext * ctx) +{ + return (struct intel_context *) ctx; +} + +#endif diff --git a/shared/intel_decode.c b/shared/intel_decode.c new file mode 100644 index 0000000..a124063 --- /dev/null +++ b/shared/intel_decode.c @@ -0,0 +1,1047 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_decode.c + * This file contains code to print out batchbuffer contents in a + * human-readable format. + * + * The current version only supports i915 packets, and only pretty-prints a + * subset of them. The intention is for it to make just a best attempt to + * decode, but never crash in the process. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <inttypes.h> + +#include "intel_decode.h" +#include "intel_chipset.h" + +#define BUFFER_FAIL(_count, _len, _name) do { \ + fprintf(out, "Buffer size too small in %s (%d < %d)\n", \ + (_name), (_count), (_len)); \ + (*failures)++; \ + return count; \ +} while (0) + +static FILE *out; +static uint32_t saved_s2 = 0, saved_s4 = 0; +static char saved_s2_set = 0, saved_s4_set = 0; + +static float +int_as_float(uint32_t intval) +{ + union intfloat { + uint32_t i; + float f; + } uval; + + uval.i = intval; + return uval.f; +} + +static void +instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index, + char *fmt, ...) +{ + va_list va; + + fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index], + index == 0 ? "" : " "); + va_start(va, fmt); + vfprintf(out, fmt, va); + va_end(va); +} + + +static int +decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_mi[] = { + { 0x08, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x31, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 1, 1, "MI_FLUSH" }, + { 0x22, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 1, 1, "MI_NOOP" }, + { 0x11, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 2, 2, "MI_SET_CONTEXT" }, + { 0x20, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 1, 1, "MI_WAIT_FOR_EVENT" }, + }; + + + for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); + opcode++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name); + if (opcodes_mi[opcode].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + if (len < opcodes_mi[opcode].min_len || + len > opcodes_mi[opcode].max_len) + { + fprintf(out, "Bad length in %s\n", + opcodes_mi[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_mi[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "MI UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode, len; + char *format = NULL; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_2d[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" }, + }; + + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x50: + instr_out(data, hw_offset, 0, + "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 6) + fprintf(out, "Bad count in XY_COLOR_BLT\n"); + if (count < 6) + BUFFER_FAIL(count, len, "XY_COLOR_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, pitch %d, " + "clipping %sabled\n", format, + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + instr_out(data, hw_offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "color\n"); + return len; + case 0x53: + instr_out(data, hw_offset, 0, + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 8) + fprintf(out, "Bad count in XY_SRC_COPY_BLT\n"); + if (count < 8) + BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " + "clipping %sabled\n", format, + data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + instr_out(data, hw_offset, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + instr_out(data, hw_offset, 6, "src pitch %d\n", + data[6] & 0xffff); + instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); + opcode++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { + unsigned int i; + + len = 1; + instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name); + if (opcodes_2d[opcode].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + if (len < opcodes_2d[opcode].min_len || + len > opcodes_2d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_2d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "2D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + switch ((data[0] & 0x00f80000) >> 19) { + case 0x11: + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + return 1; + case 0x10: + instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); + return 1; + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int len, i, c, opcode, word, map, sampler, instr; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d_1d[] = { + { 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x9c, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x85, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, + { 0x80, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x81, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" }, + { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + }; + + switch ((data[0] & 0x00ff0000) >> 16) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. + */ + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SIS.0\n"); + instr_out(data, hw_offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + if (i + 1 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SSB.0\n"); + instr_out(data, hw_offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "MSB.0\n"); + instr_out(data, hw_offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "PSP.0\n"); + instr_out(data, hw_offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "PSC.0\n"); + instr_out(data, hw_offset, i++, "PSC.1\n"); + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + (*failures)++; + return len; + } + return len; + case 0x04: + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 7; word++) { + if (data[0] & (1 << (4 + word))) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); + + /* save vertex state for decode */ + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } + + instr_out(data, hw_offset, i++, "S%d\n", word); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + (*failures)++; + } + return len; + case 0x00: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + + i = 1; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); + instr_out(data, hw_offset, i++, "map %d MS2\n", map); + instr_out(data, hw_offset, i++, "map %d MS3\n", map); + instr_out(data, hw_offset, i++, "map %d MS4\n", map); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n"); + (*failures)++; + return len; + } + return len; + case 0x06: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 1; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + if (i + 4 >= count) + BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS"); + instr_out(data, hw_offset, i, "C%d.X = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n"); + (*failures)++; + } + return len; + case 0x05: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + if ((len - 1) % 3 != 0 || len > 370) { + fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n"); + (*failures)++; + } + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); + instr_out(data, hw_offset, i++, "PS%03x\n", instr); + instr_out(data, hw_offset, i++, "PS%03x\n", instr); + instr_out(data, hw_offset, i++, "PS%03x\n", instr); + } + return len; + case 0x01: + instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + i = 1; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE"); + instr_out(data, hw_offset, i++, "sampler %d SS2\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS3\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS4\n", + sampler); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n"); + (*failures)++; + } + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); + opcode++) + { + if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); + if (opcodes_3d_1d[opcode].max_len > 1) { + len = (data[0] & 0x0000ffff) + 2; + if (len < opcodes_3d_1d[opcode].min_len || + len > opcodes_3d_1d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", + opcodes_3d_1d[opcode].name); + (*failures)++; + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset, + int *failures) +{ + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i; + char *primtype; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; break; + default: primtype = "unknown"; break; + } + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE inline"); + if (!saved_s2_set || !saved_s4_set) { + fprintf(out, "unknown vertex format\n"); + for (i = 1; i < len; i++) { + instr_out(data, hw_offset, i, + " vertex data (%f float)\n", + int_as_float(data[i])); + } + } else { + unsigned int vertex = 0; + for (i = 1; i < len;) { + unsigned int tc; + +#define VERTEX_OUT(fmt, ...) do { \ + if (i < len) \ + instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + else \ + fprintf(out, " missing data in V%d\n", vertex); \ + i++; \ +} while (0) + + VERTEX_OUT("X = %f", int_as_float(data[i])); + VERTEX_OUT("Y = %f", int_as_float(data[i])); + switch (saved_s4 >> 6 & 0x7) { + case 0x1: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + case 0x3: + break; + case 0x4: + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + default: + fprintf(out, "bad S4 position mask\n"); + } + + if (saved_s4 & (1 << 10)) { + VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 11)) { + VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 12)) + VERTEX_OUT("width = 0x%08x)", data[i]); + + for (tc = 0; tc <= 7; tc++) { + switch ((saved_s2 >> (tc * 4)) & 0xf) { + case 0x0: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + break; + case 0x1: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i])); + break; + case 0x3: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + break; + case 0x4: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + break; + case 0x5: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]); + break; + case 0xf: + break; + default: + fprintf(out, "bad S2.T%d format\n", tc); + } + } + vertex++; + } + } + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + if (count < (len + 1) / 2 + 1) { + BUFFER_FAIL(count, (len + 1) / 2 + 1, + "3DPRIMITIVE random indirect"); + } + instr_out(data, hw_offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + for (i = 1; i < count; i++) { + if ((data[i] & 0xffff) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: (terminator)\n"); + return i; + } else if ((data[i] >> 16) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: 0x%04x, " + "(terminator)\n", + data[i] & 0xffff); + return i; + } else { + instr_out(data, hw_offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + fprintf(out, + "3DPRIMITIVE: no terminator found in index buffer\n"); + (*failures)++; + return count; + } else { + /* fixed size vertex index buffer */ + for (i = 0; i < len; i += 2) { + if (i * 2 == len - 1) { + instr_out(data, hw_offset, i, + " indices: 0x%04x\n", + data[i] & 0xffff); + } else { + instr_out(data, hw_offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + } + return (len + 1) / 2 + 1; + } else { + /* sequential vertex access */ + if (count < 2) + BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect"); + instr_out(data, hw_offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, data[1] & 0xffff); + instr_out(data, hw_offset, 1, " start\n"); + return 2; + } + } + + return len; +} + +static int +decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + + switch ((data[0] & 0x1f000000) >> 24) { + case 0x1f: + return decode_3d_primitive(data, count, hw_offset, failures); + case 0x1d: + return decode_3d_1d(data, count, hw_offset, failures); + case 0x1c: + return decode_3d_1c(data, count, hw_offset, failures); + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static int +decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode, len; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_STATE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + /* 0x7808: 3DSTATE_VERTEX_BUFFERS */ + /* 0x7809: 3DSTATE_VERTEX_ELEMENTS */ + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + }; + + len = (data[0] & 0x0000ffff) + 2; + + switch ((data[0] & 0xffff0000) >> 16) { + case 0x6101: + if (len != 6) + fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + + instr_out(data, hw_offset, 0, + "STATE_BASE_ADDRESS\n"); + + if (data[1] & 1) { + instr_out(data, hw_offset, 1, "General state at 0x%08x\n", + data[1] & ~1); + } else + instr_out(data, hw_offset, 1, "General state not updated\n"); + + if (data[2] & 1) { + instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", + data[2] & ~1); + } else + instr_out(data, hw_offset, 2, "Surface state not updated\n"); + + if (data[3] & 1) { + instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", + data[3] & ~1); + } else + instr_out(data, hw_offset, 3, "Indirect state not updated\n"); + + if (data[4] & 1) { + instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", + data[4] & ~1); + } else + instr_out(data, hw_offset, 4, "General state not updated\n"); + + if (data[5] & 1) { + instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", + data[5] & ~1); + } else + instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + + return len; + case 0x7800: + if (len != 7) + fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n"); + if (count < 7) + BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_PIPELINED_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS state\n"); + instr_out(data, hw_offset, 2, "GS state\n"); + instr_out(data, hw_offset, 3, "Clip state\n"); + instr_out(data, hw_offset, 4, "SF state\n"); + instr_out(data, hw_offset, 5, "WM state\n"); + instr_out(data, hw_offset, 6, "CC state\n"); + return len; + case 0x7801: + if (len != 6) + fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + + return len; + + case 0x7900: + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + instr_out(data, hw_offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + instr_out(data, hw_offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + instr_out(data, hw_offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + + return len; + + case 0x7905: + if (len != 5) + fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); + if (count < 5) + BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not "); + instr_out(data, hw_offset, 2, "depth offset\n"); + instr_out(data, hw_offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + instr_out(data, hw_offset, 4, "volume depth\n"); + + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + unsigned int i; + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +/** + * Decodes an i830-i915 batch buffer, writing the output to stdout. + * + * \param data batch buffer contents + * \param count number of DWORDs to decode in the batch buffer + * \param hw_offset hardware address for the buffer + */ +int +intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +{ + int index = 0; + int failures = 0; + + out = stderr; + + while (index < count) { + switch ((data[index] & 0xe0000000) >> 29) { + case 0x0: + index += decode_mi(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x2: + index += decode_2d(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x3: + if (IS_965(devid)) { + index += decode_3d_965(data + index, count - index, + hw_offset + index * 4, &failures); + } else { + index += decode_3d(data + index, count - index, + hw_offset + index * 4, &failures); + } + break; + default: + instr_out(data, hw_offset, index, "UNKNOWN\n"); + failures++; + index++; + break; + } + fflush(out); + } + + return failures; +} + +void intel_decode_context_reset(void) +{ + saved_s2_set = 0; + saved_s4_set = 1; +} + diff --git a/shared/intel_decode.h b/shared/intel_decode.h new file mode 100644 index 0000000..c50644a --- /dev/null +++ b/shared/intel_decode.h @@ -0,0 +1,29 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +void intel_decode_context_reset(void); diff --git a/shared/intel_depthstencil.c b/shared/intel_depthstencil.c new file mode 100644 index 0000000..90baecd --- /dev/null +++ b/shared/intel_depthstencil.c @@ -0,0 +1,282 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "depthstencil.h" +#include "fbobject.h" +#include "framebuffer.h" +#include "hash.h" +#include "mtypes.h" +#include "renderbuffer.h" + +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_depthstencil.h" +#include "intel_regions.h" + + +/** + * The GL_EXT_framebuffer_object allows the user to create their own + * framebuffer objects consisting of color renderbuffers (0 or more), + * depth renderbuffers (0 or 1) and stencil renderbuffers (0 or 1). + * + * The spec considers depth and stencil renderbuffers to be totally independent + * buffers. In reality, most graphics hardware today uses a combined + * depth+stencil buffer (one 32-bit pixel = 24 bits of Z + 8 bits of stencil). + * + * This causes difficulty because the user may create some number of depth + * renderbuffers and some number of stencil renderbuffers and bind them + * together in framebuffers in any combination. + * + * This code manages all that. + * + * 1. Depth renderbuffers are always allocated in hardware as 32bpp + * GL_DEPTH24_STENCIL8 buffers. + * + * 2. Stencil renderbuffers are initially allocated in software as 8bpp + * GL_STENCIL_INDEX8 buffers. + * + * 3. Depth and Stencil renderbuffers use the PairedStencil and PairedDepth + * fields (respectively) to indicate if the buffer's currently paired + * with another stencil or depth buffer (respectively). + * + * 4. When a depth and stencil buffer are initially both attached to the + * current framebuffer, we merge the stencil buffer values into the + * depth buffer (really a depth+stencil buffer). The then hardware uses + * the combined buffer. + * + * 5. Whenever a depth or stencil buffer is reallocated (with + * glRenderbufferStorage) we undo the pairing and copy the stencil values + * from the combined depth/stencil buffer back to the stencil-only buffer. + * + * 6. We also undo the pairing when we find a change in buffer bindings. + * + * 7. If a framebuffer is only using a depth renderbuffer (no stencil), we + * just use the combined depth/stencil buffer and ignore the stencil values. + * + * 8. If a framebuffer is only using a stencil renderbuffer (no depth) we have + * to promote the 8bpp software stencil buffer to a 32bpp hardware + * depth+stencil buffer. + * + */ + + + +static void +map_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_map(intel, depthRb->region); + depthRb->pfMap = depthRb->region->map; + depthRb->pfPitch = depthRb->region->pitch; + } + if (stencilRb && stencilRb->region) { + intel_region_map(intel, stencilRb->region); + stencilRb->pfMap = stencilRb->region->map; + stencilRb->pfPitch = stencilRb->region->pitch; + } +} + +static void +unmap_regions(GLcontext * ctx, + struct intel_renderbuffer *depthRb, + struct intel_renderbuffer *stencilRb) +{ + struct intel_context *intel = intel_context(ctx); + if (depthRb && depthRb->region) { + intel_region_unmap(intel, depthRb->region); + depthRb->pfMap = NULL; + depthRb->pfPitch = 0; + } + if (stencilRb && stencilRb->region) { + intel_region_unmap(intel, stencilRb->region); + stencilRb->pfMap = NULL; + stencilRb->pfPitch = 0; + } +} + + + +/** + * Undo the pairing/interleaving between depth and stencil buffers. + * irb should be a depth/stencil or stencil renderbuffer. + */ +void +intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) +{ + if (irb->PairedStencil) { + /* irb is a depth/stencil buffer */ + struct gl_renderbuffer *stencilRb; + struct intel_renderbuffer *stencilIrb; + + ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + + stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); + stencilIrb = intel_renderbuffer(stencilRb); + if (stencilIrb) { + /* need to extract stencil values from the depth buffer */ + ASSERT(stencilIrb->PairedDepth == irb->Base.Name); + map_regions(ctx, irb, stencilIrb); + _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base); + unmap_regions(ctx, irb, stencilIrb); + stencilIrb->PairedDepth = 0; + } + irb->PairedStencil = 0; + } + else if (irb->PairedDepth) { + /* irb is a stencil buffer */ + struct gl_renderbuffer *depthRb; + struct intel_renderbuffer *depthIrb; + + ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || + irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + + depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); + depthIrb = intel_renderbuffer(depthRb); + if (depthIrb) { + /* need to extract stencil values from the depth buffer */ + ASSERT(depthIrb->PairedStencil == irb->Base.Name); + map_regions(ctx, depthIrb, irb); + _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base); + unmap_regions(ctx, depthIrb, irb); + depthIrb->PairedStencil = 0; + } + irb->PairedDepth = 0; + } + else { + _mesa_problem(ctx, "Problem in undo_depth_stencil_pairing"); + } + + ASSERT(irb->PairedStencil == 0); + ASSERT(irb->PairedDepth == 0); +} + + +/** + * Examine the depth and stencil renderbuffers which are attached to the + * framebuffer. If both depth and stencil are attached, make sure that the + * renderbuffers are 'paired' (combined). If only depth or only stencil is + * attached, undo any previous pairing. + * + * Must be called if NewState & _NEW_BUFFER (when renderbuffer attachments + * change, for example). + */ +void +intel_validate_paired_depth_stencil(GLcontext * ctx, + struct gl_framebuffer *fb) +{ + struct intel_renderbuffer *depthRb, *stencilRb; + + depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL); + + if (depthRb && stencilRb) { + if (depthRb == stencilRb) { + /* Using a user-created combined depth/stencil buffer. + * Nothing to do. + */ + ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_STENCIL_EXT); + ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + } + else { + /* Separate depth/stencil buffers, need to interleave now */ + ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_COMPONENT); + ASSERT(stencilRb->Base._BaseFormat == GL_STENCIL_INDEX); + /* may need to interleave depth/stencil now */ + if (depthRb->PairedStencil == stencilRb->Base.Name) { + /* OK, the depth and stencil buffers are already interleaved */ + ASSERT(stencilRb->PairedDepth == depthRb->Base.Name); + } + else { + /* need to setup new pairing/interleaving */ + if (depthRb->PairedStencil) { + intel_unpair_depth_stencil(ctx, depthRb); + } + if (stencilRb->PairedDepth) { + intel_unpair_depth_stencil(ctx, stencilRb); + } + + ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || + stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + + /* establish new pairing: interleave stencil into depth buffer */ + map_regions(ctx, depthRb, stencilRb); + _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); + unmap_regions(ctx, depthRb, stencilRb); + depthRb->PairedStencil = stencilRb->Base.Name; + stencilRb->PairedDepth = depthRb->Base.Name; + } + + } + } + else if (depthRb) { + /* Depth buffer but no stencil buffer. + * We'll use a GL_DEPTH24_STENCIL8 buffer and ignore the stencil bits. + */ + /* can't assert this until storage is allocated: + ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + */ + /* intel_undo any previous pairing */ + if (depthRb->PairedStencil) { + intel_unpair_depth_stencil(ctx, depthRb); + } + } + else if (stencilRb) { + /* Stencil buffer but no depth buffer. + * Since h/w doesn't typically support just 8bpp stencil w/out Z, + * we'll use a GL_DEPTH24_STENCIL8 buffer and ignore the depth bits. + */ + /* undo any previous pairing */ + if (stencilRb->PairedDepth) { + intel_unpair_depth_stencil(ctx, stencilRb); + } + if (stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT) { + /* promote buffer to GL_DEPTH24_STENCIL8 for hw rendering */ + _mesa_promote_stencil(ctx, &stencilRb->Base); + ASSERT(stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + } + } + + /* Finally, update the fb->_DepthBuffer and fb->_StencilBuffer fields */ + _mesa_update_depth_buffer(ctx, fb, BUFFER_DEPTH); + if (depthRb && depthRb->PairedStencil) + _mesa_update_stencil_buffer(ctx, fb, BUFFER_DEPTH); + else + _mesa_update_stencil_buffer(ctx, fb, BUFFER_STENCIL); + + + /* The hardware should use fb->Attachment[BUFFER_DEPTH].Renderbuffer + * first, if present, then fb->Attachment[BUFFER_STENCIL].Renderbuffer + * if present. + */ +} diff --git a/shared/intel_depthstencil.h b/shared/intel_depthstencil.h new file mode 100644 index 0000000..740eb0d --- /dev/null +++ b/shared/intel_depthstencil.h @@ -0,0 +1,15 @@ + +#ifndef INTEL_DEPTH_STENCIL_H +#define INTEL_DEPTH_STENCIL_H + +#include "intel_fbo.h" + +extern void +intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb); + +extern void +intel_validate_paired_depth_stencil(GLcontext * ctx, + struct gl_framebuffer *fb); + + +#endif /* INTEL_DEPTH_STENCIL_H */ diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c new file mode 100644 index 0000000..b3f6610 --- /dev/null +++ b/shared/intel_fbo.c @@ -0,0 +1,705 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "imports.h" +#include "mtypes.h" +#include "fbobject.h" +#include "framebuffer.h" +#include "renderbuffer.h" +#include "context.h" +#include "texformat.h" +#include "texrender.h" + +#include "intel_context.h" +#include "intel_buffers.h" +#include "intel_depthstencil.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_span.h" + + +#define FILE_DEBUG_FLAG DEBUG_FBO + +#define INTEL_RB_CLASS 0x12345678 + + +/* XXX FBO: move this to intel_context.h (inlined) */ +/** + * Return a gl_renderbuffer ptr casted to intel_renderbuffer. + * NULL will be returned if the rb isn't really an intel_renderbuffer. + * This is determiend by checking the ClassID. + */ +struct intel_renderbuffer * +intel_renderbuffer(struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + if (irb && irb->Base.ClassID == INTEL_RB_CLASS) { + /*_mesa_warning(NULL, "Returning non-intel Rb\n");*/ + return irb; + } + else + return NULL; +} + + +struct intel_renderbuffer * +intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) +{ + if (attIndex >= 0) + return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer); + else + return NULL; +} + + +void +intel_flip_renderbuffers(struct intel_framebuffer *intel_fb) +{ + int current_page = intel_fb->pf_current_page; + int next_page = (current_page + 1) % intel_fb->pf_num_pages; + struct gl_renderbuffer *tmp_rb; + + /* Exchange renderbuffers if necessary but make sure their reference counts + * are preserved. + */ + if (intel_fb->color_rb[current_page] && + intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer != + &intel_fb->color_rb[current_page]->Base) { + tmp_rb = NULL; + _mesa_reference_renderbuffer(&tmp_rb, + intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + tmp_rb = &intel_fb->color_rb[current_page]->Base; + _mesa_reference_renderbuffer( + &intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb); + _mesa_reference_renderbuffer(&tmp_rb, NULL); + } + + if (intel_fb->color_rb[next_page] && + intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer != + &intel_fb->color_rb[next_page]->Base) { + tmp_rb = NULL; + _mesa_reference_renderbuffer(&tmp_rb, + intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer); + tmp_rb = &intel_fb->color_rb[next_page]->Base; + _mesa_reference_renderbuffer( + &intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb); + _mesa_reference_renderbuffer(&tmp_rb, NULL); + } +} + + +struct intel_region * +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) +{ + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); + + if (irb) + return irb->region; + else + return NULL; +} + + + +/** + * Create a new framebuffer object. + */ +static struct gl_framebuffer * +intel_new_framebuffer(GLcontext * ctx, GLuint name) +{ + /* Only drawable state in intel_framebuffer at this time, just use Mesa's + * class + */ + return _mesa_new_framebuffer(ctx, name); +} + + +static void +intel_delete_renderbuffer(struct gl_renderbuffer *rb) +{ + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + ASSERT(irb); + + if (irb->PairedStencil || irb->PairedDepth) { + intel_unpair_depth_stencil(ctx, irb); + } + + if (intel && irb->region) { + intel_region_release(&irb->region); + } + + _mesa_free(irb); +} + + + +/** + * Return a pointer to a specific pixel in a renderbuffer. + */ +static void * +intel_get_pointer(GLcontext * ctx, struct gl_renderbuffer *rb, + GLint x, GLint y) +{ + /* By returning NULL we force all software rendering to go through + * the span routines. + */ + return NULL; +} + + + +/** + * Called via glRenderbufferStorageEXT() to set the format and allocate + * storage for a user-created renderbuffer. + */ +static GLboolean +intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + GLboolean softwareBuffer = GL_FALSE; + int cpp; + + ASSERT(rb->Name != 0); + + switch (internalFormat) { + case GL_R3_G3_B2: + case GL_RGB4: + case GL_RGB5: + rb->_ActualFormat = GL_RGB5; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 5; + rb->GreenBits = 6; + rb->BlueBits = 5; + cpp = 2; + break; + case GL_RGB: + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + case GL_RGBA: + case GL_RGBA2: + case GL_RGBA4: + case GL_RGB5_A1: + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + rb->_ActualFormat = GL_RGBA8; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 8; + rb->GreenBits = 8; + rb->BlueBits = 8; + rb->AlphaBits = 8; + cpp = 4; + break; + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + /* alloc a depth+stencil buffer */ + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->StencilBits = 8; + cpp = 4; + break; + case GL_DEPTH_COMPONENT16: + rb->_ActualFormat = GL_DEPTH_COMPONENT16; + rb->DataType = GL_UNSIGNED_SHORT; + rb->DepthBits = 16; + cpp = 2; + break; + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->DepthBits = 24; + cpp = 4; + break; + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->DepthBits = 24; + rb->StencilBits = 8; + cpp = 4; + break; + default: + _mesa_problem(ctx, + "Unexpected format in intel_alloc_renderbuffer_storage"); + return GL_FALSE; + } + + intelFlush(ctx); + + /* free old region */ + if (irb->region) { + intel_region_release(&irb->region); + } + + /* allocate new memory region/renderbuffer */ + if (softwareBuffer) { + return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat, + width, height); + } + else { + /* Choose a pitch to match hardware requirements: + */ + GLuint pitch = ((cpp * width + 63) & ~63) / cpp; + + /* alloc hardware renderbuffer */ + DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, + height, pitch); + + irb->region = intel_region_alloc(intel, cpp, pitch, height); + if (!irb->region) + return GL_FALSE; /* out of memory? */ + + ASSERT(irb->region->buffer); + + rb->Width = width; + rb->Height = height; + + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); + + return GL_TRUE; + } +} + + + +/** + * Called for each hardware renderbuffer when a _window_ is resized. + * Just update fields. + * Not used for user-created renderbuffers! + */ +static GLboolean +intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + ASSERT(rb->Name == 0); + rb->Width = width; + rb->Height = height; + rb->_ActualFormat = internalFormat; + + return GL_TRUE; +} + +static void +intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, + GLuint width, GLuint height) +{ + struct intel_framebuffer *intel_fb = (struct intel_framebuffer*)fb; + int i; + + _mesa_resize_framebuffer(ctx, fb, width, height); + + fb->Initialized = GL_TRUE; /* XXX remove someday */ + + if (fb->Name != 0) { + return; + } + + /* Make sure all window system renderbuffers are up to date */ + for (i = 0; i < 3; i++) { + struct gl_renderbuffer *rb = &intel_fb->color_rb[i]->Base; + + /* only resize if size is changing */ + if (rb && (rb->Width != width || rb->Height != height)) { + rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height); + } + } +} + +static GLboolean +intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + _mesa_problem(ctx, "intel_op_alloc_storage should never be called."); + return GL_FALSE; +} + + +void +intel_renderbuffer_set_region(struct intel_renderbuffer *rb, + struct intel_region *region) +{ + struct intel_region *old; + + old = rb->region; + rb->region = NULL; + intel_region_reference(&rb->region, region); + intel_region_release(&old); + + rb->pfMap = region->map; + rb->pfPitch = region->pitch; +} + +/** + * Create a new intel_renderbuffer which corresponds to an on-screen window, + * not a user-created renderbuffer. + */ +struct intel_renderbuffer * +intel_create_renderbuffer(GLenum intFormat) +{ + GET_CURRENT_CONTEXT(ctx); + + struct intel_renderbuffer *irb; + const GLuint name = 0; + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + _mesa_init_renderbuffer(&irb->Base, name); + irb->Base.ClassID = INTEL_RB_CLASS; + + switch (intFormat) { + case GL_RGB5: + irb->Base._ActualFormat = GL_RGB5; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.RedBits = 5; + irb->Base.GreenBits = 6; + irb->Base.BlueBits = 5; + irb->Base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_RGBA8: + irb->Base._ActualFormat = GL_RGBA8; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.RedBits = 8; + irb->Base.GreenBits = 8; + irb->Base.BlueBits = 8; + irb->Base.AlphaBits = 8; + irb->Base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_STENCIL_INDEX8_EXT: + irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT; + irb->Base._BaseFormat = GL_STENCIL_INDEX; + irb->Base.StencilBits = 8; + irb->Base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_DEPTH_COMPONENT16: + irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; + irb->Base._BaseFormat = GL_DEPTH_COMPONENT; + irb->Base.DepthBits = 16; + irb->Base.DataType = GL_UNSIGNED_SHORT; + break; + case GL_DEPTH_COMPONENT24: + irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + irb->Base._BaseFormat = GL_DEPTH_COMPONENT; + irb->Base.DepthBits = 24; + irb->Base.DataType = GL_UNSIGNED_INT; + break; + case GL_DEPTH24_STENCIL8_EXT: + irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; + irb->Base.DepthBits = 24; + irb->Base.StencilBits = 8; + irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; + break; + default: + _mesa_problem(NULL, + "Unexpected intFormat in intel_create_renderbuffer"); + return NULL; + } + + irb->Base.InternalFormat = intFormat; + + /* intel-specific methods */ + irb->Base.Delete = intel_delete_renderbuffer; + irb->Base.AllocStorage = intel_alloc_window_storage; + irb->Base.GetPointer = intel_get_pointer; + /* This sets the Get/PutRow/Value functions */ + intel_set_span_functions(&irb->Base); + + return irb; +} + + +/** + * Create a new renderbuffer object. + * Typically called via glBindRenderbufferEXT(). + */ +static struct gl_renderbuffer * +intel_new_renderbuffer(GLcontext * ctx, GLuint name) +{ + /*struct intel_context *intel = intel_context(ctx); */ + struct intel_renderbuffer *irb; + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + _mesa_init_renderbuffer(&irb->Base, name); + irb->Base.ClassID = INTEL_RB_CLASS; + + /* intel-specific methods */ + irb->Base.Delete = intel_delete_renderbuffer; + irb->Base.AllocStorage = intel_alloc_renderbuffer_storage; + irb->Base.GetPointer = intel_get_pointer; + /* span routines set in alloc_storage function */ + + return &irb->Base; +} + + +/** + * Called via glBindFramebufferEXT(). + */ +static void +intel_bind_framebuffer(GLcontext * ctx, GLenum target, + struct gl_framebuffer *fb, struct gl_framebuffer *fbread) +{ + if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { + intel_draw_buffer(ctx, fb); + /* Integer depth range depends on depth buffer bits */ + if (ctx->Driver.DepthRange != NULL) + ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far); + } + else { + /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */ + } +} + + +/** + * Called via glFramebufferRenderbufferEXT(). + */ +static void +intel_framebuffer_renderbuffer(GLcontext * ctx, + struct gl_framebuffer *fb, + GLenum attachment, struct gl_renderbuffer *rb) +{ + DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0); + + intelFlush(ctx); + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + intel_draw_buffer(ctx, fb); +} + +static GLboolean +intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, + struct gl_texture_image *texImage) +{ + if (texImage->TexFormat == &_mesa_texformat_argb8888) { + irb->Base._ActualFormat = GL_RGBA8; + irb->Base._BaseFormat = GL_RGBA; + DBG("Render to RGBA8 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_rgb565) { + irb->Base._ActualFormat = GL_RGB5; + irb->Base._BaseFormat = GL_RGB; + DBG("Render to RGB5 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_z16) { + irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; + irb->Base._BaseFormat = GL_DEPTH_COMPONENT; + DBG("Render to DEPTH16 texture OK\n"); + } else if (texImage->TexFormat == &_mesa_texformat_z24_s8) { + irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; + DBG("Render to DEPTH_STENCIL texture OK\n"); + } + else { + DBG("Render to texture BAD FORMAT %d\n", + texImage->TexFormat->MesaFormat); + return GL_FALSE; + } + + irb->Base.InternalFormat = irb->Base._ActualFormat; + irb->Base.Width = texImage->Width; + irb->Base.Height = texImage->Height; + irb->Base.DataType = GL_UNSIGNED_BYTE; /* FBO XXX fix */ + irb->Base.RedBits = texImage->TexFormat->RedBits; + irb->Base.GreenBits = texImage->TexFormat->GreenBits; + irb->Base.BlueBits = texImage->TexFormat->BlueBits; + irb->Base.AlphaBits = texImage->TexFormat->AlphaBits; + irb->Base.DepthBits = texImage->TexFormat->DepthBits; + + irb->Base.Delete = intel_delete_renderbuffer; + irb->Base.AllocStorage = intel_nop_alloc_storage; + intel_set_span_functions(&irb->Base); + + irb->RenderToTexture = GL_TRUE; + + return GL_TRUE; +} + +/** + * When glFramebufferTexture[123]D is called this function sets up the + * gl_renderbuffer wrapper around the texture image. + * This will have the region info needed for hardware rendering. + */ +static struct intel_renderbuffer * +intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) +{ + const GLuint name = ~0; /* not significant, but distinct for debugging */ + struct intel_renderbuffer *irb; + + /* make an intel_renderbuffer to wrap the texture image */ + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture"); + return NULL; + } + + _mesa_init_renderbuffer(&irb->Base, name); + irb->Base.ClassID = INTEL_RB_CLASS; + + if (!intel_update_wrapper(ctx, irb, texImage)) { + _mesa_free(irb); + return NULL; + } + + return irb; +} + + +/** + * Called by glFramebufferTexture[123]DEXT() (and other places) to + * prepare for rendering into texture memory. This might be called + * many times to choose different texture levels, cube faces, etc + * before intel_finish_render_texture() is ever called. + */ +static void +intel_render_texture(GLcontext * ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct gl_texture_image *newImage + = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; + struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); + struct intel_texture_image *intel_image; + GLuint imageOffset; + + (void) fb; + + ASSERT(newImage); + + if (!irb) { + irb = intel_wrap_texture(ctx, newImage); + if (irb) { + /* bind the wrapper to the attachment point */ + _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base); + } + else { + /* fallback to software rendering */ + _mesa_render_texture(ctx, fb, att); + return; + } + } if (!intel_update_wrapper(ctx, irb, newImage)) { + _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); + _mesa_render_texture(ctx, fb, att); + return; + } + + DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", + _glthread_GetID(), + att->Texture->Name, newImage->Width, newImage->Height, + irb->Base.RefCount); + + /* point the renderbufer's region to the texture image region */ + intel_image = intel_texture_image(newImage); + if (irb->region != intel_image->mt->region) { + if (irb->region) + intel_region_release(&irb->region); + intel_region_reference(&irb->region, intel_image->mt->region); + } + + /* compute offset of the particular 2D image within the texture region */ + imageOffset = intel_miptree_image_offset(intel_image->mt, + att->CubeMapFace, + att->TextureLevel); + + if (att->Texture->Target == GL_TEXTURE_3D) { + const GLuint *offsets = intel_miptree_depth_offsets(intel_image->mt, + att->TextureLevel); + imageOffset += offsets[att->Zoffset]; + } + + /* store that offset in the region */ + intel_image->mt->region->draw_offset = imageOffset; + + /* update drawing region, etc */ + intel_draw_buffer(ctx, fb); +} + + +/** + * Called by Mesa when rendering to a texture is done. + */ +static void +intel_finish_render_texture(GLcontext * ctx, + struct gl_renderbuffer_attachment *att) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); + + DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name); + + if (irb) { + /* just release the region */ + intel_region_release(&irb->region); + } + else if (att->Renderbuffer) { + /* software fallback */ + _mesa_finish_render_texture(ctx, att); + /* XXX FBO: Need to unmap the buffer (or in intelSpanRenderStart???) */ + } +} + + +/** + * Do one-time context initializations related to GL_EXT_framebuffer_object. + * Hook in device driver functions. + */ +void +intel_fbo_init(struct intel_context *intel) +{ + intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer; + intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer; + intel->ctx.Driver.BindFramebuffer = intel_bind_framebuffer; + intel->ctx.Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer; + intel->ctx.Driver.RenderTexture = intel_render_texture; + intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; + intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; +} diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h new file mode 100644 index 0000000..c90c84b --- /dev/null +++ b/shared/intel_fbo.h @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_FBO_H +#define INTEL_FBO_H + + +struct intel_context; +struct intel_region; + +/** + * Intel framebuffer, derived from gl_framebuffer. + */ +struct intel_framebuffer +{ + struct gl_framebuffer Base; + + struct intel_renderbuffer *color_rb[3]; + + /* Drawable page flipping state */ + GLboolean pf_active; + GLuint pf_seq; + GLint pf_planes; + GLint pf_current_page; + GLint pf_num_pages; + + /* VBI + */ + GLuint vbl_waited; + + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; +}; + + +/** + * Intel renderbuffer, derived from gl_renderbuffer. + * Note: The PairedDepth and PairedStencil fields use renderbuffer IDs, + * not pointers because in some circumstances a deleted renderbuffer could + * result in a dangling pointer here. + */ +struct intel_renderbuffer +{ + struct gl_renderbuffer Base; + struct intel_region *region; + void *pfMap; /* possibly paged flipped map pointer */ + GLuint pfPitch; /* possibly paged flipped pitch */ + GLboolean RenderToTexture; /* RTT? */ + + GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ + GLuint PairedStencil; /**< only used if this is a stencil renderbuffer */ + + GLuint pf_pending; /**< sequence number of pending flip */ + + GLuint vbl_pending; /**< vblank sequence number of pending flip */ +}; + +extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer + *rb); + +extern void +intel_renderbuffer_set_region(struct intel_renderbuffer *irb, + struct intel_region *region); + +extern struct intel_renderbuffer * +intel_create_renderbuffer(GLenum intFormat); + +extern void intel_fbo_init(struct intel_context *intel); + + +/* XXX make inline or macro */ +extern struct intel_renderbuffer *intel_get_renderbuffer(struct gl_framebuffer + *fb, + int attIndex); + +extern void intel_flip_renderbuffers(struct intel_framebuffer *intel_fb); + + +/* XXX make inline or macro */ +extern struct intel_region *intel_get_rb_region(struct gl_framebuffer *fb, + GLuint attIndex); + + + + +#endif /* INTEL_FBO_H */ diff --git a/shared/intel_ioctl.c b/shared/intel_ioctl.c new file mode 100644 index 0000000..f4566ba --- /dev/null +++ b/shared/intel_ioctl.c @@ -0,0 +1,223 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> + +#include "mtypes.h" +#include "context.h" +#include "swrast/swrast.h" + +#include "intel_context.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_regions.h" +#include "drm.h" +#include "i915_drm.h" + +#include "intel_bufmgr_ttm.h" + +#define FILE_DEBUG_FLAG DEBUG_IOCTL + +int +intelEmitIrqLocked(struct intel_context *intel) +{ + struct drm_i915_irq_emit ie; + int ret, seq = 1; + + if (intel->no_hw) + return 1; + + /* + assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == + (DRM_LOCK_HELD|intel->hHWContext)); + */ + + ie.irq_seq = &seq; + + ret = drmCommandWriteRead(intel->driFd, DRM_I915_IRQ_EMIT, &ie, sizeof(ie)); + if (ret) { + fprintf(stderr, "%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret); + exit(1); + } + + DBG("%s --> %d\n", __FUNCTION__, seq); + + return seq; +} + +void +intelWaitIrq(struct intel_context *intel, int seq) +{ + struct drm_i915_irq_wait iw; + int ret, lastdispatch; + volatile struct drm_i915_sarea *sarea = intel->sarea; + + if (intel->no_hw) + return; + + DBG("%s %d\n", __FUNCTION__, seq); + + iw.irq_seq = seq; + + do { + lastdispatch = sarea->last_dispatch; + ret = drmCommandWrite(intel->driFd, DRM_I915_IRQ_WAIT, &iw, sizeof(iw)); + } while (ret == -EAGAIN || + ret == -EINTR || + (ret == -EBUSY && lastdispatch != sarea->last_dispatch) || + (ret == 0 && seq > sarea->last_dispatch) || + (ret == 0 && sarea->last_dispatch - seq >= (1 << 24))); + + if (ret) { + fprintf(stderr, "%s: drm_i915_irq_wait: %d\n", __FUNCTION__, ret); + exit(1); + } +} + + +void +intel_batch_ioctl(struct intel_context *intel, + GLuint start_offset, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock) +{ + struct drm_i915_batchbuffer batch; + + if (intel->no_hw) + return; + + assert(intel->locked); + assert(used); + + DBG("%s used %d offset %x..%x ignore_cliprects %d\n", + __FUNCTION__, + used, start_offset, start_offset + used, ignore_cliprects); + + /* Throw away non-effective packets. Won't work once we have + * hardware contexts which would preserve statechanges beyond a + * single buffer. + */ + batch.start = start_offset; + batch.used = used; + batch.cliprects = intel->pClipRects; + batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + batch.DR1 = 0; + batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); + + DBG("%s: 0x%x..0x%x DR4: %x cliprects: %d\n", + __FUNCTION__, + batch.start, + batch.start + batch.used * 4, batch.DR4, batch.num_cliprects); + + if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, + sizeof(batch))) { + fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); + UNLOCK_HARDWARE(intel); + exit(1); + } +} + +#ifdef TTM_API +void +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence) +{ + struct drm_i915_execbuffer execbuf; + dri_fence *fo; + int ret; + + assert(intel->locked); + assert(used); + + if (intel->no_hw) + return; + + if (*fence) { + dri_fence_unreference(*fence); + } + + memset(&execbuf, 0, sizeof(execbuf)); + + execbuf.num_buffers = count; + execbuf.batch.used = used; + execbuf.batch.cliprects = intel->pClipRects; + execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf.batch.DR1 = 0; + execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); + + execbuf.ops_list = (unsigned long)start; // TODO + execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + + do { + ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, + sizeof(execbuf)); + } while (ret == -EAGAIN); + + if (ret != 0) { + fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); + UNLOCK_HARDWARE(intel); + exit(1); + } + + if (execbuf.fence_arg.error != 0) { + + /* + * Fence creation has failed, but the GPU has been + * idled by the kernel. Safe to continue. + */ + + *fence = NULL; + return; + } + + fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", + &execbuf.fence_arg); + if (!fo) { + fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); + UNLOCK_HARDWARE(intel); + exit(1); + } + *fence = fo; +} +#else +void +intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence) +{ +} +#endif diff --git a/i965/intel_ioctl.h b/shared/intel_ioctl.h index df27659..8674aef 100644 --- a/i965/intel_ioctl.h +++ b/shared/intel_ioctl.h @@ -35,10 +35,12 @@ int intelEmitIrqLocked( struct intel_context *intel ); void intel_batch_ioctl( struct intel_context *intel, GLuint start_offset, - GLuint used); - -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used); + GLuint used, + GLboolean ignore_cliprects, + GLboolean allow_unlock ); +void intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + void *start, GLuint count, dri_fence **fence); #endif diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c new file mode 100644 index 0000000..9be7e02 --- /dev/null +++ b/shared/intel_mipmap_tree.c @@ -0,0 +1,489 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_chipset.h" +#include "enums.h" + +#define FILE_DEBUG_FLAG DEBUG_MIPTREE + +static GLenum +target_to_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return GL_TEXTURE_CUBE_MAP_ARB; + default: + return target; + } +} + +static struct intel_mipmap_tree * +intel_miptree_create_internal(struct intel_context *intel, + GLenum target, + GLenum internal_format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, GLuint cpp, GLuint compress_byte) +{ + GLboolean ok; + struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); + + DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + _mesa_lookup_enum_by_nr(internal_format), first_level, last_level); + + mt->target = target_to_target(target); + mt->internal_format = internal_format; + mt->first_level = first_level; + mt->last_level = last_level; + mt->width0 = width0; + mt->height0 = height0; + mt->depth0 = depth0; + mt->cpp = compress_byte ? compress_byte : cpp; + mt->compressed = compress_byte ? 1 : 0; + mt->refcount = 1; + mt->pitch = 0; + +#ifdef I915 + if (IS_945(intel->intelScreen->deviceID)) + ok = i945_miptree_layout(intel, mt); + else + ok = i915_miptree_layout(intel, mt); +#else + ok = brw_miptree_layout(intel, mt); +#endif + + if (!ok) { + free(mt); + return NULL; + } + + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create(struct intel_context *intel, + GLenum target, + GLenum internal_format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, GLuint cpp, GLuint compress_byte) +{ + struct intel_mipmap_tree *mt; + + mt = intel_miptree_create_internal(intel, target, internal_format, + first_level, last_level, width0, + height0, depth0, cpp, compress_byte); + /* + * pitch == 0 indicates the null texture + */ + if (!mt || !mt->pitch) + return NULL; + + mt->region = intel_region_alloc(intel, + mt->cpp, mt->pitch, mt->total_height); + + if (!mt->region) { + free(mt); + return NULL; + } + + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create_for_region(struct intel_context *intel, + GLenum target, + GLenum internal_format, + GLuint first_level, + GLuint last_level, + struct intel_region *region, + GLuint depth0, + GLuint compress_byte) +{ + struct intel_mipmap_tree *mt; + + mt = intel_miptree_create_internal(intel, target, internal_format, + first_level, last_level, + region->pitch, region->height, depth0, + region->cpp, compress_byte); + if (!mt) + return mt; +#if 0 + if (mt->pitch != region->pitch) { + fprintf(stderr, + "region pitch (%d) doesn't match mipmap tree pitch (%d)\n", + region->pitch, mt->pitch); + free(mt); + return NULL; + } +#else + /* The mipmap tree pitch is aligned to 64 bytes to make sure render + * to texture works, but we don't need that for texturing from a + * pixmap. Just override it here. */ + mt->pitch = region->pitch; +#endif + + mt->region = region; + + return mt; + } + +/** + * intel_miptree_pitch_align: + * + * @intel: intel context pointer + * + * @mt: the miptree to compute pitch alignment for + * + * @pitch: the natural pitch value + * + * Given @pitch, compute a larger value which accounts for + * any necessary alignment required by the device + */ + +int intel_miptree_pitch_align (struct intel_context *intel, + struct intel_mipmap_tree *mt, + int pitch) +{ +#ifdef I915 + GLcontext *ctx = &intel->ctx; +#endif + + if (!mt->compressed) { + int pitch_align; + + if (intel->ttm) { + /* XXX: Align pitch to multiple of 64 bytes for now to allow + * render-to-texture to work in all cases. This should probably be + * replaced at some point by some scheme to only do this when really + * necessary. + */ + pitch_align = 64; + } else { + pitch_align = 4; + } + + pitch = ALIGN(pitch * mt->cpp, pitch_align); + +#ifdef I915 + /* XXX: At least the i915 seems very upset when the pitch is a multiple + * of 1024 and sometimes 512 bytes - performance can drop by several + * times. Go to the next multiple of the required alignment for now. + */ + if (!(pitch & 511) && + (pitch + pitch_align) < (1 << ctx->Const.MaxTextureLevels)) + pitch += pitch_align; +#endif + + pitch /= mt->cpp; + } + return pitch; +} + +void +intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src) +{ + src->refcount++; + *dst = src; + DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); +} + +void +intel_miptree_release(struct intel_context *intel, + struct intel_mipmap_tree **mt) +{ + if (!*mt) + return; + + DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1); + if (--(*mt)->refcount <= 0) { + GLuint i; + + DBG("%s deleting %p\n", __FUNCTION__, *mt); + + intel_region_release(&((*mt)->region)); + + for (i = 0; i < MAX_TEXTURE_LEVELS; i++) + if ((*mt)->level[i].image_offset) + free((*mt)->level[i].image_offset); + + free(*mt); + } + *mt = NULL; +} + + + + +/* Can the image be pulled into a unified mipmap tree. This mirrors + * the completeness test in a lot of ways. + * + * Not sure whether I want to pass gl_texture_image here. + */ +GLboolean +intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image, + GLuint face, GLuint level) +{ + /* Images with borders are never pulled into mipmap trees. + */ + if (image->Border || + ((image->_BaseFormat == GL_DEPTH_COMPONENT) && + ((image->TexObject->WrapS == GL_CLAMP_TO_BORDER) || + (image->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) + return GL_FALSE; + + if (image->InternalFormat != mt->internal_format || + image->IsCompressed != mt->compressed) + return GL_FALSE; + + if (!image->IsCompressed && + !mt->compressed && + image->TexFormat->TexelBytes != mt->cpp) + return GL_FALSE; + + /* Test image dimensions against the base level image adjusted for + * minification. This will also catch images not present in the + * tree, changed targets, etc. + */ + if (image->Width != mt->level[level].width || + image->Height != mt->level[level].height || + image->Depth != mt->level[level].depth) + return GL_FALSE; + + return GL_TRUE; +} + + +void +intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint nr_images, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + mt->level[level].width = w; + mt->level[level].height = h; + mt->level[level].depth = d; + mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp; + mt->level[level].nr_images = nr_images; + + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, mt->level[level].level_offset); + + /* Not sure when this would happen, but anyway: + */ + if (mt->level[level].image_offset) { + free(mt->level[level].image_offset); + mt->level[level].image_offset = NULL; + } + + assert(nr_images); + + mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint)); + mt->level[level].image_offset[0] = 0; +} + + + +void +intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint img, + GLuint x, GLuint y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < mt->level[level].nr_images); + + mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp; + + DBG("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, mt->level[level].image_offset[img]); +} + + +/* Although we use the image_offset[] array to store relative offsets + * to cube faces, Mesa doesn't know anything about this and expects + * each cube face to be treated as a separate image. + * + * These functions present that view to mesa: + */ +const GLuint * +intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level) +{ + static const GLuint zero = 0; + + if (mt->target != GL_TEXTURE_3D || mt->level[level].nr_images == 1) + return &zero; + else + return mt->level[level].image_offset; +} + + +GLuint +intel_miptree_image_offset(struct intel_mipmap_tree *mt, + GLuint face, GLuint level) +{ + if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) + return (mt->level[level].level_offset + + mt->level[level].image_offset[face]); + else + return mt->level[level].level_offset; +} + + + +/** + * Map a teximage in a mipmap tree. + * \param row_stride returns row stride in bytes + * \param image_stride returns image stride in bytes (for 3D textures). + * \param image_offsets pointer to array of pixel offsets from the returned + * pointer to each depth image + * \return address of mapping + */ +GLubyte * +intel_miptree_image_map(struct intel_context * intel, + struct intel_mipmap_tree * mt, + GLuint face, + GLuint level, + GLuint * row_stride, GLuint * image_offsets) +{ + DBG("%s \n", __FUNCTION__); + + if (row_stride) + *row_stride = mt->pitch * mt->cpp; + + if (mt->target == GL_TEXTURE_3D) { + int i; + + for (i = 0; i < mt->level[level].depth; i++) + image_offsets[i] = mt->level[level].image_offset[i] / mt->cpp; + } else { + assert(mt->level[level].depth == 1); + assert(mt->target == GL_TEXTURE_CUBE_MAP || + mt->level[level].image_offset[0] == 0); + image_offsets[0] = 0; + } + + return (intel_region_map(intel, mt->region) + + intel_miptree_image_offset(mt, face, level)); +} + +void +intel_miptree_image_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + DBG("%s\n", __FUNCTION__); + intel_region_unmap(intel, mt->region); +} + + + +/* Upload data for a particular image. + */ +void +intel_miptree_image_data(struct intel_context *intel, + struct intel_mipmap_tree *dst, + GLuint face, + GLuint level, + void *src, + GLuint src_row_pitch, + GLuint src_image_pitch) +{ + GLuint depth = dst->level[level].depth; + GLuint dst_offset = intel_miptree_image_offset(dst, face, level); + const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); + GLuint i; + GLuint height = 0; + + DBG("%s: %d/%d\n", __FUNCTION__, face, level); + for (i = 0; i < depth; i++) { + height = dst->level[level].height; + if(dst->compressed) + height = (height + 3) / 4; + intel_region_data(intel, + dst->region, + dst_offset + dst_depth_offset[i], /* dst_offset */ + 0, 0, /* dstx, dsty */ + src, + src_row_pitch, + 0, 0, /* source x, y */ + dst->level[level].width, height); /* width, height */ + + src += src_image_pitch * dst->cpp; + } +} + +extern GLuint intel_compressed_alignment(GLenum); +/* Copy mipmap image between trees + */ +void +intel_miptree_image_copy(struct intel_context *intel, + struct intel_mipmap_tree *dst, + GLuint face, GLuint level, + struct intel_mipmap_tree *src) +{ + GLuint width = src->level[level].width; + GLuint height = src->level[level].height; + GLuint depth = src->level[level].depth; + GLuint dst_offset = intel_miptree_image_offset(dst, face, level); + GLuint src_offset = intel_miptree_image_offset(src, face, level); + const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); + const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level); + GLuint i; + + if (dst->compressed) { + GLuint alignment = intel_compressed_alignment(dst->internal_format); + height = (height + 3) / 4; + width = ((width + alignment - 1) & ~(alignment - 1)); + } + + for (i = 0; i < depth; i++) { + intel_region_copy(intel, + dst->region, dst_offset + dst_depth_offset[i], + 0, + 0, + src->region, src_offset + src_depth_offset[i], + 0, 0, width, height); + } + +} diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h new file mode 100644 index 0000000..c9537db --- /dev/null +++ b/shared/intel_mipmap_tree.h @@ -0,0 +1,226 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_MIPMAP_TREE_H +#define INTEL_MIPMAP_TREE_H + +#include "intel_regions.h" + +/* A layer on top of the intel_regions code which adds: + * + * - Code to size and layout a region to hold a set of mipmaps. + * - Query to determine if a new image fits in an existing tree. + * - More refcounting + * - maybe able to remove refcounting from intel_region? + * - ? + * + * The fixed mipmap layout of intel hardware where one offset + * specifies the position of all images in a mipmap hierachy + * complicates the implementation of GL texture image commands, + * compared to hardware where each image is specified with an + * independent offset. + * + * In an ideal world, each texture object would be associated with a + * single bufmgr buffer or 2d intel_region, and all the images within + * the texture object would slot into the tree as they arrive. The + * reality can be a little messier, as images can arrive from the user + * with sizes that don't fit in the existing tree, or in an order + * where the tree layout cannot be guessed immediately. + * + * This structure encodes an idealized mipmap tree. The GL image + * commands build these where possible, otherwise store the images in + * temporary system buffers. + */ + + +/** + * Describes the location of each texture image within a texture region. + */ +struct intel_mipmap_level +{ + /** + * Byte offset to the base of this level. + * + * This is used for mipmap levels of 1D/2D/3D textures. However, CUBE + * layouts spread images around the whole tree, so the level offset is + * always zero in that case. + */ + GLuint level_offset; + GLuint width; + GLuint height; + /** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */ + GLuint depth; + /** Number of images at this level: 1 for 1D/2D, 6 for CUBE, depth for 3D */ + GLuint nr_images; + + /** + * Byte offset from level_offset to the image for each cube face or depth + * level. + * + * Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table. + */ + GLuint *image_offset; +}; + +struct intel_mipmap_tree +{ + /* Effectively the key: + */ + GLenum target; + GLenum internal_format; + + GLuint first_level; + GLuint last_level; + + GLuint width0, height0, depth0; /**< Level zero image dimensions */ + GLuint cpp; + GLboolean compressed; + + /* Derived from the above: + */ + GLuint pitch; + GLuint depth_pitch; /* per-image on i945? */ + GLuint total_height; + + /* Includes image offset tables: + */ + struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct intel_region *region; + + /* These are also refcounted: + */ + GLuint refcount; +}; + + + +struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, + GLenum target, + GLenum internal_format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint cpp, + GLuint compress_byte); + +struct intel_mipmap_tree * +intel_miptree_create_for_region(struct intel_context *intel, + GLenum target, + GLenum internal_format, + GLuint first_level, + GLuint last_level, + struct intel_region *region, + GLuint depth0, + GLuint compress_byte); + +int intel_miptree_pitch_align (struct intel_context *intel, + struct intel_mipmap_tree *mt, + int pitch); + +void intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src); + +void intel_miptree_release(struct intel_context *intel, + struct intel_mipmap_tree **mt); + +/* Check if an image fits an existing mipmap tree layout + */ +GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image, + GLuint face, GLuint level); + +/* Return a pointer to an image within a tree. Return image stride as + * well. + */ +GLubyte *intel_miptree_image_map(struct intel_context *intel, + struct intel_mipmap_tree *mt, + GLuint face, + GLuint level, + GLuint * row_stride, GLuint * image_stride); + +void intel_miptree_image_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt); + + +/* Return the linear offset of an image relative to the start of the + * tree: + */ +GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt, + GLuint face, GLuint level); + +/* Return pointers to each 2d slice within an image. Indexed by depth + * value. + */ +const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, + GLuint level); + + +void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint nr_images, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d); + +void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, + GLuint img, GLuint x, GLuint y); + + +/* Upload an image into a tree + */ +void intel_miptree_image_data(struct intel_context *intel, + struct intel_mipmap_tree *dst, + GLuint face, + GLuint level, + void *src, + GLuint src_row_pitch, GLuint src_image_pitch); + +/* Copy an image between two trees + */ +void intel_miptree_image_copy(struct intel_context *intel, + struct intel_mipmap_tree *dst, + GLuint face, GLuint level, + struct intel_mipmap_tree *src); + +/* i915_mipmap_tree.c: + */ +GLboolean i915_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt); +GLboolean i945_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt); +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +#endif diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c new file mode 100644 index 0000000..6417866 --- /dev/null +++ b/shared/intel_pixel.c @@ -0,0 +1,189 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "enums.h" +#include "state.h" +#include "swrast/swrast.h" + +#include "intel_context.h" +#include "intel_pixel.h" +#include "intel_regions.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +static GLenum +effective_func(GLenum func, GLboolean src_alpha_is_one) +{ + if (src_alpha_is_one) { + if (func == GL_SRC_ALPHA) + return GL_ONE; + if (func == GL_ONE_MINUS_SRC_ALPHA) + return GL_ZERO; + } + + return func; +} + +/** + * Check if any fragment operations are in effect which might effect + * glDraw/CopyPixels. + */ +GLboolean +intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one) +{ + if (ctx->NewState) + _mesa_update_state(ctx); + + if (ctx->FragmentProgram._Enabled) { + DBG("fallback due to fragment program\n"); + return GL_FALSE; + } + + if (ctx->Color.BlendEnabled && + (effective_func(ctx->Color.BlendSrcRGB, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.BlendDstRGB, src_alpha_is_one) != GL_ZERO || + ctx->Color.BlendEquationRGB != GL_FUNC_ADD || + effective_func(ctx->Color.BlendSrcA, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.BlendDstA, src_alpha_is_one) != GL_ZERO || + ctx->Color.BlendEquationA != GL_FUNC_ADD)) { + DBG("fallback due to blend\n"); + return GL_FALSE; + } + + if (ctx->Texture._EnabledUnits) { + DBG("fallback due to texturing\n"); + return GL_FALSE; + } + + if (!(ctx->Color.ColorMask[0] && + ctx->Color.ColorMask[1] && + ctx->Color.ColorMask[2] && + ctx->Color.ColorMask[3])) { + DBG("fallback due to color masking\n"); + return GL_FALSE; + } + + if (ctx->Color.AlphaEnabled) { + DBG("fallback due to alpha\n"); + return GL_FALSE; + } + + if (ctx->Depth.Test) { + DBG("fallback due to depth test\n"); + return GL_FALSE; + } + + if (ctx->Fog.Enabled) { + DBG("fallback due to fog\n"); + return GL_FALSE; + } + + if (ctx->_ImageTransferState) { + DBG("fallback due to image transfer\n"); + return GL_FALSE; + } + + if (ctx->Stencil.Enabled) { + DBG("fallback due to image stencil\n"); + return GL_FALSE; + } + + if (ctx->Scissor.Enabled) { + /* XXX Note: Scissor could be done with the blitter */ + DBG("fallback due to image scissor\n"); + return GL_FALSE; + } + + if (ctx->RenderMode != GL_RENDER) { + DBG("fallback due to render mode\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + + +GLboolean +intel_check_meta_tex_fragment_ops(GLcontext * ctx) +{ + if (ctx->NewState) + _mesa_update_state(ctx); + + /* Some of _ImageTransferState (scale, bias) could be done with + * fragment programs on i915. + */ + return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */ + ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled); +} + +/* The intel_region struct doesn't really do enough to capture the + * format of the pixels in the region. For now this code assumes that + * the region is a display surface and hence is either ARGB8888 or + * RGB565. + * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd + * know the buffer's pixel format. + * + * \param format as given to glDraw/ReadPixels + * \param type as given to glDraw/ReadPixels + */ +GLboolean +intel_check_blit_format(struct intel_region * region, + GLenum format, GLenum type) +{ + if (region->cpp == 4 && + (type == GL_UNSIGNED_INT_8_8_8_8_REV || + type == GL_UNSIGNED_BYTE) && format == GL_BGRA) { + return GL_TRUE; + } + + if (region->cpp == 2 && + type == GL_UNSIGNED_SHORT_5_6_5_REV && format == GL_BGR) { + return GL_TRUE; + } + + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n", + __FUNCTION__, region->cpp, + _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); + + return GL_FALSE; +} + + +void +intelInitPixelFuncs(struct dd_function_table *functions) +{ + functions->Accum = _swrast_Accum; + if (!getenv("INTEL_NO_BLIT")) { + functions->Bitmap = intelBitmap; + functions->CopyPixels = intelCopyPixels; +#ifdef I915 + functions->ReadPixels = intelReadPixels; + functions->DrawPixels = intelDrawPixels; +#endif + } +} diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h new file mode 100644 index 0000000..9c899b9 --- /dev/null +++ b/shared/intel_pixel.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_PIXEL_H +#define INTEL_PIXEL_H + +#include "mtypes.h" + +void intelInitPixelFuncs(struct dd_function_table *functions); + +GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, + GLboolean src_alpha_is_one); + +GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx); + +GLboolean intel_check_blit_format(struct intel_region *region, + GLenum format, GLenum type); + + +void intelReadPixels(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid * pixels); + +void intelDrawPixels(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels); + +void intelCopyPixels(GLcontext * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint destx, GLint desty, GLenum type); + +void intelBitmap(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels); + +#endif diff --git a/i965/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c index df9d688..81238ac 100644 --- a/i965/intel_pixel_bitmap.c +++ b/shared/intel_pixel_bitmap.c @@ -41,6 +41,8 @@ #include "intel_blit.h" #include "intel_regions.h" #include "intel_buffer_objects.h" +#include "intel_buffers.h" +#include "intel_pixel.h" @@ -91,11 +93,6 @@ static void set_bit( GLubyte *dest, dest[bit/8] |= 1 << (bit % 8); } -static int align(int x, int align) -{ - return (x + align - 1) & ~(align - 1); -} - /* Extract a rectangle's worth of data from the bitmap. Called * per-cliprect. */ @@ -147,7 +144,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, } if (row_align) - bit = (bit + row_align - 1) & ~(row_align - 1); + bit = ALIGN(bit, row_align); } return count; @@ -169,11 +166,8 @@ do_blit_bitmap( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct intel_region *dst = intel_drawbuf_region(intel); GLfloat tmpColor[4]; - - union { - GLuint ui; - GLubyte ub[4]; - } color; + GLubyte ubcolor[4]; + GLuint color8888, color565; if (!dst) return GL_FALSE; @@ -190,14 +184,17 @@ do_blit_bitmap( GLcontext *ctx, ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); } - UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); + + color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]); + color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); /* Does zoom apply to bitmaps? */ - if (!intel_check_blit_fragment_ops(ctx) || + if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F) || ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) return GL_FALSE; @@ -235,10 +232,10 @@ do_blit_bitmap( GLcontext *ctx, dsty = dPriv->y + (dPriv->h - dsty - height); dstx = dPriv->x + dstx; - dest_rect.x1 = dstx; - dest_rect.y1 = dsty; - dest_rect.x2 = dstx + width; - dest_rect.y2 = dsty + height; + dest_rect.x1 = dstx < 0 ? 0 : dstx; + dest_rect.y1 = dsty < 0 ? 0 : dsty; + dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width; + dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height; for (i = 0; i < nbox; i++) { drm_clip_rect_t rect; @@ -268,7 +265,7 @@ do_blit_bitmap( GLcontext *ctx, for (px = 0; px < box_w; px += DX) { int h = MIN2(DY, box_h - py); int w = MIN2(DX, box_w - px); - GLuint sz = align(align(w,8) * h, 64)/8; + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY; @@ -292,7 +289,7 @@ do_blit_bitmap( GLcontext *ctx, dst->cpp, (GLubyte *)stipple, sz, - color.ui, + (dst->cpp == 2) ? color565 : color8888, dst->pitch, dst->buffer, 0, @@ -304,7 +301,6 @@ do_blit_bitmap( GLcontext *ctx, } } } - intel->need_flush = GL_TRUE; out: intel_batchbuffer_flush(intel->batch); } diff --git a/i965/intel_pixel_copy.c b/shared/intel_pixel_copy.c index 3bdf2fb..45f72ba 100644 --- a/i965/intel_pixel_copy.c +++ b/shared/intel_pixel_copy.c @@ -28,18 +28,21 @@ #include "glheader.h" #include "enums.h" #include "image.h" +#include "state.h" #include "mtypes.h" #include "macros.h" -#include "state.h" #include "swrast/swrast.h" #include "intel_screen.h" #include "intel_context.h" #include "intel_ioctl.h" #include "intel_batchbuffer.h" +#include "intel_buffers.h" #include "intel_blit.h" #include "intel_regions.h" +#include "intel_pixel.h" +#define FILE_DEBUG_FLAG DEBUG_PIXEL static struct intel_region * copypix_src_region(struct intel_context *intel, GLenum type) @@ -68,20 +71,20 @@ copypix_src_region(struct intel_context *intel, GLenum type) } - - /** * Check if any fragment operations are in effect which might effect - * glDraw/CopyPixels. + * glCopyPixels. Differs from intel_check_blit_fragment_ops in that + * we allow Scissor. */ -GLboolean -intel_check_blit_fragment_ops(GLcontext * ctx) +static GLboolean +intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) { if (ctx->NewState) _mesa_update_state(ctx); + /* Could do logicop with the blitter: + */ return !(ctx->_ImageTransferState || - ctx->RenderMode != GL_RENDER || ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || @@ -89,12 +92,13 @@ intel_check_blit_fragment_ops(GLcontext * ctx) !ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || /* can do this! */ + !ctx->Color.ColorMask[3] || ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled || ctx->Color.BlendEnabled); } +#ifdef I915 /* Doesn't work for overlapping regions. Could do a double copy or * just fallback. */ @@ -113,15 +117,9 @@ do_texture_copypixels(GLcontext * ctx, DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, srcx, srcy, width, height, dstx, dsty); - if (!src || !dst || type != GL_COLOR || - ctx->_ImageTransferState || - ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F || - ctx->RenderMode != GL_RENDER || - ctx->Texture._EnabledUnits || - ctx->FragmentProgram._Enabled || - src != dst ) - return GL_FALSE; - + if (!src || !dst || type != GL_COLOR) + return GL_FALSE; + /* Can't handle overlapping regions. Don't have sufficient control * over rasterization to pull it off in-place. Punt on these for * now. @@ -135,13 +133,13 @@ do_texture_copypixels(GLcontext * ctx, srcbox.x1 = srcx; srcbox.y1 = srcy; - srcbox.x2 = srcx + width - 1; - srcbox.y2 = srcy + height - 1; + srcbox.x2 = srcx + width; + srcbox.y2 = srcy + height; dstbox.x1 = dstx; dstbox.y1 = dsty; - dstbox.x2 = dstx + width - 1; - dstbox.y2 = dsty + height - 1; + dstbox.x2 = dstx + width * ctx->Pixel.ZoomX; + dstbox.y2 = dsty + height * ctx->Pixel.ZoomY; DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, @@ -180,22 +178,66 @@ do_texture_copypixels(GLcontext * ctx, /* Set the frontbuffer up as a large rectangular texture. */ - intel->vtbl.meta_frame_buffer_texture( intel, srcx - dstx, srcy - dsty ); + if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0, + src->pitch, + src->height, src_format, src_type)) { + intel->vtbl.leave_meta_state(intel); + return GL_FALSE; + } + intel->vtbl.meta_texture_blend_replace(intel); - - if (intel->driDrawable->numClipRects) - intel->vtbl.meta_draw_quad( intel, - dstx, dstx + width, - dsty, dsty + height, - ctx->Current.RasterPos[ 2 ], - 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 ); - - intel->vtbl.leave_meta_state( intel ); - + + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + + srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */ + + srcx += dPriv->x; + srcy += dPriv->y; + + /* Clip against the source region. This is the only source + * clipping we do. XXX: Just set the texcord wrap mode to clamp + * or similar. + * + */ + if (0) { + GLint orig_x = srcx; + GLint orig_y = srcy; + + if (!_mesa_clip_to_region(0, 0, src->pitch, src->height, + &srcx, &srcy, &width, &height)) + goto out; + + dstx += srcx - orig_x; + dsty += (srcy - orig_y) * ctx->Pixel.ZoomY; + } + + /* Just use the regular cliprect mechanism... Does this need to + * even hold the lock??? + */ + intel->vtbl.meta_draw_quad(intel, + dstx, + dstx + width * ctx->Pixel.ZoomX, + dPriv->h - (dsty + height * ctx->Pixel.ZoomY), + dPriv->h - (dsty), 0, /* XXX: what z value? */ + 0x00ff00ff, + srcx, srcx + width, srcy, srcy + height); + + out: + intel->vtbl.leave_meta_state(intel); + intel_batchbuffer_flush(intel->batch); + } + UNLOCK_HARDWARE(intel); + DBG("%s: success\n", __FUNCTION__); return GL_TRUE; } +#endif /* I915 */ + /** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. @@ -213,7 +255,7 @@ do_blit_copypixels(GLcontext * ctx, /* Copypixels can be more than a straight copy. Ensure all the * extra operations are disabled: */ - if (!intel_check_blit_fragment_ops(ctx) || + if (!intel_check_copypixel_blit_fragment_ops(ctx) || ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) return GL_FALSE; @@ -224,16 +266,12 @@ do_blit_copypixels(GLcontext * ctx, intelFlush(&intel->ctx); -/* intel->vtbl.render_start(intel); */ -/* intel->vtbl.emit_state(intel); */ - LOCK_HARDWARE(intel); if (intel->driDrawable->numClipRects) { __DRIdrawablePrivate *dPriv = intel->driDrawable; __DRIdrawablePrivate *dReadPriv = intel->driReadDrawable; drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t dest_rect; GLint nbox = dPriv->numClipRects; GLint delta_x = 0; GLint delta_y = 0; @@ -281,13 +319,6 @@ do_blit_copypixels(GLcontext * ctx, dsty = srcy - delta_y; } - dest_rect.x1 = dstx; - dest_rect.y1 = dsty; - dest_rect.x2 = dstx + width; - dest_rect.y2 = dsty + height; - -/* intel->vtbl.emit_flush(intel, 0); */ - /* Could do slightly more clipping: Eg, take the intersection of * the existing set of cliprects and those cliprects translated * by delta_x, delta_y: @@ -296,32 +327,35 @@ do_blit_copypixels(GLcontext * ctx, * introduce garbage when copying from obscured window regions. */ for (i = 0; i < nbox; i++) { - drm_clip_rect_t rect; + GLint clip_x = dstx; + GLint clip_y = dsty; + GLint clip_w = width; + GLint clip_h = height; - if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) + if (!_mesa_clip_to_region(box[i].x1, box[i].y1, box[i].x2, box[i].y2, + &clip_x, &clip_y, &clip_w, &clip_h)) continue; - - intelEmitCopyBlit(intel, - dst->cpp, + intelEmitCopyBlit(intel, dst->cpp, src->pitch, src->buffer, 0, src->tiled, dst->pitch, dst->buffer, 0, dst->tiled, - rect.x1 + delta_x, - rect.y1 + delta_y, /* srcx, srcy */ - rect.x1, rect.y1, /* dstx, dsty */ - rect.x2 - rect.x1, rect.y2 - rect.y1, + clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ + clip_x, clip_y, /* dstx, dsty */ + clip_w, clip_h, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } - intel->need_flush = GL_TRUE; - out: + out: intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); + + DBG("%s: success\n", __FUNCTION__); return GL_TRUE; } + void intelCopyPixels(GLcontext * ctx, GLint srcx, GLint srcy, @@ -334,11 +368,12 @@ intelCopyPixels(GLcontext * ctx, if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; +#ifdef I915 if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("fallback to _swrast_CopyPixels\n"); +#endif + + DBG("fallback to _swrast_CopyPixels\n"); _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); } diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c new file mode 100644 index 0000000..34813d2 --- /dev/null +++ b/shared/intel_pixel_draw.c @@ -0,0 +1,388 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "enums.h" +#include "image.h" +#include "mtypes.h" +#include "macros.h" +#include "bufferobj.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_ioctl.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_buffer_objects.h" +#include "intel_tris.h" + + + +static GLboolean +do_texture_drawpixels(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *dst = intel_drawbuf_region(intel); + struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); + GLuint rowLength = unpack->RowLength ? unpack->RowLength : width; + GLuint src_offset; + + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + intelFlush(&intel->ctx); + + if (!dst) + return GL_FALSE; + + intel->vtbl.render_start(intel); + intel->vtbl.emit_state(intel); + + if (src) { + if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, + format, type, pixels)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); + return GL_TRUE; + } + } + else { + /* PBO only for now: + */ +/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ + return GL_FALSE; + } + + /* There are a couple of things we can't do yet, one of which is + * set the correct state for pixel operations when GL texturing is + * enabled. That's a pretty rare state and probably not worth the + * effort. A completely device-independent version of this may do + * more. + * + * Similarly, we make no attempt to merge metaops processing with + * an enabled fragment program, though it would certainly be + * possible. + */ + if (!intel_check_meta_tex_fragment_ops(ctx)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad GL fragment state for metaops texture\n", + __FUNCTION__); + return GL_FALSE; + } + + intel->vtbl.install_meta_state(intel); + + + /* Is this true? Also will need to turn depth testing on according + * to state: + */ + intel->vtbl.meta_no_stencil_write(intel); + intel->vtbl.meta_no_depth_write(intel); + + /* Set the 3d engine to draw into the destination region: + */ + intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); + + intel->vtbl.meta_import_pixel_state(intel); + + src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height, + format, type, 0, 0, 0); + + + /* Setup the pbo up as a rectangular texture, if possible. + * + * TODO: This is almost always possible if the i915 fragment + * program is adjusted to correctly swizzle the sampled colors. + * The major exception is any 24bit texture, like RGB888, for which + * there is no hardware support. + */ + if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset, + rowLength, height, format, type)) { + intel->vtbl.leave_meta_state(intel); + return GL_FALSE; + } + + intel->vtbl.meta_texture_blend_replace(intel); + + + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + __DRIdrawablePrivate *dPriv = intel->driDrawable; + GLint srcx, srcy; + GLint dstx, dsty; + + dstx = x; + dsty = dPriv->h - (y + height); + + srcx = 0; /* skiprows/pixels already done */ + srcy = 0; + + if (0) { + const GLint orig_x = dstx; + const GLint orig_y = dsty; + + if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height, + &dstx, &dsty, &width, &height)) + goto out; + + srcx += dstx - orig_x; + srcy += dsty - orig_y; + } + + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height); + + /* Must use the regular cliprect mechanism in order to get the + * drawing origin set correctly. Otherwise scissor state is in + * incorrect coordinate space. Does this even need to hold the + * lock??? + */ + intel->vtbl.meta_draw_quad(intel, + dstx, dstx + width * ctx->Pixel.ZoomX, + dPriv->h - (y + height * ctx->Pixel.ZoomY), + dPriv->h - (y), + -ctx->Current.RasterPos[2] * .5, + 0x00ff00ff, + srcx, srcx + width, srcy + height, srcy); + out: + intel->vtbl.leave_meta_state(intel); + intel_batchbuffer_flush(intel->batch); + } + UNLOCK_HARDWARE(intel); + return GL_TRUE; +} + + + + + +/* Pros: + * - no waiting for idle before updating framebuffer. + * + * Cons: + * - if upload is by memcpy, this may actually be slower than fallback path. + * - uploads the whole image even if destination is clipped + * + * Need to benchmark. + * + * Given the questions about performance, implement for pbo's only. + * This path is definitely a win if the pbo is already in agp. If it + * turns out otherwise, we can add the code necessary to upload client + * data to agp space before performing the blit. (Though it may turn + * out to be better/simpler just to use the texture engine). + */ +static GLboolean +do_blit_drawpixels(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *dest = intel_drawbuf_region(intel); + struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); + GLuint src_offset; + GLuint rowLength; + dri_fence *fence = NULL; + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s\n", __FUNCTION__); + + + if (!dest) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - no dest\n", __FUNCTION__); + return GL_FALSE; + } + + if (src) { + /* This validation should be done by core mesa: + */ + if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, + format, type, pixels)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); + return GL_TRUE; + } + } + else { + /* PBO only for now: + */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); + return GL_FALSE; + } + + if (!intel_check_blit_format(dest, format, type)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad format for blit\n", __FUNCTION__); + return GL_FALSE; + } + + if (!intel_check_blit_fragment_ops(ctx, GL_FALSE)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad GL fragment state for blitter\n", + __FUNCTION__); + return GL_FALSE; + } + + if (ctx->Pixel.ZoomX != 1.0F) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__); + return GL_FALSE; + } + + + if (unpack->RowLength > 0) + rowLength = unpack->RowLength; + else + rowLength = width; + + if (ctx->Pixel.ZoomY == -1.0F) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__); + return GL_FALSE; /* later */ + y -= height; + } + else if (ctx->Pixel.ZoomY == 1.0F) { + rowLength = -rowLength; + } + else { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__); + return GL_FALSE; + } + + src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height, + format, type, 0, 0, 0); + + intelFlush(&intel->ctx); + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int nbox = dPriv->numClipRects; + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t rect; + drm_clip_rect_t dest_rect; + dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ); + int i; + + dest_rect.x1 = dPriv->x + x; + dest_rect.y1 = dPriv->y + dPriv->h - (y + height); + dest_rect.x2 = dest_rect.x1 + width; + dest_rect.y2 = dest_rect.y1 + height; + + for (i = 0; i < nbox; i++) { + if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) + continue; + + intelEmitCopyBlit(intel, + dest->cpp, + rowLength, src_buffer, src_offset, GL_FALSE, + dest->pitch, dest->buffer, 0, dest->tiled, + rect.x1 - dest_rect.x1, + rect.y2 - dest_rect.y2, + rect.x1, + rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1, + ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY); + } + intel_batchbuffer_flush(intel->batch); + fence = intel->batch->last_fence; + dri_fence_reference(fence); + } + UNLOCK_HARDWARE(intel); + + if (fence) { + dri_fence_wait(fence); + dri_fence_unreference(fence); + } + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - DONE\n", __FUNCTION__); + + return GL_TRUE; +} + + + +void +intelDrawPixels(GLcontext * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels) +{ + if (do_blit_drawpixels(ctx, x, y, width, height, format, type, + unpack, pixels)) + return; + + if (do_texture_drawpixels(ctx, x, y, width, height, format, type, + unpack, pixels)) + return; + + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + + if (ctx->FragmentProgram._Current == ctx->FragmentProgram._TexEnvProgram) { + /* + * We don't want the i915 texenv program to be applied to DrawPixels. + * This is really just a performance optimization (mesa will other- + * wise happily run the fragment program on each pixel in the image). + */ + struct gl_fragment_program *fpSave = ctx->FragmentProgram._Current; + /* can't just set current frag prog to 0 here as on buffer resize + we'll get new state checks which will segfault. Remains a hack. */ + ctx->FragmentProgram._Current = NULL; + ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE; + ctx->FragmentProgram._Active = GL_FALSE; + _swrast_DrawPixels( ctx, x, y, width, height, format, type, + unpack, pixels ); + ctx->FragmentProgram._Current = fpSave; + ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE; + ctx->FragmentProgram._Active = GL_TRUE; + _swrast_InvalidateState(ctx, _NEW_PROGRAM); + } + else { + _swrast_DrawPixels( ctx, x, y, width, height, format, type, + unpack, pixels ); + } +} diff --git a/i915/intel_reg.h b/shared/intel_reg.h index 1ec1532..37629c0 100644 --- a/i915/intel_reg.h +++ b/shared/intel_reg.h @@ -25,16 +25,19 @@ * **************************************************************************/ +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) -#ifndef _INTEL_REG_H_ -#define _INTEL_REG_H_ +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) - -#define CMD_3D (0x3<<29) - - -#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) #define PRIM_INLINE (0<<23) #define PRIM_INDIRECT_SEQUENTIAL (0<<17) @@ -52,33 +55,25 @@ #define PRIM3D_DIB (0x9<<18) #define PRIM3D_MASK (0x1f<<18) -#define I915PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define I915PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define I915PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define I915PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 +#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) +# define XY_TEXT_BYTE_PACKED (1 << 16) -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) -#endif +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 diff --git a/shared/intel_regions.c b/shared/intel_regions.c new file mode 100644 index 0000000..35ab46a --- /dev/null +++ b/shared/intel_regions.c @@ -0,0 +1,502 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Provide additional functionality on top of bufmgr buffers: + * - 2d semantics and blit operations + * - refcounting of buffers for multiple images in a buffer. + * - refcounting of buffer mappings. + * - some logic for moving the buffers to the best memory pools for + * given operations. + * + * Most of this is to make it easier to implement the fixed-layout + * mipmap tree required by intel hardware in the face of GL's + * programming interface where each image can be specifed in random + * order and it isn't clear what layout the tree should have until the + * last moment. + */ + +#include "intel_context.h" +#include "intel_regions.h" +#include "intel_blit.h" +#include "intel_buffer_objects.h" +#include "dri_bufmgr.h" +#include "intel_bufmgr_ttm.h" +#include "intel_batchbuffer.h" + +#define FILE_DEBUG_FLAG DEBUG_REGION + +/* XXX: Thread safety? + */ +GLubyte * +intel_region_map(struct intel_context *intel, struct intel_region *region) +{ + DBG("%s\n", __FUNCTION__); + if (!region->map_refcount++) { + if (region->pbo) + intel_region_cow(intel, region); + + dri_bo_map(region->buffer, GL_TRUE); + region->map = region->buffer->virtual; + } + + return region->map; +} + +void +intel_region_unmap(struct intel_context *intel, struct intel_region *region) +{ + DBG("%s\n", __FUNCTION__); + if (!--region->map_refcount) { + dri_bo_unmap(region->buffer); + region->map = NULL; + } +} + +static struct intel_region * +intel_region_alloc_internal(struct intel_context *intel, + GLuint cpp, GLuint pitch, GLuint height, + GLuint tiled, dri_bo *buffer) +{ + struct intel_region *region; + + DBG("%s\n", __FUNCTION__); + + if (buffer == NULL) + return NULL; + + region = calloc(sizeof(*region), 1); + region->cpp = cpp; + region->pitch = pitch; + region->height = height; /* needed? */ + region->refcount = 1; + region->tiled = tiled; + region->buffer = buffer; + + return region; +} + +struct intel_region * +intel_region_alloc(struct intel_context *intel, + GLuint cpp, GLuint pitch, GLuint height) +{ + dri_bo *buffer; + + buffer = dri_bo_alloc(intel->bufmgr, "region", + pitch * cpp * height, 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + + return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); +} + +struct intel_region * +intel_region_alloc_for_handle(struct intel_context *intel, + GLuint cpp, GLuint pitch, GLuint height, + GLuint tiled, GLuint handle) +{ + dri_bo *buffer; + + buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); + + return intel_region_alloc_internal(intel, + cpp, pitch, height, tiled, buffer); +} + +void +intel_region_reference(struct intel_region **dst, struct intel_region *src) +{ + assert(*dst == NULL); + if (src) { + src->refcount++; + *dst = src; + } +} + +void +intel_region_release(struct intel_region **region) +{ + if (!*region) + return; + + DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1); + + ASSERT((*region)->refcount > 0); + (*region)->refcount--; + + if ((*region)->refcount == 0) { + assert((*region)->map_refcount == 0); + + if ((*region)->pbo) + (*region)->pbo->region = NULL; + (*region)->pbo = NULL; + dri_bo_unreference((*region)->buffer); + free(*region); + } + *region = NULL; +} + +/* + * XXX Move this into core Mesa? + */ +void +_mesa_copy_rect(GLubyte * dst, + GLuint cpp, + GLuint dst_pitch, + GLuint dst_x, + GLuint dst_y, + GLuint width, + GLuint height, + const GLubyte * src, + GLuint src_pitch, GLuint src_x, GLuint src_y) +{ + GLuint i; + + dst_pitch *= cpp; + src_pitch *= cpp; + dst += dst_x * cpp; + src += src_x * cpp; + dst += dst_y * dst_pitch; + src += src_y * dst_pitch; + width *= cpp; + + if (width == dst_pitch && width == src_pitch) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_pitch; + src += src_pitch; + } + } +} + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +void +intel_region_data(struct intel_context *intel, + struct intel_region *dst, + GLuint dst_offset, + GLuint dstx, GLuint dsty, + const void *src, GLuint src_pitch, + GLuint srcx, GLuint srcy, GLuint width, GLuint height) +{ + GLboolean locked = GL_FALSE; + + DBG("%s\n", __FUNCTION__); + + if (intel == NULL) + return; + + if (dst->pbo) { + if (dstx == 0 && + dsty == 0 && width == dst->pitch && height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + + if (!intel->locked) { + LOCK_HARDWARE(intel); + locked = GL_TRUE; + } + + _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset, + dst->cpp, + dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + intel_region_unmap(intel, dst); + + if (locked) + UNLOCK_HARDWARE(intel); + +} + +/* Copy rectangular sub-regions. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +void +intel_region_copy(struct intel_context *intel, + struct intel_region *dst, + GLuint dst_offset, + GLuint dstx, GLuint dsty, + struct intel_region *src, + GLuint src_offset, + GLuint srcx, GLuint srcy, GLuint width, GLuint height) +{ + DBG("%s\n", __FUNCTION__); + + if (intel == NULL) + return; + + if (dst->pbo) { + if (dstx == 0 && + dsty == 0 && width == dst->pitch && height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + + assert(src->cpp == dst->cpp); + + intelEmitCopyBlit(intel, + dst->cpp, + src->pitch, src->buffer, src_offset, src->tiled, + dst->pitch, dst->buffer, dst_offset, dst->tiled, + srcx, srcy, dstx, dsty, width, height, + GL_COPY); +} + +/* Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +void +intel_region_fill(struct intel_context *intel, + struct intel_region *dst, + GLuint dst_offset, + GLuint dstx, GLuint dsty, + GLuint width, GLuint height, GLuint color) +{ + DBG("%s\n", __FUNCTION__); + + if (intel == NULL) + return; + + if (dst->pbo) { + if (dstx == 0 && + dsty == 0 && width == dst->pitch && height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + + intelEmitFillBlit(intel, + dst->cpp, + dst->pitch, dst->buffer, dst_offset, dst->tiled, + dstx, dsty, width, height, color); +} + +/* Attach to a pbo, discarding our data. Effectively zero-copy upload + * the pbo's data. + */ +void +intel_region_attach_pbo(struct intel_context *intel, + struct intel_region *region, + struct intel_buffer_object *pbo) +{ + if (region->pbo == pbo) + return; + + /* If there is already a pbo attached, break the cow tie now. + * Don't call intel_region_release_pbo() as that would + * unnecessarily allocate a new buffer we would have to immediately + * discard. + */ + if (region->pbo) { + region->pbo->region = NULL; + region->pbo = NULL; + } + + if (region->buffer) { + dri_bo_unreference(region->buffer); + region->buffer = NULL; + } + + region->pbo = pbo; + region->pbo->region = region; + dri_bo_reference(pbo->buffer); + region->buffer = pbo->buffer; +} + + +/* Break the COW tie to the pbo and allocate a new buffer. + * The pbo gets to keep the data. + */ +void +intel_region_release_pbo(struct intel_context *intel, + struct intel_region *region) +{ + assert(region->buffer == region->pbo->buffer); + region->pbo->region = NULL; + region->pbo = NULL; + dri_bo_unreference(region->buffer); + region->buffer = NULL; + + region->buffer = dri_bo_alloc(intel->bufmgr, "region", + region->pitch * region->cpp * region->height, + 64, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); +} + +/* Break the COW tie to the pbo. Both the pbo and the region end up + * with a copy of the data. + */ +void +intel_region_cow(struct intel_context *intel, struct intel_region *region) +{ + struct intel_buffer_object *pbo = region->pbo; + GLboolean was_locked = intel->locked; + + if (intel == NULL) + return; + + intel_region_release_pbo(intel, region); + + assert(region->cpp * region->pitch * region->height == pbo->Base.Size); + + DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + + /* Now blit from the texture buffer to the new buffer: + */ + + intel_batchbuffer_flush(intel->batch); + + was_locked = intel->locked; + if (intel->locked) + LOCK_HARDWARE(intel); + + intelEmitCopyBlit(intel, + region->cpp, + region->pitch, region->buffer, 0, region->tiled, + region->pitch, pbo->buffer, 0, region->tiled, + 0, 0, 0, 0, + region->pitch, region->height, + GL_COPY); + + intel_batchbuffer_flush(intel->batch); + + if (was_locked) + UNLOCK_HARDWARE(intel); +} + +dri_bo * +intel_region_buffer(struct intel_context *intel, + struct intel_region *region, GLuint flag) +{ + if (region->pbo) { + if (flag == INTEL_WRITE_PART) + intel_region_cow(intel, region); + else if (flag == INTEL_WRITE_FULL) + intel_region_release_pbo(intel, region); + } + + return region->buffer; +} + +static struct intel_region * +intel_recreate_static(struct intel_context *intel, + const char *name, + struct intel_region *region, + intelRegion *region_desc) +{ + intelScreenPrivate *intelScreen = intel->intelScreen; + + if (region == NULL) { + region = calloc(sizeof(*region), 1); + region->refcount = 1; + } + + if (intel->ctx.Visual.rgbBits == 24) + region->cpp = 4; + else + region->cpp = intel->ctx.Visual.rgbBits / 8; + region->pitch = intelScreen->pitch; + region->height = intelScreen->height; /* needed? */ + region->tiled = region_desc->tiled; + + if (intel->ttm) { + assert(region_desc->bo_handle != -1); + region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, + name, + region_desc->bo_handle); + } else { + region->buffer = dri_bo_alloc_static(intel->bufmgr, + name, + region_desc->offset, + intelScreen->pitch * + intelScreen->height, + region_desc->map, + DRM_BO_FLAG_MEM_TT); + } + + assert(region->buffer != NULL); + + return region; +} + +/** + * Create intel_region structs to describe the static front, back, and depth + * buffers created by the xserver. + * + * Although FBO's mean we now no longer use these as render targets in + * all circumstances, they won't go away until the back and depth + * buffers become private, and the front buffer will remain even then. + * + * Note that these don't allocate video memory, just describe + * allocations alread made by the X server. + */ +void +intel_recreate_static_regions(struct intel_context *intel) +{ + intelScreenPrivate *intelScreen = intel->intelScreen; + + intel->front_region = + intel_recreate_static(intel, "front", + intel->front_region, + &intelScreen->front); + + intel->back_region = + intel_recreate_static(intel, "back", + intel->back_region, + &intelScreen->back); + +#ifdef I915 + if (intelScreen->third.handle) { + intel->third_region = + intel_recreate_static(intel, "third", + intel->third_region, + &intelScreen->third); + } +#endif /* I915 */ + + /* Still assumes front.cpp == depth.cpp. We can kill this when we move to + * private buffers. + */ + intel->depth_region = + intel_recreate_static(intel, "depth", + intel->depth_region, + &intelScreen->depth); +} diff --git a/shared/intel_regions.h b/shared/intel_regions.h new file mode 100644 index 0000000..229f79a --- /dev/null +++ b/shared/intel_regions.h @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_REGIONS_H +#define INTEL_REGIONS_H + +#include "mtypes.h" +#include "dri_bufmgr.h" + +struct intel_context; +struct intel_buffer_object; + +/** + * A layer on top of the bufmgr buffers that adds a few useful things: + * + * - Refcounting for local buffer references. + * - Refcounting for buffer maps + * - Buffer dimensions - pitch and height. + * - Blitter commands for copying 2D regions between buffers. (really???) + */ +struct intel_region +{ + dri_bo *buffer; /**< buffer manager's buffer */ + GLuint refcount; /**< Reference count for region */ + GLuint cpp; /**< bytes per pixel */ + GLuint pitch; /**< in pixels */ + GLuint height; /**< in pixels */ + GLubyte *map; /**< only non-NULL when region is actually mapped */ + GLuint map_refcount; /**< Reference count for mapping */ + + GLuint draw_offset; /**< Offset of drawing address within the region */ + GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */ + + struct intel_buffer_object *pbo; /* zero-copy uploads */ +}; + + +/* Allocate a refcounted region. Pointers to regions should only be + * copied by calling intel_reference_region(). + */ +struct intel_region *intel_region_alloc(struct intel_context *intel, + GLuint cpp, + GLuint pitch, GLuint height); + +struct intel_region * +intel_region_alloc_for_handle(struct intel_context *intel, + GLuint cpp, GLuint pitch, GLuint height, + GLuint tiled, unsigned int handle); + +void intel_region_reference(struct intel_region **dst, + struct intel_region *src); + +void intel_region_release(struct intel_region **ib); + +void intel_recreate_static_regions(struct intel_context *intel); + +/* Map/unmap regions. This is refcounted also: + */ +GLubyte *intel_region_map(struct intel_context *intel, + struct intel_region *ib); + +void intel_region_unmap(struct intel_context *intel, struct intel_region *ib); + + +/* Upload data to a rectangular sub-region + */ +void intel_region_data(struct intel_context *intel, + struct intel_region *dest, + GLuint dest_offset, + GLuint destx, GLuint desty, + const void *src, GLuint src_stride, + GLuint srcx, GLuint srcy, GLuint width, GLuint height); + +/* Copy rectangular sub-regions + */ +void intel_region_copy(struct intel_context *intel, + struct intel_region *dest, + GLuint dest_offset, + GLuint destx, GLuint desty, + struct intel_region *src, + GLuint src_offset, + GLuint srcx, GLuint srcy, GLuint width, GLuint height); + +/* Fill a rectangular sub-region + */ +void intel_region_fill(struct intel_context *intel, + struct intel_region *dest, + GLuint dest_offset, + GLuint destx, GLuint desty, + GLuint width, GLuint height, GLuint color); + +/* Helpers for zerocopy uploads, particularly texture image uploads: + */ +void intel_region_attach_pbo(struct intel_context *intel, + struct intel_region *region, + struct intel_buffer_object *pbo); +void intel_region_release_pbo(struct intel_context *intel, + struct intel_region *region); +void intel_region_cow(struct intel_context *intel, + struct intel_region *region); + +dri_bo *intel_region_buffer(struct intel_context *intel, + struct intel_region *region, + GLuint flag); + +void _mesa_copy_rect(GLubyte * dst, + GLuint cpp, + GLuint dst_pitch, + GLuint dst_x, + GLuint dst_y, + GLuint width, + GLuint height, + const GLubyte * src, + GLuint src_pitch, GLuint src_x, GLuint src_y); + +#endif diff --git a/shared/intel_screen.c b/shared/intel_screen.c new file mode 100644 index 0000000..5dded4b --- /dev/null +++ b/shared/intel_screen.c @@ -0,0 +1,882 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "context.h" +#include "framebuffer.h" +#include "matrix.h" +#include "renderbuffer.h" +#include "simple_list.h" +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + + +#include "intel_screen.h" + +#include "intel_buffers.h" +#include "intel_tex.h" +#include "intel_span.h" +#include "intel_ioctl.h" +#include "intel_fbo.h" +#include "intel_chipset.h" + +#include "i915_drm.h" +#include "i830_dri.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "intel_bufmgr_ttm.h" + +PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC) + /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, + * DRI_CONF_BO_REUSE_ALL + */ + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") + DRI_CONF_ENUM(0, "Disable buffer object reuse") + DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") + DRI_CONF_DESC_END + DRI_CONF_OPT_END + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_ALLOW_LARGE_TEXTURES(2) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST(false) + DRI_CONF_SECTION_END +DRI_CONF_END; + +const GLuint __driNConfigOptions = 6; + +#ifdef USE_NEW_INTERFACE +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; +#endif /*USE_NEW_INTERFACE */ + +/** + * Map all the memory regions described by the screen. + * \return GL_TRUE if success, GL_FALSE if error. + */ +GLboolean +intelMapScreenRegions(__DRIscreenPrivate * sPriv) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + + if (intelScreen->front.handle) { + if (drmMap(sPriv->fd, + intelScreen->front.handle, + intelScreen->front.size, + (drmAddress *) & intelScreen->front.map) != 0) { + _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); + return GL_FALSE; + } + } + else { + _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); + } + + if (0) + _mesa_printf("Back 0x%08x ", intelScreen->back.handle); + if (drmMap(sPriv->fd, + intelScreen->back.handle, + intelScreen->back.size, + (drmAddress *) & intelScreen->back.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (intelScreen->third.handle) { + if (0) + _mesa_printf("Third 0x%08x ", intelScreen->third.handle); + if (drmMap(sPriv->fd, + intelScreen->third.handle, + intelScreen->third.size, + (drmAddress *) & intelScreen->third.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + } + + if (0) + _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle); + if (drmMap(sPriv->fd, + intelScreen->depth.handle, + intelScreen->depth.size, + (drmAddress *) & intelScreen->depth.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + + if (0) + _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); + if (intelScreen->tex.size != 0) { + if (drmMap(sPriv->fd, + intelScreen->tex.handle, + intelScreen->tex.size, + (drmAddress *) & intelScreen->tex.map) != 0) { + intelUnmapScreenRegions(intelScreen); + return GL_FALSE; + } + } + + if (0) + printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n", + intelScreen->front.map, + intelScreen->back.map, intelScreen->third.map, + intelScreen->depth.map, intelScreen->tex.map); + return GL_TRUE; +} + +void +intelUnmapScreenRegions(intelScreenPrivate * intelScreen) +{ +#define REALLY_UNMAP 1 + if (intelScreen->front.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) + printf("drmUnmap front failed!\n"); +#endif + intelScreen->front.map = NULL; + } + if (intelScreen->back.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) + printf("drmUnmap back failed!\n"); +#endif + intelScreen->back.map = NULL; + } + if (intelScreen->third.map) { +#if REALLY_UNMAP + if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0) + printf("drmUnmap third failed!\n"); +#endif + intelScreen->third.map = NULL; + } + if (intelScreen->depth.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->depth.map, intelScreen->depth.size); + intelScreen->depth.map = NULL; +#endif + } + if (intelScreen->tex.map) { +#if REALLY_UNMAP + drmUnmap(intelScreen->tex.map, intelScreen->tex.size); + intelScreen->tex.map = NULL; +#endif + } +} + + +static void +intelPrintDRIInfo(intelScreenPrivate * intelScreen, + __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv) +{ + fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->front.size, intelScreen->front.offset, + intelScreen->pitch); + fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->back.size, intelScreen->back.offset, + intelScreen->pitch); + fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->depth.size, intelScreen->depth.offset, + intelScreen->pitch); + fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", + intelScreen->tex.size, intelScreen->tex.offset); + fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); +} + + +static void +intelPrintSAREA(const struct drm_i915_sarea * sarea) +{ + fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, + sarea->height); + fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); + fprintf(stderr, + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->front_offset, sarea->front_size, + (unsigned) sarea->front_handle); + fprintf(stderr, + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->back_offset, sarea->back_size, + (unsigned) sarea->back_handle); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->depth_offset, sarea->depth_size, + (unsigned) sarea->depth_handle); + fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); +} + + +/** + * A number of the screen parameters are obtained/computed from + * information in the SAREA. This function updates those parameters. + */ +void +intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, + struct drm_i915_sarea * sarea) +{ + intelScreen->width = sarea->width; + intelScreen->height = sarea->height; + intelScreen->pitch = sarea->pitch; + + intelScreen->front.offset = sarea->front_offset; + intelScreen->front.handle = sarea->front_handle; + intelScreen->front.size = sarea->front_size; + intelScreen->front.tiled = sarea->front_tiled; + + intelScreen->back.offset = sarea->back_offset; + intelScreen->back.handle = sarea->back_handle; + intelScreen->back.size = sarea->back_size; + intelScreen->back.tiled = sarea->back_tiled; + + if (intelScreen->driScrnPriv->ddx_version.minor >= 8) { + intelScreen->third.offset = sarea->third_offset; + intelScreen->third.handle = sarea->third_handle; + intelScreen->third.size = sarea->third_size; + intelScreen->third.tiled = sarea->third_tiled; + } + + intelScreen->depth.offset = sarea->depth_offset; + intelScreen->depth.handle = sarea->depth_handle; + intelScreen->depth.size = sarea->depth_size; + intelScreen->depth.tiled = sarea->depth_tiled; + + if (intelScreen->driScrnPriv->ddx_version.minor >= 9) { + intelScreen->front.bo_handle = sarea->front_bo_handle; + intelScreen->back.bo_handle = sarea->back_bo_handle; + intelScreen->third.bo_handle = sarea->third_bo_handle; + intelScreen->depth.bo_handle = sarea->depth_bo_handle; + } else { + intelScreen->front.bo_handle = -1; + intelScreen->back.bo_handle = -1; + intelScreen->third.bo_handle = -1; + intelScreen->depth.bo_handle = -1; + } + + intelScreen->tex.offset = sarea->tex_offset; + intelScreen->logTextureGranularity = sarea->log_tex_granularity; + intelScreen->tex.handle = sarea->tex_handle; + intelScreen->tex.size = sarea->tex_size; + + if (0) + intelPrintSAREA(sarea); +} + + +/** + * DRI2 entrypoint + */ +static void +intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, + __DRIcontextPrivate *pcp, + __DRIDrawableConfigEvent *event) +{ + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + struct intel_region *region = NULL; + struct intel_renderbuffer *rb, *depth_rb, *stencil_rb; + struct intel_context *intel = pcp->driverPrivate; + int cpp, pitch; + + cpp = intel->ctx.Visual.rgbBits / 8; + pitch = ((cpp * dPriv->w + 63) & ~63) / cpp; + + rb = intel_fb->color_rb[1]; + if (rb) { + region = intel_region_alloc(intel, cpp, pitch, dPriv->h); + intel_renderbuffer_set_region(rb, region); + } + + rb = intel_fb->color_rb[2]; + if (rb) { + region = intel_region_alloc(intel, cpp, pitch, dPriv->h); + intel_renderbuffer_set_region(rb, region); + } + + depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + if (depth_rb || stencil_rb) + region = intel_region_alloc(intel, cpp, pitch, dPriv->h); + if (depth_rb) + intel_renderbuffer_set_region(depth_rb, region); + if (stencil_rb) + intel_renderbuffer_set_region(stencil_rb, region); + + /* FIXME: Tell the X server about the regions we just allocated and + * attached. */ +} + +#define BUFFER_FLAG_TILED 0x0100 + +/** + * DRI2 entrypoint + */ +static void +intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, + __DRIcontextPrivate *pcp, + __DRIBufferAttachEvent *ba) +{ + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + struct intel_renderbuffer *rb; + struct intel_region *region; + struct intel_context *intel = pcp->driverPrivate; + GLuint tiled; + + switch (ba->buffer.attachment) { + case DRI_DRAWABLE_BUFFER_FRONT_LEFT: + rb = intel_fb->color_rb[0]; + break; + + case DRI_DRAWABLE_BUFFER_BACK_LEFT: + rb = intel_fb->color_rb[0]; + break; + + case DRI_DRAWABLE_BUFFER_DEPTH: + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + break; + + case DRI_DRAWABLE_BUFFER_STENCIL: + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + break; + + case DRI_DRAWABLE_BUFFER_ACCUM: + default: + fprintf(stderr, "unhandled buffer attach event, attacment type %d\n", + ba->buffer.attachment); + return; + } + +#if 0 + /* FIXME: Add this so we can filter out when the X server sends us + * attachment events for the buffers we just allocated. Need to + * get the BO handle for a render buffer. */ + if (intel_renderbuffer_get_region_handle(rb) == ba->buffer.handle) + return; +#endif + + tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0; + region = intel_region_alloc_for_handle(intel, ba->buffer.cpp, + ba->buffer.pitch / ba->buffer.cpp, + dPriv->h, tiled, + ba->buffer.handle); + + intel_renderbuffer_set_region(rb, region); +} + +static const __DRItexOffsetExtension intelTexOffsetExtension = { + { __DRI_TEX_OFFSET }, + intelSetTexOffset, +}; + +static const __DRItexBufferExtension intelTexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + intelSetTexBuffer, +}; + +static const __DRIextension *intelScreenExtensions[] = { + &driReadDrawableExtension, + &driCopySubBufferExtension.base, + &driSwapControlExtension.base, + &driFrameTrackingExtension.base, + &driMediaStreamCounterExtension.base, + &intelTexOffsetExtension.base, + &intelTexBufferExtension.base, + NULL +}; + +static GLboolean +intel_get_param(__DRIscreenPrivate *psp, int param, int *value) +{ + int ret; + struct drm_i915_getparam gp; + + gp.param = param; + gp.value = value; + + ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "drm_i915_getparam: %d\n", ret); + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) +{ + intelScreenPrivate *intelScreen; + I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv; + struct drm_i915_sarea *sarea; + + if (sPriv->devPrivSize != sizeof(I830DRIRec)) { + fprintf(stderr, + "\nERROR! sizeof(I830DRIRec) does not match passed size from device driver\n"); + return GL_FALSE; + } + + /* Allocate the private area */ + intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate)); + if (!intelScreen) { + fprintf(stderr, "\nERROR! Allocating private area failed\n"); + return GL_FALSE; + } + /* parse information in __driConfigOptions */ + driParseOptionInfo(&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + intelScreen->driScrnPriv = sPriv; + sPriv->private = (void *) intelScreen; + intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; + sarea = (struct drm_i915_sarea *) + (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset); + + intelScreen->deviceID = gDRIPriv->deviceID; + + intelUpdateScreenFromSAREA(intelScreen, sarea); + + if (!intelMapScreenRegions(sPriv)) { + fprintf(stderr, "\nERROR! mapping regions\n"); + _mesa_free(intelScreen); + sPriv->private = NULL; + return GL_FALSE; + } + + intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; + + if (0) + intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); + + intelScreen->drmMinor = sPriv->drm_version.minor; + + /* Determine if IRQs are active? */ + if (!intel_get_param(sPriv, I915_PARAM_IRQ_ACTIVE, + &intelScreen->irq_active)) + return GL_FALSE; + + /* Determine if batchbuffers are allowed */ + if (!intel_get_param(sPriv, I915_PARAM_ALLOW_BATCHBUFFER, + &intelScreen->allow_batchbuffer)) + return GL_FALSE; + + sPriv->extensions = intelScreenExtensions; + + return GL_TRUE; +} + + +static void +intelDestroyScreen(__DRIscreenPrivate * sPriv) +{ + intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + + intelUnmapScreenRegions(intelScreen); + + FREE(intelScreen); + sPriv->private = NULL; +} + + +/** + * This is called when we need to set up GL rendering to a new X window. + */ +static GLboolean +intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, + __DRIdrawablePrivate * driDrawPriv, + const __GLcontextModes * mesaVis, GLboolean isPixmap) +{ + intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private; + + if (isPixmap) { + return GL_FALSE; /* not implemented */ + } + else { + GLboolean swStencil = (mesaVis->stencilBits > 0 && + mesaVis->depthBits != 24); + GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); + + struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); + + if (!intel_fb) + return GL_FALSE; + + _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); + + /* setup the hardware-based renderbuffers */ + { + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, + &intel_fb->color_rb[0]->Base); + } + + if (mesaVis->doubleBufferMode) { + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, + &intel_fb->color_rb[1]->Base); + + if (screen->third.handle) { + struct gl_renderbuffer *tmp_rb = NULL; + + intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat); + _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); + } + } + + if (mesaVis->depthBits == 24) { + if (mesaVis->stencilBits == 8) { + /* combined depth/stencil buffer */ + struct intel_renderbuffer *depthStencilRb + = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT); + /* note: bind RB to two attachment points */ + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, + &depthStencilRb->Base); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_STENCIL, + &depthStencilRb->Base); + } else { + struct intel_renderbuffer *depthRb + = intel_create_renderbuffer(GL_DEPTH_COMPONENT24); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, + &depthRb->Base); + } + } + else if (mesaVis->depthBits == 16) { + /* just 16-bit depth buffer, no hw stencil */ + struct intel_renderbuffer *depthRb + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); + } + + /* now add any/all software-based renderbuffers we may need */ + _mesa_add_soft_renderbuffers(&intel_fb->Base, + GL_FALSE, /* never sw color */ + GL_FALSE, /* never sw depth */ + swStencil, mesaVis->accumRedBits > 0, + GL_FALSE, /* never sw alpha */ + GL_FALSE /* never sw aux */ ); + driDrawPriv->driverPrivate = (void *) intel_fb; + + return GL_TRUE; + } +} + +static void +intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv) +{ + _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); +} + + +/** + * Get information about previous buffer swaps. + */ +static int +intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +{ + struct intel_framebuffer *intel_fb; + + if ((dPriv == NULL) || (dPriv->driverPrivate == NULL) + || (sInfo == NULL)) { + return -1; + } + + intel_fb = dPriv->driverPrivate; + sInfo->swap_count = intel_fb->swap_count; + sInfo->swap_ust = intel_fb->swap_ust; + sInfo->swap_missed_count = intel_fb->swap_missed_count; + + sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) + ? driCalculateSwapUsage(dPriv, 0, intel_fb->swap_missed_ust) + : 0.0; + + return 0; +} + + +/* There are probably better ways to do this, such as an + * init-designated function to register chipids and createcontext + * functions. + */ +extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + +extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); +extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + +static GLboolean +intelCreateContext(const __GLcontextModes * mesaVis, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; + +#ifdef I915 + if (IS_9XX(intelScreen->deviceID)) { + if (!IS_965(intelScreen->deviceID)) { + return i915CreateContext(mesaVis, driContextPriv, + sharedContextPrivate); + } + } else { + return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate); + } +#else + if (IS_965(intelScreen->deviceID)) + return brwCreateContext(mesaVis, driContextPriv, sharedContextPrivate); +#endif + fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); + return GL_FALSE; +} + + +static __DRIconfig ** +intelFillInModes(__DRIscreenPrivate *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __DRIconfig **configs; + __GLcontextModes *m; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + int i; + + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + + u_int8_t depth_bits_array[3]; + u_int8_t stencil_bits_array[3]; + + depth_bits_array[0] = 0; + depth_bits_array[1] = depth_bits; + depth_bits_array[2] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = 0; + if (depth_bits == 24) + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + + stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; + back_buffer_factor = (have_back_buffer) ? 3 : 1; + + if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor); + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + /* Mark the visual as slow if there are "fake" stencil bits. + */ + for (i = 0; configs[i]; i++) { + m = &configs[i]->modes; + if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) { + m->visualRating = GLX_SLOW_CONFIG; + } + } + + return configs; +} + + +/** + * This is the driver specific part of the createNewScreen entry point. + * Called when using legacy DRI. + * + * \todo maybe fold this into intelInitDriver + * + * \return the __GLcontextModes supported by this driver + */ +static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp) +{ +#ifdef I915 + static const __DRIversion ddx_expected = { 1, 5, 0 }; +#else + static const __DRIversion ddx_expected = { 1, 6, 0 }; +#endif + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 5, 0 }; + I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; + + if (!driCheckDriDdxDrmVersions2("i915", + &psp->dri_version, &dri_expected, + &psp->ddx_version, &ddx_expected, + &psp->drm_version, &drm_expected)) { + return NULL; + } + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create is + * called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + intelInitExtensions(NULL, GL_TRUE); + + if (!intelInitDriver(psp)) + return NULL; + + psp->extensions = intelScreenExtensions; + + return (const __DRIconfig **) + intelFillInModes(psp, dri_priv->cpp * 8, + (dri_priv->cpp == 2) ? 16 : 24, + (dri_priv->cpp == 2) ? 0 : 8, 1); +} + +struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen) +{ + /* + * This should probably change to have the screen allocate a dummy + * context at screen creation. For now just use the current context. + */ + + GET_CURRENT_CONTEXT(ctx); + if (ctx == NULL) { + _mesa_problem(NULL, "No current context in intelScreenContext\n"); + return NULL; + } + return intel_context(ctx); +} + +/** + * This is the driver specific part of the createNewScreen entry point. + * Called when using DRI2. + * + * \return the __GLcontextModes supported by this driver + */ +static const +__DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) +{ + intelScreenPrivate *intelScreen; + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create is + * called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + intelInitExtensions(NULL, GL_TRUE); + + /* Allocate the private area */ + intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate)); + if (!intelScreen) { + fprintf(stderr, "\nERROR! Allocating private area failed\n"); + return GL_FALSE; + } + /* parse information in __driConfigOptions */ + driParseOptionInfo(&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + intelScreen->driScrnPriv = psp; + psp->private = (void *) intelScreen; + + intelScreen->drmMinor = psp->drm_version.minor; + + /* Determine chipset ID? */ + if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID, + &intelScreen->deviceID)) + return GL_FALSE; + + /* Determine if IRQs are active? */ + if (!intel_get_param(psp, I915_PARAM_IRQ_ACTIVE, + &intelScreen->irq_active)) + return GL_FALSE; + + /* Determine if batchbuffers are allowed */ + if (!intel_get_param(psp, I915_PARAM_ALLOW_BATCHBUFFER, + &intelScreen->allow_batchbuffer)) + return GL_FALSE; + + if (!intelScreen->allow_batchbuffer) { + fprintf(stderr, "batch buffer not allowed\n"); + return GL_FALSE; + } + + psp->extensions = intelScreenExtensions; + + return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1), + intelFillInModes(psp, 32, 24, 8, 1)); +} + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = intelInitScreen, + .DestroyScreen = intelDestroyScreen, + .CreateContext = intelCreateContext, + .DestroyContext = intelDestroyContext, + .CreateBuffer = intelCreateBuffer, + .DestroyBuffer = intelDestroyBuffer, + .SwapBuffers = intelSwapBuffers, + .MakeCurrent = intelMakeCurrent, + .UnbindContext = intelUnbindContext, + .GetSwapInfo = intelGetSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, + .CopySubBuffer = intelCopySubBuffer, + + .InitScreen2 = intelInitScreen2, + .HandleDrawableConfig = intelHandleDrawableConfig, + .HandleBufferAttach = intelHandleBufferAttach, +}; diff --git a/i965/intel_screen.h b/shared/intel_screen.h index bf9a716..e62b2d7 100644 --- a/i965/intel_screen.h +++ b/shared/intel_screen.h @@ -30,39 +30,42 @@ #include <sys/time.h> #include "dri_util.h" +#include "i915_drm.h" #include "xmlconfig.h" -#include "i830_common.h" /* XXX: change name or eliminate to avoid conflict with "struct * intel_region"!!! */ -typedef struct { +typedef struct +{ drm_handle_t handle; - drmSize size; /* region size in bytes */ - char *map; /* memory map */ - int offset; /* from start of video mem, in bytes */ - int pitch; /* row stride, in pixels */ - unsigned int tiled; + drmSize size; /* region size in bytes */ + char *map; /* memory map */ + int offset; /* from start of video mem, in bytes */ + unsigned int bo_handle; /* buffer object id if available, or -1 */ + /** + * Flags if the region is tiled. + * + * Not included is Y versus X tiling. + */ + GLboolean tiled; } intelRegion; -typedef struct +typedef struct { intelRegion front; intelRegion back; - intelRegion rotated; + intelRegion third; intelRegion depth; intelRegion tex; - + int deviceID; int width; int height; - int mem; /* unused */ - - int cpp; /* for front and back buffers */ - int fbFormat; + int pitch; /* common row stride, in pixels */ int logTextureGranularity; - + __DRIscreenPrivate *driScrnPriv; unsigned int sarea_priv_offset; @@ -71,44 +74,36 @@ typedef struct int irq_active; int allow_batchbuffer; -/* struct matrix23 rotMatrix; */ - - int current_rotation; /* 0, 90, 180 or 270 */ - int rotatedWidth, rotatedHeight; - /** - * Configuration cache with default values for all contexts - */ + * Configuration cache with default values for all contexts + */ driOptionCache optionCache; } intelScreenPrivate; -extern GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv); -extern void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen); +extern GLboolean intelMapScreenRegions(__DRIscreenPrivate * sPriv); -extern void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - volatile drmI830Sarea *sarea); +extern void intelUnmapScreenRegions(intelScreenPrivate * intelScreen); extern void -intelDestroyContext(__DRIcontextPrivate *driContextPriv); +intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, + struct drm_i915_sarea * sarea); -extern GLboolean -intelUnbindContext(__DRIcontextPrivate *driContextPriv); +extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv); + +extern GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv); extern GLboolean -intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv); +intelMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); + +extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); extern void -intelSwapBuffers(__DRIdrawablePrivate *dPriv); +intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h); -extern void -intelCopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ); +extern struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen); #endif diff --git a/shared/intel_span.c b/shared/intel_span.c new file mode 100644 index 0000000..742b1b8 --- /dev/null +++ b/shared/intel_span.c @@ -0,0 +1,409 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "glheader.h" +#include "macros.h" +#include "mtypes.h" +#include "colormac.h" + +#include "intel_fbo.h" +#include "intel_screen.h" +#include "intel_span.h" +#include "intel_regions.h" +#include "intel_ioctl.h" +#include "intel_tex.h" + +#include "swrast/swrast.h" + +/* + break intelWriteRGBASpan_ARGB8888 +*/ + +#undef DBG +#define DBG 0 + +#define LOCAL_VARS \ + struct intel_context *intel = intel_context(ctx); \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + const GLint yScale = irb->RenderToTexture ? 1 : -1; \ + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + GLubyte *buf = (GLubyte *) irb->pfMap \ + + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ + GLuint p; \ + assert(irb->pfMap);\ + (void) p; + +/* XXX FBO: this is identical to the macro in spantmp2.h except we get + * the cliprect info from the context, not the driDrawable. + * Move this into spantmp2.h someday. + */ +#define HW_CLIPLOOP() \ + do { \ + int _nc = intel->numClipRects; \ + while ( _nc-- ) { \ + int minx = intel->pClipRects[_nc].x1 - intel->drawX; \ + int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ + int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ + int maxy = intel->pClipRects[_nc].y2 - intel->drawY; + + + + +#define Y_FLIP(_y) ((_y) * yScale + yBias) + +#define HW_LOCK() + +#define HW_UNLOCK() + +/* 16 bit, RGB565 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel##x##_RGB565 +#define TAG2(x,y) intel##x##_RGB565##y +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2) +#include "spantmp2.h" + +/* 32 bit, ARGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel##x##_ARGB8888 +#define TAG2(x,y) intel##x##_ARGB8888##y +#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) +#include "spantmp2.h" + +#define LOCAL_DEPTH_VARS \ + struct intel_context *intel = intel_context(ctx); \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \ + const GLint yScale = irb->RenderToTexture ? 1 : -1; \ + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ + (intel->drawY * pitch + intel->drawX) * irb->region->cpp; + + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +/** + ** 16-bit depthbuffer functions. + **/ +#define VALUE_TYPE GLushort + +#define WRITE_DEPTH( _x, _y, d ) \ + ((GLushort *)buf)[(_x) + (_y) * pitch] = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = ((GLushort *)buf)[(_x) + (_y) * pitch]; + + +#define TAG(x) intel##x##_z16 +#include "depthtmp.h" + + +/** + ** 24/8-bit interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +#define VALUE_TYPE GLuint + +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \ +} + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel##x##_z24_s8 +#include "depthtmp.h" + + +/** + ** 8-bit stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \ +} + +#define READ_STENCIL( d, _x, _y ) \ + d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24; + +#define TAG(x) intel##x##_z24_s8 +#include "stenciltmp.h" + + + +/** + * Map or unmap all the renderbuffers which we may need during + * software rendering. + * XXX in the future, we could probably convey extra information to + * reduce the number of mappings needed. I.e. if doing a glReadPixels + * from the depth buffer, we really only need one mapping. + * + * XXX Rewrite this function someday. + * We can probably just loop over all the renderbuffer attachments, + * map/unmap all of them, and not worry about the _ColorDrawBuffers + * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields. + */ +static void +intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) +{ + GLcontext *ctx = &intel->ctx; + GLuint i, j; + struct intel_renderbuffer *irb; + + /* color draw buffers */ + for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; + irb = intel_renderbuffer(rb); + if (irb) { + /* this is a user-created intel_renderbuffer */ + if (irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } + } + + /* check for render to textures */ + for (i = 0; i < BUFFER_COUNT; i++) { + struct gl_renderbuffer_attachment *att = + ctx->DrawBuffer->Attachment + i; + struct gl_texture_object *tex = att->Texture; + if (tex) { + /* render to texture */ + ASSERT(att->Renderbuffer); + if (map) { + struct gl_texture_image *texImg; + texImg = tex->Image[att->CubeMapFace][att->TextureLevel]; + intel_tex_map_images(intel, intel_texture_object(tex)); + } + else { + intel_tex_unmap_images(intel, intel_texture_object(tex)); + } + } + } + + /* color read buffers */ + irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + if (irb && irb->region) { + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + + /* Account for front/back color page flipping. + * The span routines use the pfMap and pfPitch fields which will + * swap the front/back region map/pitch if we're page flipped. + * Do this after mapping, above, so the map field is valid. + */ +#if 0 + if (map && ctx->DrawBuffer->Name == 0) { + struct intel_renderbuffer *irbFront + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT); + struct intel_renderbuffer *irbBack + = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT); + if (irbBack) { + /* double buffered */ + if (intel->sarea->pf_current_page == 0) { + irbFront->pfMap = irbFront->region->map; + irbFront->pfPitch = irbFront->region->pitch; + irbBack->pfMap = irbBack->region->map; + irbBack->pfPitch = irbBack->region->pitch; + } + else { + irbFront->pfMap = irbBack->region->map; + irbFront->pfPitch = irbBack->region->pitch; + irbBack->pfMap = irbFront->region->map; + irbBack->pfPitch = irbFront->region->pitch; + } + } + } +#endif + + /* depth buffer (Note wrapper!) */ + if (ctx->DrawBuffer->_DepthBuffer) { + irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } + } + + /* stencil buffer (Note wrapper!) */ + if (ctx->DrawBuffer->_StencilBuffer) { + irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); + if (irb && irb->region) { + if (map) { + intel_region_map(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + else { + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + } + } + } +} + + + +/** + * Prepare for softare rendering. Map current read/draw framebuffers' + * renderbuffes and all currently bound texture objects. + * + * Old note: Moved locking out to get reasonable span performance. + */ +void +intelSpanRenderStart(GLcontext * ctx) +{ + struct intel_context *intel = intel_context(ctx); + GLuint i; + + intelFinish(&intel->ctx); + LOCK_HARDWARE(intel); + +#if 0 + /* Just map the framebuffer and all textures. Bufmgr code will + * take care of waiting on the necessary fences: + */ + intel_region_map(intel, intel->front_region); + intel_region_map(intel, intel->back_region); + intel_region_map(intel, intel->depth_region); +#endif + + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + intel_tex_map_images(intel, intel_texture_object(texObj)); + } + } + + intel_map_unmap_buffers(intel, GL_TRUE); +} + +/** + * Called when done softare rendering. Unmap the buffers we mapped in + * the above function. + */ +void +intelSpanRenderFinish(GLcontext * ctx) +{ + struct intel_context *intel = intel_context(ctx); + GLuint i; + + _swrast_flush(ctx); + + /* Now unmap the framebuffer: + */ +#if 0 + intel_region_unmap(intel, intel->front_region); + intel_region_unmap(intel, intel->back_region); + intel_region_unmap(intel, intel->depth_region); +#endif + + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + intel_tex_unmap_images(intel, intel_texture_object(texObj)); + } + } + + intel_map_unmap_buffers(intel, GL_FALSE); + + UNLOCK_HARDWARE(intel); +} + + +void +intelInitSpanFuncs(GLcontext * ctx) +{ + struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); + swdd->SpanRenderStart = intelSpanRenderStart; + swdd->SpanRenderFinish = intelSpanRenderFinish; +} + + +/** + * Plug in appropriate span read/write functions for the given renderbuffer. + * These are used for the software fallbacks. + */ +void +intel_set_span_functions(struct gl_renderbuffer *rb) +{ + if (rb->_ActualFormat == GL_RGB5) { + /* 565 RGB */ + intelInitPointers_RGB565(rb); + } + else if (rb->_ActualFormat == GL_RGBA8) { + /* 8888 RGBA */ + intelInitPointers_ARGB8888(rb); + } + else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { + intelInitDepthPointers_z16(rb); + } + else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ + rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + intelInitDepthPointers_z24_s8(rb); + } + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ + intelInitStencilPointers_z24_s8(rb); + } + else { + _mesa_problem(NULL, + "Unexpected _ActualFormat in intelSetSpanFunctions"); + } +} diff --git a/i965/intel_span.h b/shared/intel_span.h index 2d4f858..5201f6d 100644 --- a/i965/intel_span.h +++ b/shared/intel_span.h @@ -28,14 +28,11 @@ #ifndef _INTEL_SPAN_H #define _INTEL_SPAN_H -#include "drirenderbuffer.h" +extern void intelInitSpanFuncs(GLcontext * ctx); -extern void intelInitSpanFuncs( GLcontext *ctx ); +extern void intelSpanRenderFinish(GLcontext * ctx); +extern void intelSpanRenderStart(GLcontext * ctx); -extern void intelSpanRenderFinish( GLcontext *ctx ); -extern void intelSpanRenderStart( GLcontext *ctx ); - -extern void -intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); +extern void intel_set_span_functions(struct gl_renderbuffer *rb); #endif diff --git a/shared/intel_tex.c b/shared/intel_tex.c new file mode 100644 index 0000000..4fa18e2 --- /dev/null +++ b/shared/intel_tex.c @@ -0,0 +1,256 @@ +#include "swrast/swrast.h" +#include "texobj.h" +#include "teximage.h" +#include "mipmap.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_tex.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +static GLboolean +intelIsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj) +{ +#if 0 + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + return + intelObj->mt && + intelObj->mt->region && + intel_is_region_resident(intel, intelObj->mt->region); +#endif + return 1; +} + + + +static struct gl_texture_image * +intelNewTextureImage(GLcontext * ctx) +{ + DBG("%s\n", __FUNCTION__); + (void) ctx; + return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image); +} + + +static struct gl_texture_object * +intelNewTextureObject(GLcontext * ctx, GLuint name, GLenum target) +{ + struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object); + + DBG("%s\n", __FUNCTION__); + _mesa_initialize_texture_object(&obj->base, name, target); + + return &obj->base; +} + +static void +intelDeleteTextureObject(GLcontext *ctx, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + if (intelObj->mt) + intel_miptree_release(intel, &intelObj->mt); + + _mesa_delete_texture_object(ctx, texObj); +} + + +static void +intelFreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + + DBG("%s\n", __FUNCTION__); + + if (intelImage->mt) { + intel_miptree_release(intel, &intelImage->mt); + } + + if (texImage->Data) { + _mesa_free_texmemory(texImage->Data); + texImage->Data = NULL; + } +} + + +/* The system memcpy (at least on ubuntu 5.10) has problems copying + * to agp (writecombined) memory from a source which isn't 64-byte + * aligned - there is a 4x performance falloff. + * + * The x86 __memcpy is immune to this but is slightly slower + * (10%-ish) than the system memcpy. + * + * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but + * isn't much faster than x86_memcpy for agp copies. + * + * TODO: switch dynamically. + */ +static void * +do_memcpy(void *dest, const void *src, size_t n) +{ + if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) { + return __memcpy(dest, src, n); + } + else + return memcpy(dest, src, n); +} + + +#if DO_DEBUG && !defined(__ia64__) + +#ifndef __x86_64__ +static unsigned +fastrdtsc(void) +{ + unsigned eax; + __asm__ volatile ("\t" + "pushl %%ebx\n\t" + "cpuid\n\t" ".byte 0x0f, 0x31\n\t" + "popl %%ebx\n":"=a" (eax) + :"0"(0) + :"ecx", "edx", "cc"); + + return eax; +} +#else +static unsigned +fastrdtsc(void) +{ + unsigned eax; + __asm__ volatile ("\t" "cpuid\n\t" ".byte 0x0f, 0x31\n\t":"=a" (eax) + :"0"(0) + :"ecx", "edx", "ebx", "cc"); + + return eax; +} +#endif + +static unsigned +time_diff(unsigned t, unsigned t2) +{ + return ((t < t2) ? t2 - t : 0xFFFFFFFFU - (t - t2 - 1)); +} + + +static void * +timed_memcpy(void *dest, const void *src, size_t n) +{ + void *ret; + unsigned t1, t2; + double rate; + + if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) + _mesa_printf("Warning - non-aligned texture copy!\n"); + + t1 = fastrdtsc(); + ret = do_memcpy(dest, src, n); + t2 = fastrdtsc(); + + rate = time_diff(t1, t2); + rate /= (double) n; + _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate); + return ret; +} +#endif /* DO_DEBUG */ + +/** + * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap + * level). + * + * The texture object's miptree must be mapped. + * + * It would be really nice if this was just called by Mesa whenever mipmaps + * needed to be regenerated, rather than us having to remember to do so in + * each texture image modification path. + * + * This function should also include an accelerated path. + */ +void +intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int face, i; + + _mesa_generate_mipmap(ctx, target, texObj); + + /* Update the level information in our private data in the new images, since + * it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage; + + intelImage = intel_texture_image(texObj->Image[face][i]); + if (intelImage == NULL) + break; + + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a bare + * pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } +} + +static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + intel_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); +} + +void +intelInitTextureFuncs(struct dd_function_table *functions) +{ + functions->ChooseTextureFormat = intelChooseTextureFormat; + functions->TexImage1D = intelTexImage1D; + functions->TexImage2D = intelTexImage2D; + functions->TexImage3D = intelTexImage3D; + functions->TexSubImage1D = intelTexSubImage1D; + functions->TexSubImage2D = intelTexSubImage2D; + functions->TexSubImage3D = intelTexSubImage3D; +#ifdef I915 + functions->CopyTexImage1D = intelCopyTexImage1D; + functions->CopyTexImage2D = intelCopyTexImage2D; + functions->CopyTexSubImage1D = intelCopyTexSubImage1D; + functions->CopyTexSubImage2D = intelCopyTexSubImage2D; +#else + functions->CopyTexImage1D = _swrast_copy_teximage1d; + functions->CopyTexImage2D = _swrast_copy_teximage2d; + functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d; + functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; +#endif + functions->GetTexImage = intelGetTexImage; + functions->GenerateMipmap = intelGenerateMipmap; + + /* compressed texture functions */ + functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->GetCompressedTexImage = intelGetCompressedTexImage; + + functions->NewTextureObject = intelNewTextureObject; + functions->NewTextureImage = intelNewTextureImage; + functions->DeleteTexture = intelDeleteTextureObject; + functions->FreeTexImageData = intelFreeTextureImageData; + functions->UpdateTexturePalette = 0; + functions->IsTextureResident = intelIsTextureResident; + +#if DO_DEBUG && !defined(__ia64__) + if (INTEL_DEBUG & DEBUG_BUFMGR) + functions->TextureMemCpy = timed_memcpy; + else +#endif + functions->TextureMemCpy = do_memcpy; +} diff --git a/shared/intel_tex.h b/shared/intel_tex.h new file mode 100644 index 0000000..fe7a8ba --- /dev/null +++ b/shared/intel_tex.h @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELTEX_INC +#define INTELTEX_INC + +#include "mtypes.h" +#include "intel_context.h" +#include "texmem.h" + + +void intelInitTextureFuncs(struct dd_function_table *functions); + +const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type); + + +void intelTexImage3D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelTexSubImage3D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelTexImage2D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelTexSubImage2D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelTexImage1D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelTexSubImage1D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLint border); + +void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border); + +void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint x, GLint y, GLsizei width); + +void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, GLsizei width, GLsizei height); + +void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ); + +void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, + GLvoid *pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch); +void intelSetTexBuffer(__DRIcontext *pDRICtx, + GLint target, __DRIdrawable *pDraw); + +GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); + +void intel_tex_map_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level); + +void intel_tex_unmap_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level); + +void intel_tex_map_images(struct intel_context *intel, + struct intel_texture_object *intelObj); + +void intel_tex_unmap_images(struct intel_context *intel, + struct intel_texture_object *intelObj); + +int intel_compressed_num_bytes(GLuint mesaFormat); + +void intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + +#endif diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c new file mode 100644 index 0000000..1add7c6 --- /dev/null +++ b/shared/intel_tex_copy.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "mtypes.h" +#include "enums.h" +#include "image.h" +#include "teximage.h" +#include "mipmap.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_fbo.h" +#include "intel_tex.h" +#include "intel_blit.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/** + * Get the intel_region which is the source for any glCopyTex[Sub]Image call. + * + * Do the best we can using the blitter. A future project is to use + * the texture engine and fragment programs for these copies. + */ +static const struct intel_region * +get_teximage_source(struct intel_context *intel, GLenum internalFormat) +{ + struct intel_renderbuffer *irb; + + DBG("%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(internalFormat)); + + switch (internalFormat) { + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16_ARB: + irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); + if (irb && irb->region && irb->region->cpp == 2) + return irb->region; + return NULL; + case GL_DEPTH24_STENCIL8_EXT: + case GL_DEPTH_STENCIL_EXT: + irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); + if (irb && irb->region && irb->region->cpp == 4) + return irb->region; + return NULL; + case GL_RGBA: + case GL_RGBA8: + return intel_readbuf_region(intel); + case GL_RGB: + if (intel->ctx.Visual.rgbBits == 16) + return intel_readbuf_region(intel); + return NULL; + default: + return NULL; + } +} + + +static GLboolean +do_copy_texsubimage(struct intel_context *intel, + GLenum target, + struct intel_texture_image *intelImage, + GLenum internalFormat, + GLint dstx, GLint dsty, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + GLcontext *ctx = &intel->ctx; + struct gl_texture_object *texObj = intelImage->base.TexObject; + const struct intel_region *src = + get_teximage_source(intel, internalFormat); + + if (!intelImage->mt || !src) { + DBG("%s fail %p %p\n", __FUNCTION__, intelImage->mt, src); + return GL_FALSE; + } + + intelFlush(ctx); + LOCK_HARDWARE(intel); + { + GLuint image_offset = intel_miptree_image_offset(intelImage->mt, + intelImage->face, + intelImage->level); + const GLint orig_x = x; + const GLint orig_y = y; + const struct gl_framebuffer *fb = ctx->DrawBuffer; + + if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax, + &x, &y, &width, &height)) { + /* Update dst for clipped src. Need to also clip the source rect. + */ + dstx += x - orig_x; + dsty += y - orig_y; + + if (ctx->ReadBuffer->Name == 0) { + /* reading from a window, adjust x, y */ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + GLuint window_y; + /* window_y = position of window on screen if y=0=bottom */ + window_y = intel->intelScreen->height - (dPriv->y + dPriv->h); + y = window_y + y; + x += dPriv->x; + } + else { + /* reading from a FBO */ + /* invert Y */ + y = ctx->ReadBuffer->Height - y - 1; + } + + + /* A bit of fiddling to get the blitter to work with -ve + * pitches. But we get a nice inverted blit this way, so it's + * worth it: + */ + intelEmitCopyBlit(intel, + intelImage->mt->cpp, + -src->pitch, + src->buffer, + src->height * src->pitch * src->cpp, + GL_FALSE, + intelImage->mt->pitch, + intelImage->mt->region->buffer, + image_offset, + intelImage->mt->region->tiled, + x, y + height, dstx, dsty, width, height, + GL_COPY); /* ? */ + + intel_batchbuffer_flush(intel->batch); + } + } + + + UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } + + return GL_TRUE; +} + + + + + +void +intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLint border) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + + if (border) + goto fail; + + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + if (!do_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, 1)) + goto fail; + + return; + + fail: + _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y, + width, border); +} + +void +intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + + if (border) + goto fail; + + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + + if (!do_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, height)) + goto fail; + + return; + + fail: + _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y, + width, height, border); +} + + +void +intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint x, GLint y, GLsizei width) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + GLenum internalFormat = texImage->InternalFormat; + + /* XXX need to check <border> as in above function? */ + + /* Need to check texture is compatible with source format. + */ + + if (!do_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, xoffset, 0, x, y, width, 1)) { + _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width); + } +} + + + +void +intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + GLenum internalFormat = texImage->InternalFormat; + + + /* Need to check texture is compatible with source format. + */ + + if (!do_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, + xoffset, yoffset, x, y, width, height)) { + + DBG("%s - fallback to swrast\n", __FUNCTION__); + + _swrast_copy_texsubimage2d(ctx, target, level, + xoffset, yoffset, x, y, width, height); + } +} diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c new file mode 100644 index 0000000..349a29b --- /dev/null +++ b/shared/intel_tex_format.c @@ -0,0 +1,193 @@ +#include "intel_context.h" +#include "intel_tex.h" +#include "texformat.h" +#include "enums.h" + +/* It works out that this function is fine for all the supported + * hardware. However, there is still a need to map the formats onto + * hardware descriptors. + */ +/* Note that the i915 can actually support many more formats than + * these if we take the step of simply swizzling the colors + * immediately after sampling... + */ +const struct gl_texture_format * +intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, + GLenum format, GLenum type) +{ + struct intel_context *intel = intel_context(ctx); + const GLboolean do32bpt = (intel->ctx.Visual.rgbBits == 32); + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + if (format == GL_BGRA) { + if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { + return &_mesa_texformat_argb8888; + } + else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { + return &_mesa_texformat_argb4444; + } + else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { + return &_mesa_texformat_argb1555; + } + } + return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { + return &_mesa_texformat_rgb565; + } + return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return &_mesa_texformat_argb4444; + + case GL_RGB5_A1: + return &_mesa_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return &_mesa_texformat_argb8888; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return &_mesa_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + return &_mesa_texformat_a8; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return &_mesa_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return &_mesa_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return &_mesa_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_COMPRESSED_RGB_FXT1_3DFX: + return &_mesa_texformat_rgb_fxt1; + case GL_COMPRESSED_RGBA_FXT1_3DFX: + return &_mesa_texformat_rgba_fxt1; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + return &_mesa_texformat_z16; + + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + return &_mesa_texformat_z24_s8; + +#ifndef I915 + case GL_SRGB_EXT: + case GL_SRGB8_EXT: + case GL_SRGB_ALPHA_EXT: + case GL_SRGB8_ALPHA8_EXT: + case GL_SLUMINANCE_EXT: + case GL_SLUMINANCE8_EXT: + case GL_SLUMINANCE_ALPHA_EXT: + case GL_SLUMINANCE8_ALPHA8_EXT: + case GL_COMPRESSED_SRGB_EXT: + case GL_COMPRESSED_SRGB_ALPHA_EXT: + case GL_COMPRESSED_SLUMINANCE_EXT: + case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: + return &_mesa_texformat_srgba8; + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return &_mesa_texformat_srgb_dxt1; +#endif + + default: + fprintf(stderr, "unexpected texture format %s in %s\n", + _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); + return NULL; + } + + return NULL; /* never get here */ +} + +int intel_compressed_num_bytes(GLuint mesaFormat) +{ + int bytes = 0; + switch(mesaFormat) { + + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + bytes = 2; + break; + + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + bytes = 4; + default: + break; + } + + return bytes; +} diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c new file mode 100644 index 0000000..f261034 --- /dev/null +++ b/shared/intel_tex_image.c @@ -0,0 +1,760 @@ + +#include <stdlib.h> +#include <stdio.h> + +#include "glheader.h" +#include "macros.h" +#include "mtypes.h" +#include "enums.h" +#include "colortab.h" +#include "convolve.h" +#include "context.h" +#include "simple_list.h" +#include "texcompress.h" +#include "texformat.h" +#include "texobj.h" +#include "texstore.h" +#include "teximage.h" + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" +#include "intel_tex.h" +#include "intel_ioctl.h" +#include "intel_blit.h" +#include "intel_fbo.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/* Functions to store texture images. Where possible, mipmap_tree's + * will be created or further instantiated with image data, otherwise + * images will be stored in malloc'd memory. A validation step is + * required to pull those images into a mipmap tree, or otherwise + * decide a fallback is required. + */ + + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + + +/* Otherwise, store it in memory if (Border != 0) or (any dimension == + * 1). + * + * Otherwise, if max_level >= level >= min_level, create tree with + * space for textures from min_level down to max_level. + * + * Otherwise, create tree with space for textures from (level + * 0)..(1x1). Consider pruning this tree at a validation if the + * saving is worth it. + */ +static void +guess_and_alloc_mipmap_tree(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage) +{ + GLuint firstLevel; + GLuint lastLevel; + GLuint width = intelImage->base.Width; + GLuint height = intelImage->base.Height; + GLuint depth = intelImage->base.Depth; + GLuint l2width, l2height, l2depth; + GLuint i, comp_byte = 0; + + DBG("%s\n", __FUNCTION__); + + if (intelImage->base.Border || + ((intelImage->base._BaseFormat == GL_DEPTH_COMPONENT) && + ((intelObj->base.WrapS == GL_CLAMP_TO_BORDER) || + (intelObj->base.WrapT == GL_CLAMP_TO_BORDER)))) + return; + + if (intelImage->level > intelObj->base.BaseLevel && + (intelImage->base.Width == 1 || + (intelObj->base.Target != GL_TEXTURE_1D && + intelImage->base.Height == 1) || + (intelObj->base.Target == GL_TEXTURE_3D && + intelImage->base.Depth == 1))) + return; + + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if (intelImage->level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + + /* Figure out image dimensions at start level. + */ + for (i = intelImage->level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } + + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. + */ + if ((intelObj->base.MinFilter == GL_NEAREST || + intelObj->base.MinFilter == GL_LINEAR) && + intelImage->level == firstLevel) { + lastLevel = firstLevel; + } + else { + l2width = logbase2(width); + l2height = logbase2(height); + l2depth = logbase2(depth); + lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth); + } + + assert(!intelObj->mt); + if (intelImage->base.IsCompressed) + comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); + intelObj->mt = intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base.InternalFormat, + firstLevel, + lastLevel, + width, + height, + depth, + intelImage->base.TexFormat->TexelBytes, + comp_byte); + + DBG("%s - success\n", __FUNCTION__); +} + + + + +static GLuint +target_to_face(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); + default: + return 0; + } +} + +/* There are actually quite a few combinations this will work for, + * more than what I've listed here. + */ +static GLboolean +check_pbo_format(GLint internalFormat, + GLenum format, GLenum type, + const struct gl_texture_format *mesa_format) +{ + switch (internalFormat) { + case 4: + case GL_RGBA: + return (format == GL_BGRA && + (type == GL_UNSIGNED_BYTE || + type == GL_UNSIGNED_INT_8_8_8_8_REV) && + mesa_format == &_mesa_texformat_argb8888); + case 3: + case GL_RGB: + return (format == GL_RGB && + type == GL_UNSIGNED_SHORT_5_6_5 && + mesa_format == &_mesa_texformat_rgb565); + case GL_YCBCR_MESA: + return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE); + default: + return GL_FALSE; + } +} + + +/* XXX: Do this for TexSubImage also: + */ +static GLboolean +try_pbo_upload(struct intel_context *intel, + struct intel_texture_image *intelImage, + const struct gl_pixelstore_attrib *unpack, + GLint internalFormat, + GLint width, GLint height, + GLenum format, GLenum type, const void *pixels) +{ + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); + GLuint src_offset, src_stride; + GLuint dst_offset, dst_stride; + + if (!pbo || + intel->ctx._ImageTransferState || + unpack->SkipPixels || unpack->SkipRows) { + _mesa_printf("%s: failure 1\n", __FUNCTION__); + return GL_FALSE; + } + + src_offset = (GLuint) pixels; + + if (unpack->RowLength > 0) + src_stride = unpack->RowLength; + else + src_stride = width; + + dst_offset = intel_miptree_image_offset(intelImage->mt, + intelImage->face, + intelImage->level); + + dst_stride = intelImage->mt->pitch; + + intelFlush(&intel->ctx); + LOCK_HARDWARE(intel); + { + dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); + dri_bo *dst_buffer = intel_region_buffer(intel, + intelImage->mt->region, + INTEL_WRITE_FULL); + + + intelEmitCopyBlit(intel, + intelImage->mt->cpp, + src_stride, src_buffer, src_offset, GL_FALSE, + dst_stride, dst_buffer, dst_offset, GL_FALSE, + 0, 0, 0, 0, width, height, + GL_COPY); + + intel_batchbuffer_flush(intel->batch); + } + UNLOCK_HARDWARE(intel); + + return GL_TRUE; +} + + + +static GLboolean +try_pbo_zcopy(struct intel_context *intel, + struct intel_texture_image *intelImage, + const struct gl_pixelstore_attrib *unpack, + GLint internalFormat, + GLint width, GLint height, + GLenum format, GLenum type, const void *pixels) +{ + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); + GLuint src_offset, src_stride; + GLuint dst_offset, dst_stride; + + if (!pbo || + intel->ctx._ImageTransferState || + unpack->SkipPixels || unpack->SkipRows) { + _mesa_printf("%s: failure 1\n", __FUNCTION__); + return GL_FALSE; + } + + src_offset = (GLuint) pixels; + + if (unpack->RowLength > 0) + src_stride = unpack->RowLength; + else + src_stride = width; + + dst_offset = intel_miptree_image_offset(intelImage->mt, + intelImage->face, + intelImage->level); + + dst_stride = intelImage->mt->pitch; + + if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) { + _mesa_printf("%s: failure 2\n", __FUNCTION__); + return GL_FALSE; + } + + intel_region_attach_pbo(intel, intelImage->mt->region, pbo); + + return GL_TRUE; +} + + + + + + +static void +intelTexImage(GLcontext * ctx, + GLint dims, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, GLsizei imageSize, int compressed) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + GLint postConvWidth = width; + GLint postConvHeight = height; + GLint texelBytes, sizeInBytes; + GLuint dstRowStride, srcRowStride = texImage->RowStride; + + + DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); + + intelFlush(ctx); + + intelImage->face = target_to_face(target); + intelImage->level = level; + + if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) { + _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth, + &postConvHeight); + } + + /* choose the texture format */ + texImage->TexFormat = intelChooseTextureFormat(ctx, internalFormat, + format, type); + + _mesa_set_fetch_functions(texImage, dims); + + if (texImage->TexFormat->TexelBytes == 0) { + /* must be a compressed format */ + texelBytes = 0; + texImage->IsCompressed = GL_TRUE; + texImage->CompressedSize = + ctx->Driver.CompressedTextureSize(ctx, texImage->Width, + texImage->Height, texImage->Depth, + texImage->TexFormat->MesaFormat); + } else { + texelBytes = texImage->TexFormat->TexelBytes; + + /* Minimum pitch of 32 bytes */ + if (postConvWidth * texelBytes < 32) { + postConvWidth = 32 / texelBytes; + texImage->RowStride = postConvWidth; + } + + if (!intelImage->mt) { + assert(texImage->RowStride == postConvWidth); + } + } + + /* Release the reference to a potentially orphaned buffer. + * Release any old malloced memory. + */ + if (intelImage->mt) { + intel_miptree_release(intel, &intelImage->mt); + assert(!texImage->Data); + } + else if (texImage->Data) { + _mesa_free_texmemory(texImage->Data); + texImage->Data = NULL; + } + + /* If this is the only texture image in the tree, could call + * bmBufferData with NULL data to free the old block and avoid + * waiting on any outstanding fences. + */ + if (intelObj->mt && + intelObj->mt->first_level == level && + intelObj->mt->last_level == level && + intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB && + !intel_miptree_match_image(intelObj->mt, &intelImage->base, + intelImage->face, intelImage->level)) { + + DBG("release it\n"); + intel_miptree_release(intel, &intelObj->mt); + assert(!intelObj->mt); + } + + if (!intelObj->mt) { + guess_and_alloc_mipmap_tree(intel, intelObj, intelImage); + if (!intelObj->mt) { + DBG("guess_and_alloc_mipmap_tree: failed\n"); + } + } + + assert(!intelImage->mt); + + if (intelObj->mt && + intel_miptree_match_image(intelObj->mt, &intelImage->base, + intelImage->face, intelImage->level)) { + + intel_miptree_reference(&intelImage->mt, intelObj->mt); + assert(intelImage->mt); + } + + if (!intelImage->mt) + DBG("XXX: Image did not fit into tree - storing in local memory!\n"); + + /* PBO fastpaths: + */ + if (dims <= 2 && + intelImage->mt && + intel_buffer_object(unpack->BufferObj) && + check_pbo_format(internalFormat, format, + type, intelImage->base.TexFormat)) { + + DBG("trying pbo upload\n"); + + /* Attempt to texture directly from PBO data (zero copy upload). + * + * Currently disable as it can lead to worse as well as better + * performance (in particular when intel_region_cow() is + * required). + */ + if (intelObj->mt == intelImage->mt && + intelObj->mt->first_level == level && + intelObj->mt->last_level == level) { + + if (try_pbo_zcopy(intel, intelImage, unpack, + internalFormat, + width, height, format, type, pixels)) { + + DBG("pbo zcopy upload succeeded\n"); + return; + } + } + + + /* Otherwise, attempt to use the blitter for PBO image uploads. + */ + if (try_pbo_upload(intel, intelImage, unpack, + internalFormat, + width, height, format, type, pixels)) { + DBG("pbo upload succeeded\n"); + return; + } + + DBG("pbo upload failed\n"); + } + + + + /* intelCopyTexImage calls this function with pixels == NULL, with + * the expectation that the mipmap tree will be set up but nothing + * more will be done. This is where those calls return: + */ + if (compressed) { + pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels, + unpack, + "glCompressedTexImage"); + } else { + pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1, + format, type, + pixels, unpack, "glTexImage"); + } + + LOCK_HARDWARE(intel); + + if (intelImage->mt) { + texImage->Data = intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &dstRowStride, + intelImage->base.ImageOffsets); + texImage->RowStride = dstRowStride / intelImage->mt->cpp; + } + else { + /* Allocate regular memory and store the image there temporarily. */ + if (texImage->IsCompressed) { + sizeInBytes = texImage->CompressedSize; + dstRowStride = + _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + assert(dims != 3); + } + else { + dstRowStride = postConvWidth * texelBytes; + sizeInBytes = depth * dstRowStride * postConvHeight; + } + + texImage->Data = _mesa_alloc_texmemory(sizeInBytes); + } + + DBG("Upload image %dx%dx%d row_len %d " + "pitch %d\n", + width, height, depth, width * texelBytes, dstRowStride); + + /* Copy data. Would like to know when it's ok for us to eg. use + * the blitter to copy. Or, use the hardware to do the format + * conversion and copy: + */ + if (pixels) { + if (compressed) { + if (intelImage->mt) { + struct intel_region *dst = intelImage->mt->region; + _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, + 0, 0, + intelImage->mt->level[level].width, + intelImage->mt->level[level].height/4, + pixels, + srcRowStride, + 0, 0); + } else + memcpy(texImage->Data, pixels, imageSize); + } else if (!texImage->TexFormat->StoreImage(ctx, dims, + texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */ + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, unpack)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + } + } + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } + + _mesa_unmap_teximage_pbo(ctx, unpack); + + if (intelImage->mt) { + intel_miptree_image_unmap(intel, intelImage->mt); + texImage->Data = NULL; + } + + UNLOCK_HARDWARE(intel); +} + +void +intelTexImage3D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intelTexImage(ctx, 3, target, level, + internalFormat, width, height, depth, border, + format, type, pixels, unpack, texObj, texImage, 0, 0); +} + + +void +intelTexImage2D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intelTexImage(ctx, 2, target, level, + internalFormat, width, height, 1, border, + format, type, pixels, unpack, texObj, texImage, 0, 0); +} + +void +intelTexImage1D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intelTexImage(ctx, 1, target, level, + internalFormat, width, 1, 1, border, + format, type, pixels, unpack, texObj, texImage, 0, 0); +} + +void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + intelTexImage(ctx, 2, target, level, + internalFormat, width, height, 1, border, + 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1); +} + +/** + * Need to map texture image into memory before copying image data, + * then unmap it. + */ +static void +intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, int compressed) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + + /* Map */ + if (intelImage->mt) { + /* Image is stored in hardware format in a buffer managed by the + * kernel. Need to explicitly map and unmap it. + */ + intelImage->base.Data = + intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &intelImage->base.RowStride, + intelImage->base.ImageOffsets); + intelImage->base.RowStride /= intelImage->mt->cpp; + } + else { + /* Otherwise, the image should actually be stored in + * intelImage->base.Data. This is pretty confusing for + * everybody, I'd much prefer to separate the two functions of + * texImage->Data - storage for texture images in main memory + * and access (ie mappings) of images. In other words, we'd + * create a new texImage->Map field and leave Data simply for + * storage. + */ + assert(intelImage->base.Data); + } + + + if (compressed) { + _mesa_get_compressed_teximage(ctx, target, level, pixels, + texObj, texImage); + } else { + _mesa_get_teximage(ctx, target, level, format, type, pixels, + texObj, texImage); + } + + + /* Unmap */ + if (intelImage->mt) { + intel_miptree_image_unmap(intel, intelImage->mt); + intelImage->base.Data = NULL; + } +} + +void +intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intel_get_tex_image(ctx, target, level, format, type, pixels, + texObj, texImage, 0); + + +} + +void +intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, + GLvoid *pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intel_get_tex_image(ctx, target, level, 0, 0, pixels, + texObj, texImage, 1); +} + +void +intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + struct intel_context *intel = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname); + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (!intelObj) + return; + + if (intelObj->mt) + intel_miptree_release(intel, &intelObj->mt); + + intelObj->imageOverride = GL_TRUE; + intelObj->depthOverride = depth; + intelObj->pitchOverride = pitch; + + if (offset) + intelObj->textureOffset = offset; +} + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; + struct intel_context *intel = pDRICtx->driverPrivate; + struct intel_texture_object *intelObj; + struct intel_texture_image *intelImage; + struct intel_mipmap_tree *mt; + struct intel_renderbuffer *rb; + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + int level = 0, type, format, internalFormat; + + texUnit = &intel->ctx.Texture.Unit[intel->ctx.Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(&intel->ctx, texUnit, target); + intelObj = intel_texture_object(texObj); + + if (!intelObj) + return; + + __driParseEvents(pDRICtx, dPriv); + + rb = intel_fb->color_rb[0]; + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (rb->region->cpp == 3 ? 3 : 4); + + mt = intel_miptree_create_for_region(intel, target, + internalFormat, + 0, 0, rb->region, 1, 0); + if (mt == NULL) + return; + + _mesa_lock_texture(&intel->ctx, texObj); + + if (intelObj->mt) + intel_miptree_release(intel, &intelObj->mt); + + intelObj->mt = mt; + texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level); + _mesa_init_teximage_fields(&intel->ctx, target, texImage, + rb->region->pitch, rb->region->height, 1, + 0, internalFormat); + + intelImage = intel_texture_image(texImage); + intelImage->face = target_to_face(target); + intelImage->level = level; + texImage->TexFormat = intelChooseTextureFormat(&intel->ctx, internalFormat, + type, format); + _mesa_set_fetch_functions(texImage, 2); + texImage->RowStride = rb->region->pitch; + intel_miptree_reference(&intelImage->mt, intelObj->mt); + + if (!intel_miptree_match_image(intelObj->mt, &intelImage->base, + intelImage->face, intelImage->level)) { + fprintf(stderr, "miptree doesn't match image\n"); + } + + _mesa_unlock_texture(&intel->ctx, texObj); +} diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c index 39a443c..edc3a2e 100644 --- a/shared/intel_tex_layout.c +++ b/shared/intel_tex_layout.c @@ -32,12 +32,24 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" +#include "intel_context.h" #include "macros.h" - -static int align(int value, int alignment) +GLuint intel_compressed_alignment(GLenum internalFormat) { - return (value + alignment - 1) & ~(alignment - 1); + GLuint alignment = 4; + + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + alignment = 8; + break; + + default: + break; + } + + return alignment; } void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ) @@ -51,17 +63,30 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr mt->pitch = mt->width0; + if (mt->compressed) { + align_w = intel_compressed_alignment(mt->internal_format); + mt->pitch = ALIGN(mt->width0, align_w); + } + /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap. This occurs when the alignment * constraints of mipmap placement push the right edge of the * 2nd mipmap out past the width of its parent. */ if (mt->first_level != mt->last_level) { - GLuint mip1_width = align(minify(mt->width0), align_w) - + minify(minify(mt->width0)); + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } - if (mip1_width > mt->width0) - mt->pitch = mip1_width; + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } } /* Pitch must be a whole number of dwords, even though we @@ -79,7 +104,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr if (mt->compressed) img_height = MAX2(1, height/4); else - img_height = align(height, align_h); + img_height = ALIGN(height, align_h); /* Because the images are packed better, the final offset @@ -90,7 +115,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr /* Layout_below: step right after second mipmap. */ if (level == mt->first_level + 1) { - x += align(width, align_w); + x += ALIGN(width, align_w); } else { y += img_height; diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h index 46151db..193699d 100644 --- a/shared/intel_tex_layout.h +++ b/shared/intel_tex_layout.h @@ -39,3 +39,4 @@ static GLuint minify( GLuint d ) } extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ); +extern GLuint intel_compressed_alignment(GLenum); diff --git a/i915/intel_tex.h b/shared/intel_tex_obj.h index 9b7e550..5a93461 100644 --- a/i915/intel_tex.h +++ b/shared/intel_tex_obj.h @@ -25,21 +25,59 @@ * **************************************************************************/ -#ifndef INTELTEX_INC -#define INTELTEX_INC +#ifndef _INTEL_TEX_OBJ_H +#define _INTEL_TEX_OBJ_H -#include "mtypes.h" -#include "intel_context.h" -#include "texmem.h" +struct intel_texture_object +{ + struct gl_texture_object base; /* The "parent" object */ + /* The mipmap tree must include at least these levels once + * validated: + */ + GLuint firstLevel; + GLuint lastLevel; -void intelInitTextureFuncs( struct dd_function_table *functions ); + /* Offset for firstLevel image: + */ + GLuint textureOffset; -void intelDestroyTexObj( intelContextPtr intel, intelTextureObjectPtr t ); -int intelUploadTexImages( intelContextPtr intel, intelTextureObjectPtr t, - GLuint face ); + /* On validation any active images held in main memory or in other + * regions will be copied to this region and the old storage freed. + */ + struct intel_mipmap_tree *mt; -GLboolean -intel_driReinitTextureHeap( driTexHeap *heap, - unsigned size ); -#endif + GLboolean imageOverride; + GLint depthOverride; + GLuint pitchOverride; +}; + +struct intel_texture_image +{ + struct gl_texture_image base; + + /* These aren't stored in gl_texture_image + */ + GLuint level; + GLuint face; + + /* If intelImage->mt != NULL, image data is stored here. + * Else if intelImage->base.Data != NULL, image is stored there. + * Else there is no image data. + */ + struct intel_mipmap_tree *mt; +}; + +static INLINE struct intel_texture_object * +intel_texture_object(struct gl_texture_object *obj) +{ + return (struct intel_texture_object *) obj; +} + +static INLINE struct intel_texture_image * +intel_texture_image(struct gl_texture_image *img) +{ + return (struct intel_texture_image *) img; +} + +#endif /* _INTEL_TEX_OBJ_H */ diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c new file mode 100644 index 0000000..5428a1d --- /dev/null +++ b/shared/intel_tex_subimage.c @@ -0,0 +1,186 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "mtypes.h" +#include "texobj.h" +#include "texstore.h" +#include "texcompress.h" +#include "enums.h" + +#include "intel_context.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +static void +intelTexSubimage(GLcontext * ctx, + GLint dims, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint width, GLint height, GLint depth, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + GLuint dstRowStride = 0; + + DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + level, xoffset, yoffset, width, height); + + intelFlush(ctx); + + pixels = + _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format, + type, pixels, packing, "glTexSubImage2D"); + if (!pixels) + return; + + LOCK_HARDWARE(intel); + + /* Map buffer if necessary. Need to lock to prevent other contexts + * from uploading the buffer under us. + */ + if (intelImage->mt) + texImage->Data = intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &dstRowStride, + texImage->ImageOffsets); + else { + if (texImage->IsCompressed) { + dstRowStride = + _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + assert(dims != 3); + } + else { + dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes; + } + } + + assert(dstRowStride); + + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + } + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } + + _mesa_unmap_teximage_pbo(ctx, packing); + + if (intelImage->mt) { + intel_miptree_image_unmap(intel, intelImage->mt); + texImage->Data = NULL; + } + + UNLOCK_HARDWARE(intel); +} + + + + + +void +intelTexSubImage3D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + + intelTexSubimage(ctx, 3, + target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, texImage); + +} + + + +void +intelTexSubImage2D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + + intelTexSubimage(ctx, 2, + target, level, + xoffset, yoffset, 0, + width, height, 1, + format, type, pixels, packing, texObj, texImage); + +} + + +void +intelTexSubImage1D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + intelTexSubimage(ctx, 1, + target, level, + xoffset, 0, 0, + width, 1, 1, + format, type, pixels, packing, texObj, texImage); + +} diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c new file mode 100644 index 0000000..1b3aa89 --- /dev/null +++ b/shared/intel_tex_validate.c @@ -0,0 +1,314 @@ +#include "mtypes.h" +#include "macros.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" +#include "intel_tex.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/** + * Compute which mipmap levels that really need to be sent to the hardware. + * This depends on the base image size, GL_TEXTURE_MIN_LOD, + * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. + */ +static void +intel_calculate_first_last_level(struct intel_texture_object *intelObj) +{ + struct gl_texture_object *tObj = &intelObj->base; + const struct gl_texture_image *const baseImage = + tObj->Image[0][tObj->BaseLevel]; + + /* These must be signed values. MinLod and MaxLod can be negative numbers, + * and having firstLevel and lastLevel as signed prevents the need for + * extra sign checks. + */ + int firstLevel; + int lastLevel; + + /* Yes, this looks overly complicated, but it's all needed. + */ + switch (tObj->Target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + case GL_TEXTURE_CUBE_MAP: + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. + */ + firstLevel = lastLevel = tObj->BaseLevel; + } + else { +#ifdef I915 + firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); + firstLevel = MAX2(firstLevel, tObj->BaseLevel); + firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); + lastLevel = MAX2(lastLevel, tObj->BaseLevel); + lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); + lastLevel = MIN2(lastLevel, tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ +#else + /* Currently not taking min/max lod into account here, those + * values are programmed as sampler state elsewhere and we + * upload the same mipmap levels regardless. Not sure if + * this makes sense as it means it isn't possible for the app + * to use min/max lod to reduce texture memory pressure: + */ + firstLevel = tObj->BaseLevel; + lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, + tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ +#endif + } + break; + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_4D_SGIS: + firstLevel = lastLevel = 0; + break; + default: + return; + } + + /* save these values */ + intelObj->firstLevel = firstLevel; + intelObj->lastLevel = lastLevel; +} + +/** + * Copies the image's contents at its level into the object's miptree, + * and updates the image to point at the object's miptree. + */ +static void +copy_image_data_to_tree(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage) +{ + if (intelImage->mt) { + /* Copy potentially with the blitter: + */ + intel_miptree_image_copy(intel, + intelObj->mt, + intelImage->face, + intelImage->level, intelImage->mt); + + intel_miptree_release(intel, &intelImage->mt); + } + else { + assert(intelImage->base.Data != NULL); + + /* More straightforward upload. + */ + intel_miptree_image_data(intel, + intelObj->mt, + intelImage->face, + intelImage->level, + intelImage->base.Data, + intelImage->base.RowStride, + intelImage->base.RowStride * + intelImage->base.Height); + _mesa_align_free(intelImage->base.Data); + intelImage->base.Data = NULL; + } + + intel_miptree_reference(&intelImage->mt, intelObj->mt); +} + + +/* + */ +GLuint +intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) +{ + struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + int comp_byte = 0; + int cpp; + + GLuint face, i; + GLuint nr_faces = 0; + struct intel_texture_image *firstImage; + + GLboolean need_flush = GL_FALSE; + + /* We know/require this is true by now: + */ + assert(intelObj->base._Complete); + + /* What levels must the tree include at a minimum? + */ + intel_calculate_first_last_level(intelObj); + firstImage = + intel_texture_image(intelObj->base.Image[0][intelObj->firstLevel]); + + /* Fallback case: + */ + if (firstImage->base.Border || + ((firstImage->base._BaseFormat == GL_DEPTH_COMPONENT) && + ((tObj->WrapS == GL_CLAMP_TO_BORDER) || + (tObj->WrapT == GL_CLAMP_TO_BORDER)))) { + if (intelObj->mt) { + intel_miptree_release(intel, &intelObj->mt); + } + return GL_FALSE; + } + + + /* If both firstImage and intelObj have a tree which can contain + * all active images, favour firstImage. Note that because of the + * completeness requirement, we know that the image dimensions + * will match. + */ + if (firstImage->mt && + firstImage->mt != intelObj->mt && + firstImage->mt->first_level <= intelObj->firstLevel && + firstImage->mt->last_level >= intelObj->lastLevel) { + + if (intelObj->mt) + intel_miptree_release(intel, &intelObj->mt); + + intel_miptree_reference(&intelObj->mt, firstImage->mt); + } + + if (firstImage->base.IsCompressed) { + comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat->MesaFormat); + cpp = comp_byte; + } + else cpp = firstImage->base.TexFormat->TexelBytes; + + /* Check tree can hold all active levels. Check tree matches + * target, imageFormat, etc. + * + * XXX: For some layouts (eg i945?), the test might have to be + * first_level == firstLevel, as the tree isn't valid except at the + * original start level. Hope to get around this by + * programming minLod, maxLod, baseLevel into the hardware and + * leaving the tree alone. + */ + if (intelObj->mt && + (intelObj->mt->target != intelObj->base.Target || + intelObj->mt->internal_format != firstImage->base.InternalFormat || + intelObj->mt->first_level != intelObj->firstLevel || + intelObj->mt->last_level != intelObj->lastLevel || + intelObj->mt->width0 != firstImage->base.Width || + intelObj->mt->height0 != firstImage->base.Height || + intelObj->mt->depth0 != firstImage->base.Depth || + intelObj->mt->cpp != cpp || + intelObj->mt->compressed != firstImage->base.IsCompressed)) { + intel_miptree_release(intel, &intelObj->mt); + } + + + /* May need to create a new tree: + */ + if (!intelObj->mt) { + intelObj->mt = intel_miptree_create(intel, + intelObj->base.Target, + firstImage->base.InternalFormat, + intelObj->firstLevel, + intelObj->lastLevel, + firstImage->base.Width, + firstImage->base.Height, + firstImage->base.Depth, + cpp, + comp_byte); + } + + /* Pull in any images not in the object's tree: + */ + nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + for (face = 0; face < nr_faces; face++) { + for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { + struct intel_texture_image *intelImage = + intel_texture_image(intelObj->base.Image[face][i]); + + /* Need to import images in main memory or held in other trees. + */ + if (intelObj->mt != intelImage->mt) { + copy_image_data_to_tree(intel, intelObj, intelImage); + need_flush = GL_TRUE; + } + } + } + +#ifdef I915 + /* XXX: what is this flush about? + * On 965, it causes a batch flush in the middle of the state relocation + * emits, which means that the eventual rendering doesn't have all of the + * required relocations in place. + */ + if (need_flush) + intel_batchbuffer_flush(intel->batch); +#endif + + return GL_TRUE; +} + +void +intel_tex_map_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level) +{ + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + GLuint face; + + for (face = 0; face < nr_faces; face++) { + struct intel_texture_image *intelImage = + intel_texture_image(intelObj->base.Image[face][level]); + + if (intelImage->mt) { + intelImage->base.Data = + intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &intelImage->base.RowStride, + intelImage->base.ImageOffsets); + /* convert stride to texels, not bytes */ + intelImage->base.RowStride /= intelImage->mt->cpp; + /* intelImage->base.ImageStride /= intelImage->mt->cpp; */ + } + } +} + +void +intel_tex_unmap_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level) +{ + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + GLuint face; + + for (face = 0; face < nr_faces; face++) { + struct intel_texture_image *intelImage = + intel_texture_image(intelObj->base.Image[face][level]); + + if (intelImage->mt) { + intel_miptree_image_unmap(intel, intelImage->mt); + intelImage->base.Data = NULL; + } + } +} + +void +intel_tex_map_images(struct intel_context *intel, + struct intel_texture_object *intelObj) +{ + int i; + + DBG("%s\n", __FUNCTION__); + + for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + intel_tex_map_level_images(intel, intelObj, i); +} + +void +intel_tex_unmap_images(struct intel_context *intel, + struct intel_texture_object *intelObj) +{ + int i; + + for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + intel_tex_unmap_level_images(intel, intelObj, i); +} diff --git a/i965/server/i830_dri.h b/shared/server/i830_dri.h index 2295181..def049e 100644 --- a/i965/server/i830_dri.h +++ b/shared/server/i830_dri.h @@ -1,15 +1,14 @@ -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */ +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */ #ifndef _I830_DRI_H #define _I830_DRI_H #include "xf86drm.h" -#include "i830_common.h" #define I830_MAX_DRAWABLES 256 #define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 3 +#define I830_MINOR_VERSION 9 #define I830_PATCHLEVEL 0 #define I830_REG_SIZE 0x80000 @@ -24,7 +23,7 @@ typedef struct _I830DRIRec { drmSize unused3; /* depthbufferSize */ drm_handle_t unused4; /* depthbuffer */ - drmSize unused5; /* rotatedSize /*/ + drmSize unused5; /* rotatedSize */ drm_handle_t unused6; /* rotatedbuffer */ drm_handle_t unused7; /* textures */ diff --git a/i915/server/intel.h b/shared/server/intel.h index d7858a2..6ea7249 100644 --- a/i915/server/intel.h +++ b/shared/server/intel.h @@ -75,6 +75,9 @@ #define I830_GMCH_CTRL 0x52 +#define I830_GMCH_MEM_MASK 0x1 +#define I830_GMCH_MEM_64M 0x1 +#define I830_GMCH_MEM_128M 0 #define I830_GMCH_GMS_MASK 0x70 #define I830_GMCH_GMS_DISABLED 0x00 @@ -141,7 +144,7 @@ typedef struct _I830Rec { unsigned char *MMIOBase; unsigned char *FbBase; int cpp; - + uint32_t aper_size; unsigned int bios_version; /* These are set in PreInit and never changed. */ diff --git a/i915/server/intel_dri.c b/shared/server/intel_dri.c index b6946b7..e49c421 100644 --- a/i915/server/intel_dri.c +++ b/shared/server/intel_dri.c @@ -292,15 +292,43 @@ static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830) static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830) { - struct pci_device host_bridge; + struct pci_device host_bridge, ig_dev; uint32_t gmch_ctrl; int memsize = 0; int range; - + uint32_t aper_size; + uint32_t membase2 = 0; + memset(&host_bridge, 0, sizeof(host_bridge)); + memset(&ig_dev, 0, sizeof(ig_dev)); + + ig_dev.dev = 2; pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL); - + + if (IS_I830(pI830) || IS_845G(pI830)) { + if ((gmch_ctrl & I830_GMCH_MEM_MASK) == I830_GMCH_MEM_128M) { + aper_size = 0x80000000; + } else { + aper_size = 0x40000000; + } + } else { + if (IS_I9XX(pI830)) { + int ret; + ret = pci_device_cfg_read_u32(&ig_dev, &membase2, 0x18); + if (membase2 & 0x08000000) + aper_size = 0x8000000; + else + aper_size = 0x10000000; + + fprintf(stderr,"aper size is %08X %08x %d\n", aper_size, membase2, ret); + } else + aper_size = 0x8000000; + } + + pI830->aper_size = aper_size; + + /* We need to reduce the stolen size, by the GTT and the popup. * The GTT varying according the the FbMapSize and the popup is 4KB */ range = (ctx->shared.fbSize / (1024*1024)) + 4; @@ -577,7 +605,8 @@ I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830) fprintf(stderr,"unable to allocate context buffer %ld\n", ret); return FALSE; } - + +#if 0 memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem)); pI830->TexMem.Key = -1; @@ -588,6 +617,7 @@ I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830) fprintf(stderr,"unable to allocate texture memory %ld\n", ret); return FALSE; } +#endif return TRUE; } @@ -605,12 +635,29 @@ I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830) return FALSE; if (!BindAgpRange(ctx, &pI830->ContextMem)) return FALSE; +#if 0 if (!BindAgpRange(ctx, &pI830->TexMem)) return FALSE; - +#endif return TRUE; } +static void SetupDRIMM(const DRIDriverContext *ctx, I830Rec *pI830) +{ + unsigned long aperEnd = ROUND_DOWN_TO(pI830->aper_size, GTT_PAGE_SIZE) / GTT_PAGE_SIZE; + unsigned long aperStart = ROUND_TO(pI830->aper_size - KB(32768), GTT_PAGE_SIZE) / GTT_PAGE_SIZE; + + fprintf(stderr, "aper size is %08X\n", ctx->shared.fbSize); + if (drmMMInit(ctx->drmFD, aperStart, aperEnd - aperStart, DRM_BO_MEM_TT)) { + fprintf(stderr, + "DRM MM Initialization Failed\n"); + } else { + fprintf(stderr, + "DRM MM Initialized at offset 0x%lx length %d page\n", aperStart, aperEnd-aperStart); + } + +} + static Bool I830CleanupDma(const DRIDriverContext *ctx) { @@ -810,6 +857,7 @@ I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sar fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n", sarea->depth_handle); +#if 0 if (drmAddMap(ctx->drmFD, (drm_handle_t)sarea->tex_offset, sarea->tex_size, DRM_AGP, 0, @@ -820,7 +868,7 @@ I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sar } fprintf(stderr, "[drm] textures = 0x%08x\n", sarea->tex_handle); - +#endif return TRUE; } @@ -848,29 +896,6 @@ I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sa } } -static void -I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - /* Start up the simple memory manager for agp space */ - drmI830MemInitHeap drmHeap; - drmHeap.region = I830_MEM_REGION_AGP; - drmHeap.start = 0; - drmHeap.size = sarea->tex_size; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, - &drmHeap, sizeof(drmHeap))) { - fprintf(stderr, - "[drm] Failed to initialized agp heap manager\n"); - } else { - fprintf(stderr, - "[drm] Initialized kernel agp heap manager, %d\n", - sarea->tex_size); - - I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, - sarea->log_tex_granularity); - } -} - static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) { @@ -892,7 +917,7 @@ I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) /* init to zero to be safe */ I830DRIMapScreenRegions(ctx, pI830, sarea); - I830InitTextureHeap(ctx, pI830, sarea); + SetupDRIMM(ctx, pI830); if (ctx->pciDevice != PCI_CHIP_845_G && ctx->pciDevice != PCI_CHIP_I830_M) { @@ -1084,6 +1109,10 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) return FALSE; } + pSAREAPriv->rotated_offset = -1; + pSAREAPriv->rotated_size = 0; + pSAREAPriv->rotated_pitch = ctx->shared.virtualWidth; + pSAREAPriv->front_offset = pI830->FrontBuffer.Start; pSAREAPriv->front_size = pI830->FrontBuffer.Size; pSAREAPriv->width = ctx->shared.virtualWidth; @@ -1095,8 +1124,10 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) pSAREAPriv->back_size = pI830->BackBuffer.Size; pSAREAPriv->depth_offset = pI830->DepthBuffer.Start; pSAREAPriv->depth_size = pI830->DepthBuffer.Size; +#if 0 pSAREAPriv->tex_offset = pI830->TexMem.Start; pSAREAPriv->tex_size = pI830->TexMem.Size; +#endif pSAREAPriv->log_tex_granularity = pI830->TexGranularity; ctx->driverClientMsg = malloc(sizeof(I830DRIRec)); @@ -1108,14 +1139,6 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) pI830DRI->height = ctx->shared.virtualHeight; pI830DRI->mem = ctx->shared.fbSize; pI830DRI->cpp = ctx->cpp; - pI830DRI->backOffset = pI830->BackBuffer.Start; - pI830DRI->backPitch = pI830->BackBuffer.Pitch; - - pI830DRI->depthOffset = pI830->DepthBuffer.Start; - pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; - - pI830DRI->fbOffset = pI830->FrontBuffer.Start; - pI830DRI->fbStride = pI830->FrontBuffer.Pitch; pI830DRI->bitsPerPixel = ctx->bpp; pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t); |