diff options
author | Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> | 2008-03-30 15:33:07 +0200 |
---|---|---|
committer | Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> | 2008-03-30 15:33:07 +0200 |
commit | a0e133114bbde5c1f7e6ea20613b513f7fbbbdae (patch) | |
tree | b0dc6c5e287c1ae0bf3bc8357ab992cbb792df11 | |
parent | 43b05ffda491046bf0cca7f0cdd8a337eb09f1f1 (diff) | |
parent | 767dfa5b9c07fe6ae267e1ff0d3bbbf093a04c44 (diff) |
Merge branch 'mesa_7_0_branch' into i915tex_branch
-rw-r--r-- | docs/relnotes-7.0.3.html | 10 | ||||
-rw-r--r-- | include/GL/glext.h | 19 | ||||
-rw-r--r-- | src/mesa/main/context.c | 4 | ||||
-rw-r--r-- | src/mesa/main/drawpix.c | 14 | ||||
-rw-r--r-- | src/mesa/main/fbobject.c | 2 | ||||
-rw-r--r-- | src/mesa/main/get.c | 12 | ||||
-rw-r--r-- | src/mesa/main/get_gen.py | 5 | ||||
-rw-r--r-- | src/mesa/main/imports.h | 6 | ||||
-rw-r--r-- | src/mesa/main/teximage.c | 3 | ||||
-rw-r--r-- | src/mesa/shader/arbprogparse.c | 7 | ||||
-rw-r--r-- | src/mesa/shader/prog_statevars.c | 4 | ||||
-rw-r--r-- | src/mesa/shader/program.c | 2 | ||||
-rw-r--r-- | src/mesa/shader/shader_api.c | 6 | ||||
-rw-r--r-- | src/mesa/shader/slang/slang_emit.c | 16 | ||||
-rw-r--r-- | src/mesa/tnl/t_vertex_sse.c | 4 | ||||
-rw-r--r-- | src/mesa/x86/rtasm/x86sse.c | 247 | ||||
-rw-r--r-- | src/mesa/x86/rtasm/x86sse.h | 63 |
17 files changed, 284 insertions, 140 deletions
diff --git a/docs/relnotes-7.0.3.html b/docs/relnotes-7.0.3.html index a2a3437442..212467d6ff 100644 --- a/docs/relnotes-7.0.3.html +++ b/docs/relnotes-7.0.3.html @@ -8,7 +8,7 @@ <body bgcolor="#eeeeee"> -<H1>Mesa 7.0.3 Release Notes / February TBD, 2008</H1> +<H1>Mesa 7.0.3 Release Notes / March TBD, 2008</H1> <p> Mesa 7.0.3 is a stable release with bug fixes since version 7.0.2. @@ -40,8 +40,16 @@ Mesa 7.0.3 is a stable release with bug fixes since version 7.0.2. <li>Bad strings given to glProgramStringARB() didn't generate GL_INVALID_OPERATION <li>Fixed minor point rasterization regression (bug 11016) <li>state.texenv.color state var didn't work in GL_ARB_fragment_program (bug 14931) +<li>glBitmap from a PBO didn't always work +<li>glGetTexImage into a PBO didn't always work </ul> +<h2>Changes</h2> +<ul> +<li>Updated glext.h to version 40 +</ul> + + <h2>Driver Status</h2> diff --git a/include/GL/glext.h b/include/GL/glext.h index 2519a6cc5e..2b22714c30 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -46,9 +46,9 @@ extern "C" { /*************************************************************/ /* Header file version number, required by OpenGL ABI for Linux */ -/* glext.h last updated 2007/02/12 */ +/* glext.h last updated 2008/03/24 */ /* Current version at http://www.opengl.org/registry/ */ -#define GL_GLEXT_VERSION 39 +#define GL_GLEXT_VERSION 40 #ifndef GL_VERSION_1_2 #define GL_UNSIGNED_BYTE_3_3_2 0x8032 @@ -3091,8 +3091,8 @@ extern "C" { #ifndef GL_EXT_framebuffer_blit #define GL_READ_FRAMEBUFFER_EXT 0x8CA8 #define GL_DRAW_FRAMEBUFFER_EXT 0x8CA9 -#define GL_READ_FRAMEBUFFER_BINDING_EXT GL_FRAMEBUFFER_BINDING_EXT -#define GL_DRAW_FRAMEBUFFER_BINDING_EXT 0x8CAA +#define GL_DRAW_FRAMEBUFFER_BINDING_EXT GL_FRAMEBUFFER_BINDING_EXT +#define GL_READ_FRAMEBUFFER_BINDING_EXT 0x8CAA #endif #ifndef GL_EXT_framebuffer_multisample @@ -3379,6 +3379,9 @@ extern "C" { #define GL_RGBA_INTEGER_MODE_EXT 0x8D9E #endif +#ifndef GL_GREMEDY_frame_terminator +#endif + /*************************************************************/ @@ -7252,6 +7255,14 @@ typedef void (APIENTRYP PFNGLCLEARCOLORIIEXTPROC) (GLint red, GLint green, GLint typedef void (APIENTRYP PFNGLCLEARCOLORIUIEXTPROC) (GLuint red, GLuint green, GLuint blue, GLuint alpha); #endif +#ifndef GL_GREMEDY_frame_terminator +#define GL_GREMEDY_frame_terminator 1 +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glFrameTerminatorGREMEDY (void); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (APIENTRYP PFNGLFRAMETERMINATORGREMEDYPROC) (void); +#endif + #ifdef __cplusplus } diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 9de515d840..e79a905346 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -687,10 +687,10 @@ free_shared_state( GLcontext *ctx, struct gl_shared_state *ss ) _mesa_DeleteHashTable(ss->Programs); #endif #if FEATURE_ARB_vertex_program - _mesa_delete_program(ctx, ss->DefaultVertexProgram); + ctx->Driver.DeleteProgram(ctx, ss->DefaultVertexProgram); #endif #if FEATURE_ARB_fragment_program - _mesa_delete_program(ctx, ss->DefaultFragmentProgram); + ctx->Driver.DeleteProgram(ctx, ss->DefaultFragmentProgram); #endif #if FEATURE_ATI_fragment_shader diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index c82abccc41..3acccf0430 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.0.3 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -341,12 +341,10 @@ _mesa_Bitmap( GLsizei width, GLsizei height, } if (ctx->RenderMode == GL_RENDER) { - if (bitmap) { - /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */ - GLint x = IFLOOR(ctx->Current.RasterPos[0] - xorig); - GLint y = IFLOOR(ctx->Current.RasterPos[1] - yorig); - ctx->Driver.Bitmap( ctx, x, y, width, height, &ctx->Unpack, bitmap ); - } + /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */ + GLint x = IFLOOR(ctx->Current.RasterPos[0] - xorig); + GLint y = IFLOOR(ctx->Current.RasterPos[1] - yorig); + ctx->Driver.Bitmap( ctx, x, y, width, height, &ctx->Unpack, bitmap ); } #if _HAVE_FULL_GL else if (ctx->RenderMode == GL_FEEDBACK) { diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8e7d66cb9c..bbdc46f002 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -302,7 +302,7 @@ test_attachment_completeness(const GLcontext *ctx, GLenum format, /* OK */ } else if (ctx->Extensions.EXT_packed_depth_stencil && - att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL_EXT) { + texImage->TexFormat->BaseFormat == GL_DEPTH_STENCIL_EXT) { /* OK */ } else { diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index eb81ee4a52..2cbd60539e 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1864,6 +1864,10 @@ _mesa_GetBooleanv( GLenum pname, GLboolean *params ) CHECK_EXT1(EXT_framebuffer_object, "GetBooleanv"); params[0] = INT_TO_BOOLEAN(ctx->Const.MaxRenderbufferSize); break; + case GL_READ_FRAMEBUFFER_BINDING_EXT: + CHECK_EXT1(EXT_framebuffer_blit, "GetBooleanv"); + params[0] = INT_TO_BOOLEAN(ctx->ReadBuffer->Name); + break; case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB: CHECK_EXT1(ARB_fragment_shader, "GetBooleanv"); params[0] = INT_TO_BOOLEAN(ctx->Const.FragmentProgram.MaxUniformComponents); @@ -3695,6 +3699,10 @@ _mesa_GetFloatv( GLenum pname, GLfloat *params ) CHECK_EXT1(EXT_framebuffer_object, "GetFloatv"); params[0] = (GLfloat)(ctx->Const.MaxRenderbufferSize); break; + case GL_READ_FRAMEBUFFER_BINDING_EXT: + CHECK_EXT1(EXT_framebuffer_blit, "GetFloatv"); + params[0] = (GLfloat)(ctx->ReadBuffer->Name); + break; case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB: CHECK_EXT1(ARB_fragment_shader, "GetFloatv"); params[0] = (GLfloat)(ctx->Const.FragmentProgram.MaxUniformComponents); @@ -5526,6 +5534,10 @@ _mesa_GetIntegerv( GLenum pname, GLint *params ) CHECK_EXT1(EXT_framebuffer_object, "GetIntegerv"); params[0] = ctx->Const.MaxRenderbufferSize; break; + case GL_READ_FRAMEBUFFER_BINDING_EXT: + CHECK_EXT1(EXT_framebuffer_blit, "GetIntegerv"); + params[0] = ctx->ReadBuffer->Name; + break; case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB: CHECK_EXT1(ARB_fragment_shader, "GetIntegerv"); params[0] = ctx->Const.FragmentProgram.MaxUniformComponents; diff --git a/src/mesa/main/get_gen.py b/src/mesa/main/get_gen.py index 33be768999..6ff9168020 100644 --- a/src/mesa/main/get_gen.py +++ b/src/mesa/main/get_gen.py @@ -977,6 +977,11 @@ StateVars = [ ["ctx->Const.MaxRenderbufferSize"], "", ["EXT_framebuffer_object"] ), + # GL_EXT_framebuffer_blit + # NOTE: GL_DRAW_FRAMEBUFFER_BINDING_EXT == GL_FRAMEBUFFER_BINDING_EXT + ( "GL_READ_FRAMEBUFFER_BINDING_EXT", GLint, ["ctx->ReadBuffer->Name"], "", + ["EXT_framebuffer_blit"] ), + # GL_ARB_fragment_shader ( "GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB", GLint, ["ctx->Const.FragmentProgram.MaxUniformComponents"], "", diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index ebdfc452a7..d6dc725b0c 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5.2 + * Version: 7.0.3 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -159,7 +159,7 @@ typedef union { GLfloat f; GLint i; } fi_type; ***/ #if defined(__i386__) || defined(__386__) || defined(__sparc__) || \ defined(__s390x__) || defined(__powerpc__) || \ - defined(__amd64__) || \ + defined(__amd64__) || defined(__x86_64__) || \ defined(ia64) || defined(__ia64__) || \ defined(__hppa__) || defined(hpux) || \ defined(__mips) || defined(_MIPS_ARCH) || \ diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index d857a4f3a4..26d8fbc8de 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -2177,9 +2177,6 @@ _mesa_GetTexImage( GLenum target, GLint level, GLenum format, return; } - if (!pixels) - return; - _mesa_lock_texture(ctx, texObj); { texImage = _mesa_select_tex_image(ctx, texObj, target, level); diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 4de8ccf087..f74f727b24 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -1609,8 +1609,6 @@ parse_attrib_binding(GLcontext * ctx, const GLubyte ** inst, program_error(ctx, Program->Position, "Bad attribute binding"); } - Program->Base.InputsRead |= (1 << *inputReg); - return err; } @@ -2565,6 +2563,11 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, return 1; } + /* Add attributes to InputsRead only if they are used the program. + * This avoids the handling of unused ATTRIB declarations in the drivers. */ + if (*File == PROGRAM_INPUT) + Program->Base.InputsRead |= (1 << *Index); + return 0; } diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index 76295568ac..05daa8011d 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -181,7 +181,7 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT+face][i]; } /* [3] = material alpha */ - value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE+face][3]; + value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT+face][3]; return; case STATE_DIFFUSE: for (i = 0; i < 3; i++) { @@ -197,7 +197,7 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SPECULAR+face][i]; } /* [3] = material alpha */ - value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE+face][3]; + value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SPECULAR+face][3]; return; default: _mesa_problem(ctx, "Invalid lightprod state in fetch_state"); diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 1f227390af..95cabe7323 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -343,7 +343,7 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog) clone->Format = prog->Format; clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions); if (!clone->Instructions) { - _mesa_delete_program(ctx, clone); + ctx->Driver.DeleteProgram(ctx, clone); return NULL; } _mesa_copy_instructions(clone->Instructions, prog->Instructions, diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 96940be5df..5f9e2b84a4 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -80,7 +80,7 @@ _mesa_clear_shader_program_data(GLcontext *ctx, /* to prevent a double-free in the next call */ shProg->VertexProgram->Base.Parameters = NULL; } - _mesa_delete_program(ctx, &shProg->VertexProgram->Base); + ctx->Driver.DeleteProgram(ctx, &shProg->VertexProgram->Base); shProg->VertexProgram = NULL; } @@ -89,7 +89,7 @@ _mesa_clear_shader_program_data(GLcontext *ctx, /* to prevent a double-free in the next call */ shProg->FragmentProgram->Base.Parameters = NULL; } - _mesa_delete_program(ctx, &shProg->FragmentProgram->Base); + ctx->Driver.DeleteProgram(ctx, &shProg->FragmentProgram->Base); shProg->FragmentProgram = NULL; } @@ -247,7 +247,7 @@ _mesa_free_shader(GLcontext *ctx, struct gl_shader *sh) _mesa_free(sh->InfoLog); for (i = 0; i < sh->NumPrograms; i++) { assert(sh->Programs[i]); - _mesa_delete_program(ctx, sh->Programs[i]); + ctx->Driver.DeleteProgram(ctx, sh->Programs[i]); } if (sh->Programs) _mesa_free(sh->Programs); diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 9947544a08..9c307c6275 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -677,6 +677,7 @@ static struct prog_instruction * emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n) { struct prog_instruction *inst; + slang_ir_node tmpNode; assert(n->Opcode == IR_CLAMP); /* ch[0] = value @@ -722,18 +723,27 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n) emit(emitInfo, n->Children[1]); emit(emitInfo, n->Children[2]); + /* Some GPUs don't allow reading from output registers. So if the + * dest for this clamp() is an output reg, we can't use that reg for + * the intermediate result. Use a temp register instead. + */ + _mesa_bzero(&tmpNode, sizeof(tmpNode)); + alloc_temp_storage(emitInfo, &tmpNode, n->Store->Size); + /* tmp = max(ch[0], ch[1]) */ inst = new_instruction(emitInfo, OPCODE_MAX); - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + storage_to_dst_reg(&inst->DstReg, tmpNode.Store, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store); - /* tmp = min(tmp, ch[2]) */ + /* n->dest = min(tmp, ch[2]) */ inst = new_instruction(emitInfo, OPCODE_MIN); storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); - storage_to_src_reg(&inst->SrcReg[0], n->Store); + storage_to_src_reg(&inst->SrcReg[0], tmpNode.Store); storage_to_src_reg(&inst->SrcReg[1], n->Children[2]->Store); + free_temp_storage(emitInfo->vt, &tmpNode); + return inst; } diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index 9515d9f81f..f1c98fe2d1 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -648,12 +648,12 @@ void _tnl_generate_sse_emit( GLcontext *ctx ) p.ctx = ctx; p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ + p.outputs_safe = 0; /* for now */ p.have_sse2 = cpu_has_xmm2; p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); - if (!x86_init_func(&p.func, MAX_SSE_CODE_SIZE)) { + if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) { vtx->emit = NULL; return; } diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index 612cd51a6e..772471c723 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -1,4 +1,4 @@ -#if defined(USE_X86_ASM) || defined(SLANG_X86) +#if defined(__i386__) || defined(__386__) #include "imports.h" #include "x86sse.h" @@ -6,54 +6,78 @@ #define DISASSEM 0 #define X86_TWOB 0x0f -/* Emit bytes to the instruction stream: - */ -static void emit_1b( struct x86_function *p, GLbyte b0 ) +static unsigned char *cptr( void (*label)() ) { - *(GLbyte *)(p->csr++) = b0; + return (unsigned char *)(unsigned long)label; } -static void emit_1i( struct x86_function *p, GLint i0 ) + +static void do_realloc( struct x86_function *p ) { - *(GLint *)(p->csr) = i0; - p->csr += 4; + if (p->size == 0) { + p->size = 1024; + p->store = _mesa_exec_malloc(p->size); + p->csr = p->store; + } + else { + unsigned used = p->csr - p->store; + unsigned char *tmp = p->store; + p->size *= 2; + p->store = _mesa_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + _mesa_exec_free(tmp); + } } -static void disassem( struct x86_function *p, const char *fn ) +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) { -#if DISASSEM && 0 - if (fn && fn != p->fn) { - _mesa_printf("0x%x: %s\n", p->csr, fn); - p->fn = fn; + if (p->csr + bytes - p->store > p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; } -#endif } -static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn ) + + +static void emit_1b( struct x86_function *p, char b0 ) { - disassem(p, fn); - *(p->csr++) = b0; + char *csr = (char *)reserve(p, 1); + *csr = b0; } -static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn ) +static void emit_1i( struct x86_function *p, int i0 ) { - disassem(p, fn); - *(p->csr++) = b0; - *(p->csr++) = b1; + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; } -static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn ) +static void emit_1ub( struct x86_function *p, unsigned char b0 ) { - disassem(p, fn); - *(p->csr++) = b0; - *(p->csr++) = b1; - *(p->csr++) = b2; + unsigned char *csr = reserve(p, 1); + *csr++ = b0; } -#define emit_1ub(p, b0) emit_1ub_fn(p, b0, __FUNCTION__) -#define emit_2ub(p, b0, b1) emit_2ub_fn(p, b0, b1, __FUNCTION__) -#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__) +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} /* Build a modRM byte + possible displacement. No treatment of SIB @@ -63,7 +87,7 @@ static void emit_modrm( struct x86_function *p, struct x86_reg reg, struct x86_reg regmem ) { - GLubyte val = 0; + unsigned char val = 0; assert(reg.mod == mod_REG); @@ -71,13 +95,13 @@ static void emit_modrm( struct x86_function *p, val |= reg.idx << 3; /* reg field */ val |= regmem.idx; /* r/m field */ - emit_1ub_fn(p, val, 0); + emit_1ub(p, val); /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && regmem.idx == reg_SP) { - emit_1ub_fn(p, 0x24, 0); /* simplistic! */ + emit_1ub(p, 0x24); /* simplistic! */ } switch (regmem.mod) { @@ -98,7 +122,7 @@ static void emit_modrm( struct x86_function *p, static void emit_modrm_noreg( struct x86_function *p, - GLuint op, + unsigned op, struct x86_reg regmem ) { struct x86_reg dummy = x86_make_reg(file_REG32, op); @@ -111,21 +135,21 @@ static void emit_modrm_noreg( struct x86_function *p, * the arguments presented. */ static void emit_op_modrm( struct x86_function *p, - GLubyte op_dst_is_reg, - GLubyte op_dst_is_mem, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, struct x86_reg dst, struct x86_reg src ) { switch (dst.mod) { case mod_REG: - emit_1ub_fn(p, op_dst_is_reg, 0); + emit_1ub(p, op_dst_is_reg); emit_modrm(p, dst, src); break; case mod_INDIRECT: case mod_DISP32: case mod_DISP8: assert(src.mod == mod_REG); - emit_1ub_fn(p, op_dst_is_mem, 0); + emit_1ub(p, op_dst_is_mem); emit_modrm(p, src, dst); break; default: @@ -156,7 +180,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file, } struct x86_reg x86_make_disp( struct x86_reg reg, - GLint disp ) + int disp ) { assert(reg.file == file_REG32); @@ -185,7 +209,7 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ) return x86_make_reg( reg.file, reg.idx ); } -GLubyte *x86_get_label( struct x86_function *p ) +unsigned char *x86_get_label( struct x86_function *p ) { return p->csr; } @@ -199,13 +223,13 @@ GLubyte *x86_get_label( struct x86_function *p ) void x86_jcc( struct x86_function *p, enum x86_cc cc, - GLubyte *label ) + unsigned char *label ) { - GLint offset = label - (x86_get_label(p) + 2); + int offset = label - (x86_get_label(p) + 2); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); - emit_1b(p, (GLbyte) offset); + emit_1b(p, (char) offset); } else { offset = label - (x86_get_label(p) + 6); @@ -216,7 +240,7 @@ void x86_jcc( struct x86_function *p, /* Always use a 32bit offset for forward jumps: */ -GLubyte *x86_jcc_forward( struct x86_function *p, +unsigned char *x86_jcc_forward( struct x86_function *p, enum x86_cc cc ) { emit_2ub(p, 0x0f, 0x80 + cc); @@ -224,14 +248,14 @@ GLubyte *x86_jcc_forward( struct x86_function *p, return x86_get_label(p); } -GLubyte *x86_jmp_forward( struct x86_function *p) +unsigned char *x86_jmp_forward( struct x86_function *p) { emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); } -GLubyte *x86_call_forward( struct x86_function *p) +unsigned char *x86_call_forward( struct x86_function *p) { emit_1ub(p, 0xe8); emit_1i(p, 0); @@ -241,28 +265,41 @@ GLubyte *x86_call_forward( struct x86_function *p) /* Fixup offset from forward jump: */ void x86_fixup_fwd_jump( struct x86_function *p, - GLubyte *fixup ) + unsigned char *fixup ) { *(int *)(fixup - 4) = x86_get_label(p) - fixup; } -void x86_jmp( struct x86_function *p, GLubyte *label) +void x86_jmp( struct x86_function *p, unsigned char *label) { emit_1ub(p, 0xe9); emit_1i(p, label - x86_get_label(p) - 4); } -void x86_call( struct x86_function *p, GLubyte *label) +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) { emit_1ub(p, 0xe8); - emit_1i(p, label - x86_get_label(p) - 4); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); } +#endif + /* michal: * Temporary. As I need immediate operands, and dont want to mess with the codegen, * I load the immediate into general purpose register and use it. */ -void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm ) +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); @@ -502,6 +539,14 @@ void sse_addss( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) @@ -510,6 +555,13 @@ void sse_andps( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, @@ -538,6 +590,21 @@ void sse_movlhps( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, @@ -559,7 +626,7 @@ void sse_cvtps2pi( struct x86_function *p, void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, - GLubyte shuf) + unsigned char shuf) { emit_2ub(p, X86_TWOB, 0xC6); emit_modrm(p, dest, arg0); @@ -569,13 +636,21 @@ void sse_shufps( struct x86_function *p, void sse_cmpps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, - GLubyte cc) + unsigned char cc) { emit_2ub(p, X86_TWOB, 0xC2); emit_modrm(p, dest, arg0); emit_1ub(p, cc); } +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + /*********************************************************************** * SSE2 instructions */ @@ -586,13 +661,21 @@ void sse_cmpps( struct x86_function *p, void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, - GLubyte shuf) + unsigned char shuf) { emit_3ub(p, 0x66, X86_TWOB, 0x70); emit_modrm(p, dest, arg0); emit_1ub(p, shuf); } +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) @@ -625,6 +708,14 @@ void sse2_packuswb( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) @@ -712,11 +803,11 @@ void x87_fclex( struct x86_function *p ) static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, - GLubyte dst0ub0, - GLubyte dst0ub1, - GLubyte arg0ub0, - GLubyte arg0ub1, - GLubyte argmem_noreg) + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) { assert(dst.file == file_x87); @@ -729,7 +820,7 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 assert(0); } else if (dst.idx == 0) { - assert(arg.file = file_REG32); + assert(arg.file == file_REG32); emit_1ub(p, 0xd8); emit_modrm_noreg(p, argmem_noreg, arg); } @@ -1056,44 +1147,42 @@ void mmx_movq( struct x86_function *p, * account any push/pop activity: */ struct x86_reg x86_fn_arg( struct x86_function *p, - GLuint arg ) + unsigned arg ) { return x86_make_disp(x86_make_reg(file_REG32, reg_SP), p->stack_offset + arg * 4); /* ??? */ } -/** - * Initialize an x86_function object, allocating space for up to - * 'code_size' bytes of code. - */ -GLboolean x86_init_func( struct x86_function *p, GLuint code_size ) +void x86_init_func( struct x86_function *p ) { - assert(!p->store); + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +int x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; p->store = _mesa_exec_malloc(code_size); - if (p->store) { - p->csr = p->store; - return GL_TRUE; - } - else { - p->csr = NULL; - return GL_FALSE; - } + p->csr = p->store; + return p->store != NULL; } void x86_release_func( struct x86_function *p ) { - if (p->store) - _mesa_exec_free(p->store); - p->store = p->csr = NULL; + _mesa_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; } void (*x86_get_func( struct x86_function *p ))(void) { - if (DISASSEM) + if (DISASSEM && p->store) _mesa_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void))p->store; + return (void (*)(void)) (unsigned long) p->store; } #else diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h index 42b09937bc..f6282f5bd4 100644 --- a/src/mesa/x86/rtasm/x86sse.h +++ b/src/mesa/x86/rtasm/x86sse.h @@ -2,26 +2,25 @@ #ifndef _X86SSE_H_ #define _X86SSE_H_ -#if defined(USE_X86_ASM) || defined(SLANG_X86) - -#include "glheader.h" +#if defined(__i386__) || defined(__386__) /* It is up to the caller to ensure that instructions issued are * suitable for the host cpu. There are no checks made in this module * for mmx/sse/sse2 support on the cpu. */ struct x86_reg { - GLuint file:3; - GLuint idx:3; - GLuint mod:2; /* mod_REG if this is just a register */ - GLint disp:24; /* only +/- 23bits of offset - should be enough... */ + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ }; struct x86_function { - GLubyte *store; - GLubyte *csr; - GLuint stack_offset; - GLint need_emms; + unsigned size; + unsigned char *store; + unsigned char *csr; + unsigned stack_offset; + int need_emms; const char *fn; }; @@ -80,7 +79,8 @@ enum sse_cc { */ -GLboolean x86_init_func( struct x86_function *p, GLuint code_size ); +void x86_init_func( struct x86_function *p ); +int x86_init_func_size( struct x86_function *p, unsigned code_size ); void x86_release_func( struct x86_function *p ); void (*x86_get_func( struct x86_function *p ))( void ); @@ -92,7 +92,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file, enum x86_reg_name idx ); struct x86_reg x86_make_disp( struct x86_reg reg, - GLint disp ); + int disp ); struct x86_reg x86_deref( struct x86_reg reg ); @@ -101,31 +101,32 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ); /* Labels, jumps and fixup: */ -GLubyte *x86_get_label( struct x86_function *p ); +unsigned char *x86_get_label( struct x86_function *p ); void x86_jcc( struct x86_function *p, enum x86_cc cc, - GLubyte *label ); + unsigned char *label ); -GLubyte *x86_jcc_forward( struct x86_function *p, +unsigned char *x86_jcc_forward( struct x86_function *p, enum x86_cc cc ); -GLubyte *x86_jmp_forward( struct x86_function *p); +unsigned char *x86_jmp_forward( struct x86_function *p); -GLubyte *x86_call_forward( struct x86_function *p); +unsigned char *x86_call_forward( struct x86_function *p); void x86_fixup_fwd_jump( struct x86_function *p, - GLubyte *fixup ); + unsigned char *fixup ); -void x86_jmp( struct x86_function *p, GLubyte *label ); +void x86_jmp( struct x86_function *p, unsigned char *label ); -void x86_call( struct x86_function *p, GLubyte *label ); +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); /* michal: * Temporary. As I need immediate operands, and dont want to mess with the codegen, * I load the immediate into general purpose register and use it. */ -void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm ); +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): @@ -141,19 +142,24 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + unsigned char cc ); void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -166,9 +172,14 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -239,7 +250,7 @@ void x87_fucom( struct x86_function *p, struct x86_reg arg ); * account any push/pop activity. Note - doesn't track explict * manipulation of ESP by other instructions. */ -struct x86_reg x86_fn_arg( struct x86_function *p, GLuint arg ); +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); #endif #endif |