summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Hellstrom <thomas-at-tungstengraphics-dot-com>2008-03-30 15:33:07 +0200
committerThomas Hellstrom <thomas-at-tungstengraphics-dot-com>2008-03-30 15:33:07 +0200
commita0e133114bbde5c1f7e6ea20613b513f7fbbbdae (patch)
treeb0dc6c5e287c1ae0bf3bc8357ab992cbb792df11
parent43b05ffda491046bf0cca7f0cdd8a337eb09f1f1 (diff)
parent767dfa5b9c07fe6ae267e1ff0d3bbbf093a04c44 (diff)
Merge branch 'mesa_7_0_branch' into i915tex_branch
-rw-r--r--docs/relnotes-7.0.3.html10
-rw-r--r--include/GL/glext.h19
-rw-r--r--src/mesa/main/context.c4
-rw-r--r--src/mesa/main/drawpix.c14
-rw-r--r--src/mesa/main/fbobject.c2
-rw-r--r--src/mesa/main/get.c12
-rw-r--r--src/mesa/main/get_gen.py5
-rw-r--r--src/mesa/main/imports.h6
-rw-r--r--src/mesa/main/teximage.c3
-rw-r--r--src/mesa/shader/arbprogparse.c7
-rw-r--r--src/mesa/shader/prog_statevars.c4
-rw-r--r--src/mesa/shader/program.c2
-rw-r--r--src/mesa/shader/shader_api.c6
-rw-r--r--src/mesa/shader/slang/slang_emit.c16
-rw-r--r--src/mesa/tnl/t_vertex_sse.c4
-rw-r--r--src/mesa/x86/rtasm/x86sse.c247
-rw-r--r--src/mesa/x86/rtasm/x86sse.h63
17 files changed, 284 insertions, 140 deletions
diff --git a/docs/relnotes-7.0.3.html b/docs/relnotes-7.0.3.html
index a2a3437442..212467d6ff 100644
--- a/docs/relnotes-7.0.3.html
+++ b/docs/relnotes-7.0.3.html
@@ -8,7 +8,7 @@
<body bgcolor="#eeeeee">
-<H1>Mesa 7.0.3 Release Notes / February TBD, 2008</H1>
+<H1>Mesa 7.0.3 Release Notes / March TBD, 2008</H1>
<p>
Mesa 7.0.3 is a stable release with bug fixes since version 7.0.2.
@@ -40,8 +40,16 @@ Mesa 7.0.3 is a stable release with bug fixes since version 7.0.2.
<li>Bad strings given to glProgramStringARB() didn't generate GL_INVALID_OPERATION
<li>Fixed minor point rasterization regression (bug 11016)
<li>state.texenv.color state var didn't work in GL_ARB_fragment_program (bug 14931)
+<li>glBitmap from a PBO didn't always work
+<li>glGetTexImage into a PBO didn't always work
</ul>
+<h2>Changes</h2>
+<ul>
+<li>Updated glext.h to version 40
+</ul>
+
+
<h2>Driver Status</h2>
diff --git a/include/GL/glext.h b/include/GL/glext.h
index 2519a6cc5e..2b22714c30 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -46,9 +46,9 @@ extern "C" {
/*************************************************************/
/* Header file version number, required by OpenGL ABI for Linux */
-/* glext.h last updated 2007/02/12 */
+/* glext.h last updated 2008/03/24 */
/* Current version at http://www.opengl.org/registry/ */
-#define GL_GLEXT_VERSION 39
+#define GL_GLEXT_VERSION 40
#ifndef GL_VERSION_1_2
#define GL_UNSIGNED_BYTE_3_3_2 0x8032
@@ -3091,8 +3091,8 @@ extern "C" {
#ifndef GL_EXT_framebuffer_blit
#define GL_READ_FRAMEBUFFER_EXT 0x8CA8
#define GL_DRAW_FRAMEBUFFER_EXT 0x8CA9
-#define GL_READ_FRAMEBUFFER_BINDING_EXT GL_FRAMEBUFFER_BINDING_EXT
-#define GL_DRAW_FRAMEBUFFER_BINDING_EXT 0x8CAA
+#define GL_DRAW_FRAMEBUFFER_BINDING_EXT GL_FRAMEBUFFER_BINDING_EXT
+#define GL_READ_FRAMEBUFFER_BINDING_EXT 0x8CAA
#endif
#ifndef GL_EXT_framebuffer_multisample
@@ -3379,6 +3379,9 @@ extern "C" {
#define GL_RGBA_INTEGER_MODE_EXT 0x8D9E
#endif
+#ifndef GL_GREMEDY_frame_terminator
+#endif
+
/*************************************************************/
@@ -7252,6 +7255,14 @@ typedef void (APIENTRYP PFNGLCLEARCOLORIIEXTPROC) (GLint red, GLint green, GLint
typedef void (APIENTRYP PFNGLCLEARCOLORIUIEXTPROC) (GLuint red, GLuint green, GLuint blue, GLuint alpha);
#endif
+#ifndef GL_GREMEDY_frame_terminator
+#define GL_GREMEDY_frame_terminator 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFrameTerminatorGREMEDY (void);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLFRAMETERMINATORGREMEDYPROC) (void);
+#endif
+
#ifdef __cplusplus
}
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 9de515d840..e79a905346 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -687,10 +687,10 @@ free_shared_state( GLcontext *ctx, struct gl_shared_state *ss )
_mesa_DeleteHashTable(ss->Programs);
#endif
#if FEATURE_ARB_vertex_program
- _mesa_delete_program(ctx, ss->DefaultVertexProgram);
+ ctx->Driver.DeleteProgram(ctx, ss->DefaultVertexProgram);
#endif
#if FEATURE_ARB_fragment_program
- _mesa_delete_program(ctx, ss->DefaultFragmentProgram);
+ ctx->Driver.DeleteProgram(ctx, ss->DefaultFragmentProgram);
#endif
#if FEATURE_ATI_fragment_shader
diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c
index c82abccc41..3acccf0430 100644
--- a/src/mesa/main/drawpix.c
+++ b/src/mesa/main/drawpix.c
@@ -1,8 +1,8 @@
/*
* Mesa 3-D graphics library
- * Version: 6.5
+ * Version: 7.0.3
*
- * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -341,12 +341,10 @@ _mesa_Bitmap( GLsizei width, GLsizei height,
}
if (ctx->RenderMode == GL_RENDER) {
- if (bitmap) {
- /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */
- GLint x = IFLOOR(ctx->Current.RasterPos[0] - xorig);
- GLint y = IFLOOR(ctx->Current.RasterPos[1] - yorig);
- ctx->Driver.Bitmap( ctx, x, y, width, height, &ctx->Unpack, bitmap );
- }
+ /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). */
+ GLint x = IFLOOR(ctx->Current.RasterPos[0] - xorig);
+ GLint y = IFLOOR(ctx->Current.RasterPos[1] - yorig);
+ ctx->Driver.Bitmap( ctx, x, y, width, height, &ctx->Unpack, bitmap );
}
#if _HAVE_FULL_GL
else if (ctx->RenderMode == GL_FEEDBACK) {
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 8e7d66cb9c..bbdc46f002 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -302,7 +302,7 @@ test_attachment_completeness(const GLcontext *ctx, GLenum format,
/* OK */
}
else if (ctx->Extensions.EXT_packed_depth_stencil &&
- att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL_EXT) {
+ texImage->TexFormat->BaseFormat == GL_DEPTH_STENCIL_EXT) {
/* OK */
}
else {
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index eb81ee4a52..2cbd60539e 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1864,6 +1864,10 @@ _mesa_GetBooleanv( GLenum pname, GLboolean *params )
CHECK_EXT1(EXT_framebuffer_object, "GetBooleanv");
params[0] = INT_TO_BOOLEAN(ctx->Const.MaxRenderbufferSize);
break;
+ case GL_READ_FRAMEBUFFER_BINDING_EXT:
+ CHECK_EXT1(EXT_framebuffer_blit, "GetBooleanv");
+ params[0] = INT_TO_BOOLEAN(ctx->ReadBuffer->Name);
+ break;
case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB:
CHECK_EXT1(ARB_fragment_shader, "GetBooleanv");
params[0] = INT_TO_BOOLEAN(ctx->Const.FragmentProgram.MaxUniformComponents);
@@ -3695,6 +3699,10 @@ _mesa_GetFloatv( GLenum pname, GLfloat *params )
CHECK_EXT1(EXT_framebuffer_object, "GetFloatv");
params[0] = (GLfloat)(ctx->Const.MaxRenderbufferSize);
break;
+ case GL_READ_FRAMEBUFFER_BINDING_EXT:
+ CHECK_EXT1(EXT_framebuffer_blit, "GetFloatv");
+ params[0] = (GLfloat)(ctx->ReadBuffer->Name);
+ break;
case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB:
CHECK_EXT1(ARB_fragment_shader, "GetFloatv");
params[0] = (GLfloat)(ctx->Const.FragmentProgram.MaxUniformComponents);
@@ -5526,6 +5534,10 @@ _mesa_GetIntegerv( GLenum pname, GLint *params )
CHECK_EXT1(EXT_framebuffer_object, "GetIntegerv");
params[0] = ctx->Const.MaxRenderbufferSize;
break;
+ case GL_READ_FRAMEBUFFER_BINDING_EXT:
+ CHECK_EXT1(EXT_framebuffer_blit, "GetIntegerv");
+ params[0] = ctx->ReadBuffer->Name;
+ break;
case GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB:
CHECK_EXT1(ARB_fragment_shader, "GetIntegerv");
params[0] = ctx->Const.FragmentProgram.MaxUniformComponents;
diff --git a/src/mesa/main/get_gen.py b/src/mesa/main/get_gen.py
index 33be768999..6ff9168020 100644
--- a/src/mesa/main/get_gen.py
+++ b/src/mesa/main/get_gen.py
@@ -977,6 +977,11 @@ StateVars = [
["ctx->Const.MaxRenderbufferSize"], "",
["EXT_framebuffer_object"] ),
+ # GL_EXT_framebuffer_blit
+ # NOTE: GL_DRAW_FRAMEBUFFER_BINDING_EXT == GL_FRAMEBUFFER_BINDING_EXT
+ ( "GL_READ_FRAMEBUFFER_BINDING_EXT", GLint, ["ctx->ReadBuffer->Name"], "",
+ ["EXT_framebuffer_blit"] ),
+
# GL_ARB_fragment_shader
( "GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB", GLint,
["ctx->Const.FragmentProgram.MaxUniformComponents"], "",
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index ebdfc452a7..d6dc725b0c 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -1,8 +1,8 @@
/*
* Mesa 3-D graphics library
- * Version: 6.5.2
+ * Version: 7.0.3
*
- * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -159,7 +159,7 @@ typedef union { GLfloat f; GLint i; } fi_type;
***/
#if defined(__i386__) || defined(__386__) || defined(__sparc__) || \
defined(__s390x__) || defined(__powerpc__) || \
- defined(__amd64__) || \
+ defined(__amd64__) || defined(__x86_64__) || \
defined(ia64) || defined(__ia64__) || \
defined(__hppa__) || defined(hpux) || \
defined(__mips) || defined(_MIPS_ARCH) || \
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index d857a4f3a4..26d8fbc8de 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2177,9 +2177,6 @@ _mesa_GetTexImage( GLenum target, GLint level, GLenum format,
return;
}
- if (!pixels)
- return;
-
_mesa_lock_texture(ctx, texObj);
{
texImage = _mesa_select_tex_image(ctx, texObj, target, level);
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index 4de8ccf087..f74f727b24 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -1609,8 +1609,6 @@ parse_attrib_binding(GLcontext * ctx, const GLubyte ** inst,
program_error(ctx, Program->Position, "Bad attribute binding");
}
- Program->Base.InputsRead |= (1 << *inputReg);
-
return err;
}
@@ -2565,6 +2563,11 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst,
return 1;
}
+ /* Add attributes to InputsRead only if they are used in the program.
+ * This avoids the handling of unused ATTRIB declarations in the drivers. */
+ if (*File == PROGRAM_INPUT)
+ Program->Base.InputsRead |= (1 << *Index);
+
return 0;
}
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index 76295568ac..05daa8011d 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -181,7 +181,7 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT+face][i];
}
/* [3] = material alpha */
- value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE+face][3];
+ value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT+face][3];
return;
case STATE_DIFFUSE:
for (i = 0; i < 3; i++) {
@@ -197,7 +197,7 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SPECULAR+face][i];
}
/* [3] = material alpha */
- value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE+face][3];
+ value[3] = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SPECULAR+face][3];
return;
default:
_mesa_problem(ctx, "Invalid lightprod state in fetch_state");
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
index 1f227390af..95cabe7323 100644
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -343,7 +343,7 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog)
clone->Format = prog->Format;
clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions);
if (!clone->Instructions) {
- _mesa_delete_program(ctx, clone);
+ ctx->Driver.DeleteProgram(ctx, clone);
return NULL;
}
_mesa_copy_instructions(clone->Instructions, prog->Instructions,
diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
index 96940be5df..5f9e2b84a4 100644
--- a/src/mesa/shader/shader_api.c
+++ b/src/mesa/shader/shader_api.c
@@ -80,7 +80,7 @@ _mesa_clear_shader_program_data(GLcontext *ctx,
/* to prevent a double-free in the next call */
shProg->VertexProgram->Base.Parameters = NULL;
}
- _mesa_delete_program(ctx, &shProg->VertexProgram->Base);
+ ctx->Driver.DeleteProgram(ctx, &shProg->VertexProgram->Base);
shProg->VertexProgram = NULL;
}
@@ -89,7 +89,7 @@ _mesa_clear_shader_program_data(GLcontext *ctx,
/* to prevent a double-free in the next call */
shProg->FragmentProgram->Base.Parameters = NULL;
}
- _mesa_delete_program(ctx, &shProg->FragmentProgram->Base);
+ ctx->Driver.DeleteProgram(ctx, &shProg->FragmentProgram->Base);
shProg->FragmentProgram = NULL;
}
@@ -247,7 +247,7 @@ _mesa_free_shader(GLcontext *ctx, struct gl_shader *sh)
_mesa_free(sh->InfoLog);
for (i = 0; i < sh->NumPrograms; i++) {
assert(sh->Programs[i]);
- _mesa_delete_program(ctx, sh->Programs[i]);
+ ctx->Driver.DeleteProgram(ctx, sh->Programs[i]);
}
if (sh->Programs)
_mesa_free(sh->Programs);
diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
index 9947544a08..9c307c6275 100644
--- a/src/mesa/shader/slang/slang_emit.c
+++ b/src/mesa/shader/slang/slang_emit.c
@@ -677,6 +677,7 @@ static struct prog_instruction *
emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
{
struct prog_instruction *inst;
+ slang_ir_node tmpNode;
assert(n->Opcode == IR_CLAMP);
/* ch[0] = value
@@ -722,18 +723,27 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
emit(emitInfo, n->Children[1]);
emit(emitInfo, n->Children[2]);
+ /* Some GPUs don't allow reading from output registers. So if the
+ * dest for this clamp() is an output reg, we can't use that reg for
+ * the intermediate result. Use a temp register instead.
+ */
+ _mesa_bzero(&tmpNode, sizeof(tmpNode));
+ alloc_temp_storage(emitInfo, &tmpNode, n->Store->Size);
+
/* tmp = max(ch[0], ch[1]) */
inst = new_instruction(emitInfo, OPCODE_MAX);
- storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
+ storage_to_dst_reg(&inst->DstReg, tmpNode.Store, n->Writemask);
storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store);
- /* tmp = min(tmp, ch[2]) */
+ /* n->dest = min(tmp, ch[2]) */
inst = new_instruction(emitInfo, OPCODE_MIN);
storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
- storage_to_src_reg(&inst->SrcReg[0], n->Store);
+ storage_to_src_reg(&inst->SrcReg[0], tmpNode.Store);
storage_to_src_reg(&inst->SrcReg[1], n->Children[2]->Store);
+ free_temp_storage(emitInfo->vt, &tmpNode);
+
return inst;
}
diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c
index 9515d9f81f..f1c98fe2d1 100644
--- a/src/mesa/tnl/t_vertex_sse.c
+++ b/src/mesa/tnl/t_vertex_sse.c
@@ -648,12 +648,12 @@ void _tnl_generate_sse_emit( GLcontext *ctx )
p.ctx = ctx;
p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
+ p.outputs_safe = 0; /* for now */
p.have_sse2 = cpu_has_xmm2;
p.identity = x86_make_reg(file_XMM, 6);
p.chan0 = x86_make_reg(file_XMM, 7);
- if (!x86_init_func(&p.func, MAX_SSE_CODE_SIZE)) {
+ if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) {
vtx->emit = NULL;
return;
}
diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c
index 612cd51a6e..772471c723 100644
--- a/src/mesa/x86/rtasm/x86sse.c
+++ b/src/mesa/x86/rtasm/x86sse.c
@@ -1,4 +1,4 @@
-#if defined(USE_X86_ASM) || defined(SLANG_X86)
+#if defined(__i386__) || defined(__386__)
#include "imports.h"
#include "x86sse.h"
@@ -6,54 +6,78 @@
#define DISASSEM 0
#define X86_TWOB 0x0f
-/* Emit bytes to the instruction stream:
- */
-static void emit_1b( struct x86_function *p, GLbyte b0 )
+static unsigned char *cptr( void (*label)() )
{
- *(GLbyte *)(p->csr++) = b0;
+ return (unsigned char *)(unsigned long)label;
}
-static void emit_1i( struct x86_function *p, GLint i0 )
+
+static void do_realloc( struct x86_function *p )
{
- *(GLint *)(p->csr) = i0;
- p->csr += 4;
+ if (p->size == 0) {
+ p->size = 1024;
+ p->store = _mesa_exec_malloc(p->size);
+ p->csr = p->store;
+ }
+ else {
+ unsigned used = p->csr - p->store;
+ unsigned char *tmp = p->store;
+ p->size *= 2;
+ p->store = _mesa_exec_malloc(p->size);
+ memcpy(p->store, tmp, used);
+ p->csr = p->store + used;
+ _mesa_exec_free(tmp);
+ }
}
-static void disassem( struct x86_function *p, const char *fn )
+/* Emit bytes to the instruction stream:
+ */
+static unsigned char *reserve( struct x86_function *p, int bytes )
{
-#if DISASSEM && 0
- if (fn && fn != p->fn) {
- _mesa_printf("0x%x: %s\n", p->csr, fn);
- p->fn = fn;
+ if (p->csr + bytes - p->store > p->size)
+ do_realloc(p);
+
+ {
+ unsigned char *csr = p->csr;
+ p->csr += bytes;
+ return csr;
}
-#endif
}
-static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
+
+
+static void emit_1b( struct x86_function *p, char b0 )
{
- disassem(p, fn);
- *(p->csr++) = b0;
+ char *csr = (char *)reserve(p, 1);
+ *csr = b0;
}
-static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
+static void emit_1i( struct x86_function *p, int i0 )
{
- disassem(p, fn);
- *(p->csr++) = b0;
- *(p->csr++) = b1;
+ int *icsr = (int *)reserve(p, sizeof(i0));
+ *icsr = i0;
}
-static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
+static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
- disassem(p, fn);
- *(p->csr++) = b0;
- *(p->csr++) = b1;
- *(p->csr++) = b2;
+ unsigned char *csr = reserve(p, 1);
+ *csr++ = b0;
}
-#define emit_1ub(p, b0) emit_1ub_fn(p, b0, __FUNCTION__)
-#define emit_2ub(p, b0, b1) emit_2ub_fn(p, b0, b1, __FUNCTION__)
-#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)
+static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
+{
+ unsigned char *csr = reserve(p, 2);
+ *csr++ = b0;
+ *csr++ = b1;
+}
+static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
+{
+ unsigned char *csr = reserve(p, 3);
+ *csr++ = b0;
+ *csr++ = b1;
+ *csr++ = b2;
+}
/* Build a modRM byte + possible displacement. No treatment of SIB
@@ -63,7 +87,7 @@ static void emit_modrm( struct x86_function *p,
struct x86_reg reg,
struct x86_reg regmem )
{
- GLubyte val = 0;
+ unsigned char val = 0;
assert(reg.mod == mod_REG);
@@ -71,13 +95,13 @@ static void emit_modrm( struct x86_function *p,
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
- emit_1ub_fn(p, val, 0);
+ emit_1ub(p, val);
/* Oh-oh we've stumbled into the SIB thing.
*/
if (regmem.file == file_REG32 &&
regmem.idx == reg_SP) {
- emit_1ub_fn(p, 0x24, 0); /* simplistic! */
+ emit_1ub(p, 0x24); /* simplistic! */
}
switch (regmem.mod) {
@@ -98,7 +122,7 @@ static void emit_modrm( struct x86_function *p,
static void emit_modrm_noreg( struct x86_function *p,
- GLuint op,
+ unsigned op,
struct x86_reg regmem )
{
struct x86_reg dummy = x86_make_reg(file_REG32, op);
@@ -111,21 +135,21 @@ static void emit_modrm_noreg( struct x86_function *p,
* the arguments presented.
*/
static void emit_op_modrm( struct x86_function *p,
- GLubyte op_dst_is_reg,
- GLubyte op_dst_is_mem,
+ unsigned char op_dst_is_reg,
+ unsigned char op_dst_is_mem,
struct x86_reg dst,
struct x86_reg src )
{
switch (dst.mod) {
case mod_REG:
- emit_1ub_fn(p, op_dst_is_reg, 0);
+ emit_1ub(p, op_dst_is_reg);
emit_modrm(p, dst, src);
break;
case mod_INDIRECT:
case mod_DISP32:
case mod_DISP8:
assert(src.mod == mod_REG);
- emit_1ub_fn(p, op_dst_is_mem, 0);
+ emit_1ub(p, op_dst_is_mem);
emit_modrm(p, src, dst);
break;
default:
@@ -156,7 +180,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file,
}
struct x86_reg x86_make_disp( struct x86_reg reg,
- GLint disp )
+ int disp )
{
assert(reg.file == file_REG32);
@@ -185,7 +209,7 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg )
return x86_make_reg( reg.file, reg.idx );
}
-GLubyte *x86_get_label( struct x86_function *p )
+unsigned char *x86_get_label( struct x86_function *p )
{
return p->csr;
}
@@ -199,13 +223,13 @@ GLubyte *x86_get_label( struct x86_function *p )
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- GLubyte *label )
+ unsigned char *label )
{
- GLint offset = label - (x86_get_label(p) + 2);
+ int offset = label - (x86_get_label(p) + 2);
if (offset <= 127 && offset >= -128) {
emit_1ub(p, 0x70 + cc);
- emit_1b(p, (GLbyte) offset);
+ emit_1b(p, (char) offset);
}
else {
offset = label - (x86_get_label(p) + 6);
@@ -216,7 +240,7 @@ void x86_jcc( struct x86_function *p,
/* Always use a 32bit offset for forward jumps:
*/
-GLubyte *x86_jcc_forward( struct x86_function *p,
+unsigned char *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc )
{
emit_2ub(p, 0x0f, 0x80 + cc);
@@ -224,14 +248,14 @@ GLubyte *x86_jcc_forward( struct x86_function *p,
return x86_get_label(p);
}
-GLubyte *x86_jmp_forward( struct x86_function *p)
+unsigned char *x86_jmp_forward( struct x86_function *p)
{
emit_1ub(p, 0xe9);
emit_1i(p, 0);
return x86_get_label(p);
}
-GLubyte *x86_call_forward( struct x86_function *p)
+unsigned char *x86_call_forward( struct x86_function *p)
{
emit_1ub(p, 0xe8);
emit_1i(p, 0);
@@ -241,28 +265,41 @@ GLubyte *x86_call_forward( struct x86_function *p)
/* Fixup offset from forward jump:
*/
void x86_fixup_fwd_jump( struct x86_function *p,
- GLubyte *fixup )
+ unsigned char *fixup )
{
*(int *)(fixup - 4) = x86_get_label(p) - fixup;
}
-void x86_jmp( struct x86_function *p, GLubyte *label)
+void x86_jmp( struct x86_function *p, unsigned char *label)
{
emit_1ub(p, 0xe9);
emit_1i(p, label - x86_get_label(p) - 4);
}
-void x86_call( struct x86_function *p, GLubyte *label)
+#if 0
+/* This doesn't work once we start reallocating & copying the
+ * generated code on buffer fills, because the call is relative to the
+ * current pc.
+ */
+void x86_call( struct x86_function *p, void (*label)())
{
emit_1ub(p, 0xe8);
- emit_1i(p, label - x86_get_label(p) - 4);
+ emit_1i(p, cptr(label) - x86_get_label(p) - 4);
+}
+#else
+void x86_call( struct x86_function *p, struct x86_reg reg)
+{
+ emit_1ub(p, 0xff);
+ emit_modrm(p, reg, reg);
}
+#endif
+
/* michal:
* Temporary. As I need immediate operands, and dont want to mess with the codegen,
* I load the immediate into general purpose register and use it.
*/
-void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm )
+void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
assert(dst.mod == mod_REG);
emit_1ub(p, 0xb8 + dst.idx);
@@ -502,6 +539,14 @@ void sse_addss( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_andnps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_2ub(p, X86_TWOB, 0x55);
+ emit_modrm( p, dst, src );
+}
+
void sse_andps( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -510,6 +555,13 @@ void sse_andps( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_rsqrtps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_2ub(p, X86_TWOB, 0x52);
+ emit_modrm( p, dst, src );
+}
void sse_rsqrtss( struct x86_function *p,
struct x86_reg dst,
@@ -538,6 +590,21 @@ void sse_movlhps( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse_orps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_2ub(p, X86_TWOB, 0x56);
+ emit_modrm( p, dst, src );
+}
+
+void sse_xorps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_2ub(p, X86_TWOB, 0x57);
+ emit_modrm( p, dst, src );
+}
void sse_cvtps2pi( struct x86_function *p,
struct x86_reg dst,
@@ -559,7 +626,7 @@ void sse_cvtps2pi( struct x86_function *p,
void sse_shufps( struct x86_function *p,
struct x86_reg dest,
struct x86_reg arg0,
- GLubyte shuf)
+ unsigned char shuf)
{
emit_2ub(p, X86_TWOB, 0xC6);
emit_modrm(p, dest, arg0);
@@ -569,13 +636,21 @@ void sse_shufps( struct x86_function *p,
void sse_cmpps( struct x86_function *p,
struct x86_reg dest,
struct x86_reg arg0,
- GLubyte cc)
+ unsigned char cc)
{
emit_2ub(p, X86_TWOB, 0xC2);
emit_modrm(p, dest, arg0);
emit_1ub(p, cc);
}
+void sse_pmovmskb( struct x86_function *p,
+ struct x86_reg dest,
+ struct x86_reg src)
+{
+ emit_3ub(p, 0x66, X86_TWOB, 0xD7);
+ emit_modrm(p, dest, src);
+}
+
/***********************************************************************
* SSE2 instructions
*/
@@ -586,13 +661,21 @@ void sse_cmpps( struct x86_function *p,
void sse2_pshufd( struct x86_function *p,
struct x86_reg dest,
struct x86_reg arg0,
- GLubyte shuf)
+ unsigned char shuf)
{
emit_3ub(p, 0x66, X86_TWOB, 0x70);
emit_modrm(p, dest, arg0);
emit_1ub(p, shuf);
}
+void sse2_cvttps2dq( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
+ emit_modrm( p, dst, src );
+}
+
void sse2_cvtps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -625,6 +708,14 @@ void sse2_packuswb( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_rcpps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ emit_2ub(p, X86_TWOB, 0x53);
+ emit_modrm( p, dst, src );
+}
+
void sse2_rcpss( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -712,11 +803,11 @@ void x87_fclex( struct x86_function *p )
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
- GLubyte dst0ub0,
- GLubyte dst0ub1,
- GLubyte arg0ub0,
- GLubyte arg0ub1,
- GLubyte argmem_noreg)
+ unsigned char dst0ub0,
+ unsigned char dst0ub1,
+ unsigned char arg0ub0,
+ unsigned char arg0ub1,
+ unsigned char argmem_noreg)
{
assert(dst.file == file_x87);
@@ -729,7 +820,7 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86
assert(0);
}
else if (dst.idx == 0) {
- assert(arg.file = file_REG32);
+ assert(arg.file == file_REG32);
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, argmem_noreg, arg);
}
@@ -1056,44 +1147,42 @@ void mmx_movq( struct x86_function *p,
* account any push/pop activity:
*/
struct x86_reg x86_fn_arg( struct x86_function *p,
- GLuint arg )
+ unsigned arg )
{
return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
}
-/**
- * Initialize an x86_function object, allocating space for up to
- * 'code_size' bytes of code.
- */
-GLboolean x86_init_func( struct x86_function *p, GLuint code_size )
+void x86_init_func( struct x86_function *p )
{
- assert(!p->store);
+ p->size = 0;
+ p->store = NULL;
+ p->csr = p->store;
+}
+
+int x86_init_func_size( struct x86_function *p, unsigned code_size )
+{
+ p->size = code_size;
p->store = _mesa_exec_malloc(code_size);
- if (p->store) {
- p->csr = p->store;
- return GL_TRUE;
- }
- else {
- p->csr = NULL;
- return GL_FALSE;
- }
+ p->csr = p->store;
+ return p->store != NULL;
}
void x86_release_func( struct x86_function *p )
{
- if (p->store)
- _mesa_exec_free(p->store);
- p->store = p->csr = NULL;
+ _mesa_exec_free(p->store);
+ p->store = NULL;
+ p->csr = NULL;
+ p->size = 0;
}
void (*x86_get_func( struct x86_function *p ))(void)
{
- if (DISASSEM)
+ if (DISASSEM && p->store)
_mesa_printf("disassemble %p %p\n", p->store, p->csr);
- return (void (*)(void))p->store;
+ return (void (*)(void)) (unsigned long) p->store;
}
#else
diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h
index 42b09937bc..f6282f5bd4 100644
--- a/src/mesa/x86/rtasm/x86sse.h
+++ b/src/mesa/x86/rtasm/x86sse.h
@@ -2,26 +2,25 @@
#ifndef _X86SSE_H_
#define _X86SSE_H_
-#if defined(USE_X86_ASM) || defined(SLANG_X86)
-
-#include "glheader.h"
+#if defined(__i386__) || defined(__386__)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
- GLuint file:3;
- GLuint idx:3;
- GLuint mod:2; /* mod_REG if this is just a register */
- GLint disp:24; /* only +/- 23bits of offset - should be enough... */
+ unsigned file:3;
+ unsigned idx:3;
+ unsigned mod:2; /* mod_REG if this is just a register */
+ int disp:24; /* only +/- 23bits of offset - should be enough... */
};
struct x86_function {
- GLubyte *store;
- GLubyte *csr;
- GLuint stack_offset;
- GLint need_emms;
+ unsigned size;
+ unsigned char *store;
+ unsigned char *csr;
+ unsigned stack_offset;
+ int need_emms;
const char *fn;
};
@@ -80,7 +79,8 @@ enum sse_cc {
*/
-GLboolean x86_init_func( struct x86_function *p, GLuint code_size );
+void x86_init_func( struct x86_function *p );
+int x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );
@@ -92,7 +92,7 @@ struct x86_reg x86_make_reg( enum x86_reg_file file,
enum x86_reg_name idx );
struct x86_reg x86_make_disp( struct x86_reg reg,
- GLint disp );
+ int disp );
struct x86_reg x86_deref( struct x86_reg reg );
@@ -101,31 +101,32 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
/* Labels, jumps and fixup:
*/
-GLubyte *x86_get_label( struct x86_function *p );
+unsigned char *x86_get_label( struct x86_function *p );
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
- GLubyte *label );
+ unsigned char *label );
-GLubyte *x86_jcc_forward( struct x86_function *p,
+unsigned char *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc );
-GLubyte *x86_jmp_forward( struct x86_function *p);
+unsigned char *x86_jmp_forward( struct x86_function *p);
-GLubyte *x86_call_forward( struct x86_function *p);
+unsigned char *x86_call_forward( struct x86_function *p);
void x86_fixup_fwd_jump( struct x86_function *p,
- GLubyte *fixup );
+ unsigned char *fixup );
-void x86_jmp( struct x86_function *p, GLubyte *label );
+void x86_jmp( struct x86_function *p, unsigned char *label );
-void x86_call( struct x86_function *p, GLubyte *label );
+/* void x86_call( struct x86_function *p, void (*label)() ); */
+void x86_call( struct x86_function *p, struct x86_reg reg);
/* michal:
* Temporary. As I need immediate operands, and dont want to mess with the codegen,
* I load the immediate into general purpose register and use it.
*/
-void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, GLint imm );
+void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
/* Macro for sse_shufps() and sse2_pshufd():
@@ -141,19 +142,24 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
-void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
-void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
+void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
+ unsigned char cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -166,9 +172,14 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
-void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -239,7 +250,7 @@ void x87_fucom( struct x86_function *p, struct x86_reg arg );
* account any push/pop activity. Note - doesn't track explicit
* manipulation of ESP by other instructions.
*/
-struct x86_reg x86_fn_arg( struct x86_function *p, GLuint arg );
+struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
#endif
#endif