xc/extras/Mesa/src/mmath.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333


/*
 * Mesa 3-D graphics library
 * Version:  3.3
 * 
 * Copyright (C) 1999  Brian Paul   All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */


/*
 * Faster arithmetic functions.  If the FAST_MATH preprocessor symbol is
 * defined on the command line (-DFAST_MATH) then we'll use some (hopefully)
 * faster functions for sqrt(), etc.
 */


#ifndef MMATH_H
#define MMATH_H


#include "glheader.h"


/*
 * Set the x86 FPU control word to guarantee only 32 bits of precision
 * are stored in registers.  Allowing the FPU to store more introduces
 * differences between situations where numbers are pulled out of memory
 * vs. situations where the compiler is able to optimize register usage.
 * 
 * In the worst case, we force the compiler to use a memory access to
 * truncate the float, by specifying the 'volatile' keyword.
 */
#if defined(__linux__) && defined(__i386__) 
#include <fpu_control.h>

/*
 * Fallbacks used when the <fpu_control.h> included above did not define
 * the control-word accessors: the setter maps to __setfpucw, the getter
 * reads __fpu_control, and fpu_control_t is declared as an unsigned short.
 * Note that this fallback _FPU_GETCW already ends in a semicolon.
 */
#ifndef _FPU_SETCW
typedef unsigned short fpu_control_t;
#  define _FPU_SETCW __setfpucw
#endif

#ifndef _FPU_GETCW
#  define _FPU_GETCW(a) (a) = __fpu_control;
#endif

/*
 * START_FAST_MATH(x): read the current FPU control word into x, then load
 * Mesa's own control word.
 *
 * With NO_FAST_MATH defined the word loaded is _FPU_DEFAULT; otherwise it
 * is _FPU_SINGLE combined with the six _FPU_MASK_* bits.  The expansion is
 * a plain brace block (not a do/while), and the word to load lives in a
 * static local named 'mask'.
 */
#ifdef NO_FAST_MATH
#define START_FAST_MATH(x)					\
   {								\
      static fpu_control_t mask = _FPU_DEFAULT;			\
      _FPU_GETCW( x );						\
      _FPU_SETCW( mask );					\
   }
#else
#define START_FAST_MATH(x)					\
   {								\
      static fpu_control_t mask =				\
           _FPU_SINGLE						\
         | _FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM		\
         | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_PM;		\
      _FPU_GETCW( x );						\
      _FPU_SETCW( mask );					\
   }
#endif

/*
 * END_FAST_MATH(x): load x back into the FPU control word, restoring
 * whatever START_FAST_MATH stored there for the application.
 */
#define END_FAST_MATH(x)   { _FPU_SETCW( x ); }

/* Tell the rest of Mesa that the control-word bracketing is available. */
#define HAVE_FAST_MATH

#elif defined(__WATCOMC__) && !defined(NO_FAST_MATH) 

/*
 * Watcom-specific inline-assembly versions of the control-word set/get
 * helpers.  Both helpers receive a pointer to an unsigned short in esi
 * (parm [esi]); START_FAST_MATH / END_FAST_MATH pass the address of their
 * argument, so the argument must be an addressable unsigned short.
 */

/*
 * Store the FPU control word at *x, OR 0x3f into that stored word, and
 * load the result back into the FPU.
 *
 * NOTE(review): the OR is applied to the word in memory at *x, so *x ends
 * up holding the modified value rather than the control word that was in
 * effect on entry -- confirm that END_FAST_MATH reloading this value is
 * the intended behaviour.
 */
void START_FAST_MATH2(unsigned short *x);
#pragma aux START_FAST_MATH2 =          \
    "fstcw   word ptr [esi]"            \
    "or      word ptr [esi], 0x3f"      \
    "fldcw   word ptr [esi]"            \
    parm [esi]                          \
    modify exact [];

/* Load the FPU control word from *x. */
void END_FAST_MATH2(unsigned short *x);
#pragma aux END_FAST_MATH2 =            \
    "fldcw   word ptr [esi]"            \
    parm [esi]                          \
    modify exact [];

/* Macro front ends: forward the address of x to the helpers above. */
#define START_FAST_MATH(x)  START_FAST_MATH2(& x)          
#define END_FAST_MATH(x)  END_FAST_MATH2(& x)

/*
__inline START_FAST_MATH(unsigned short x)
    {                               
    _asm {                          
        fstcw   ax                  
        mov     x , ax              
        or      ax, 0x3f            
        fldcw   ax                  
        }                           
    }

__inline END_FAST_MATH(unsigned short x)    
    {                               
    _asm {                          
        fldcw   x                   
        }                           
    }
*/
#define HAVE_FAST_MATH

#else
/*
 * No FPU control-word handling in this configuration: both bracketing
 * macros just evaluate their argument once and discard the value.
 */
#define START_FAST_MATH(x)   ((void)(x))
#define END_FAST_MATH(x)     ((void)(x))

/* The mac float really is a float, with the same precision as a
 * single precision 387 float.
 */
#ifdef macintosh
#  define HAVE_FAST_MATH
#endif

#endif


/*
 * Float -> Int conversion
 *
 * FloatToInt(f) converts a float to an integer.  When USE_X86_ASM is
 * defined, a compiler-specific x87 implementation is selected (GCC on
 * i386, __MSC__ on __WIN32__, or Watcom); in every other configuration it
 * is a plain C cast to int.
 *
 * NOTE(review): the assembly variants store the value with fistp, which
 * presumably converts using the FPU's current rounding mode, whereas the C
 * cast truncates toward zero -- results may therefore differ between
 * builds; confirm callers tolerate that.
 */

#if defined(USE_X86_ASM)
#if defined(__GNUC__) && defined(__i386__)
/* GCC/i386: f is passed on the top of the x87 stack ("t") and popped into
 * the int r by fistpl; "st" is listed as clobbered because of the pop.
 */
static __inline__ int FloatToInt(float f)
{
   int r;
   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
   return r;
}
#elif  defined(__MSC__) && defined(__WIN32__)
/* __MSC__ inline assembly: load f onto the FPU stack, then store-and-pop
 * it into r as an integer.
 */
static __inline int FloatToInt(float f)
{
   int r;
   _asm {
	 fld f
	 fistp r
	}
   return r;
}
#elif defined(__WATCOMC__)
/* Watcom: the argument arrives on the 8087 stack; a stack slot is reserved
 * with push, fistp writes the integer into it, and pop returns it in eax.
 * Note that this variant is declared as returning long, not int.
 */
long FloatToInt(float f);
#pragma aux FloatToInt =                \
	"push   eax"                        \
	"fistp  dword ptr [esp]"            \
	"pop    eax"                        \
	parm [8087]                         \
	value [eax]                         \
	modify exact [eax];
/* Watcom-only square root using the fsqrt instruction; argument and result
 * both stay on the 8087 stack.  Declared only in this branch.
 */
float asm_sqrt (float x);
#pragma aux asm_sqrt =                  \
	"fsqrt"                             \
	parm [8087]                         \
	value [8087]                        \
	modify exact [];
#else
/* USE_X86_ASM requested but no supported compiler matched: use the cast. */
#define FloatToInt(F) ((int) (F))
#endif
#else
/* Portable fallback: ordinary C conversion. */
#define FloatToInt(F) ((int) (F))
#endif


/*
 * Square root
 *
 * GL_SQRT(X) picks its implementation at compile time:
 *   - FAST_MATH not defined:                   sqrt()
 *   - FAST_MATH with Watcom and USE_X86_ASM:   asm_sqrt()
 *   - FAST_MATH otherwise:                     gl_sqrt()
 */

extern float gl_sqrt(float x);

#if !defined(FAST_MATH)
#  define GL_SQRT(X)  sqrt(X)
#elif defined(__WATCOMC__) && defined(USE_X86_ASM)
#  define GL_SQRT(X)  asm_sqrt(X)
#else
#  define GL_SQRT(X)  gl_sqrt(X)
#endif


/*
 * Normalize a 3-element vector to unit length, in place.
 *
 * V may be any expression that can be subscripted (an array name, a
 * pointer, or pointer arithmetic such as p + 3); every use is
 * parenthesized so such expressions stay intact.  V is evaluated several
 * times, so it must not have side effects.
 *
 * A vector whose squared length is not greater than 1e-50 is left
 * unchanged, which keeps the reciprocal from dividing by zero.  The scaled
 * components are stored back as GLfloat.
 */
#define NORMALIZE_3FV( V )				\
do {							\
   GLdouble len = LEN_SQUARED_3FV((V));			\
   if (len > 1e-50) {					\
      len = 1.0 / GL_SQRT(len);				\
      (V)[0] = (GLfloat) ((V)[0] * len);		\
      (V)[1] = (GLfloat) ((V)[1] * len);		\
      (V)[2] = (GLfloat) ((V)[2] * len);		\
   }							\
} while(0)

/*
 * Length of a 3-element vector: GL_SQRT of the sum of the squared
 * components.  V is parenthesized at every use so that an expression such
 * as p + 3 can be passed; it is evaluated several times.
 */
#define LEN_3FV( V ) (GL_SQRT((V)[0]*(V)[0]+(V)[1]*(V)[1]+(V)[2]*(V)[2]))

/*
 * Squared length (dot product with itself) of a 3-element vector.  V is
 * parenthesized at every use so that an expression such as p + 3 can be
 * passed; it is evaluated several times.
 */
#define LEN_SQUARED_3FV( V ) ((V)[0]*(V)[0]+(V)[1]*(V)[1]+(V)[2]*(V)[2])

/*
 * Optimization for:
 * GLfloat f;
 * GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255)
 */

/* Enable the bit-pattern based color macros on i386 and sparc targets. */
#if defined(__sparc__) || defined(__i386__)
#  define USE_IEEE
#endif

#if defined(USE_IEEE) && !defined(DEBUG)

/*
 * Threshold compared against the raw bits of a float color component.
 * Despite the name, 0x3f7f0000 is the single-precision pattern for
 * 255/256 (0.99609375), so components in [255/256, 1) are treated like 1.
 */
#define IEEE_ONE 0x3f7f0000

/*
 * Clamp the float lvalue f to [0, 1] in place using integer compares on
 * its bit pattern.  Read as unsigned, every value with the sign bit set
 * and every non-negative value at or above the threshold compares
 * >= IEEE_ONE; the signed view then separates the two cases.
 *
 * NOTE(review): the bits are reached through pointer casts, which depends
 * on the compiler tolerating this kind of type punning -- confirm the
 * build flags.
 */
#define CLAMP_FLOAT_COLOR(f)			\
	do {					\
	   if (*(GLuint *)&f >= IEEE_ONE) {	\
	      if (*(GLint *)&f < 0)		\
	         f = 0;				\
	      else				\
	         f = 1;				\
	   }					\
	} while(0)

/*
 * Expression form of the clamp above: yields f itself when its bits are
 * below the threshold, otherwise 0 for sign-bit-set values and 1 for the
 * rest.  f must be an lvalue and is not modified.
 */
#define CLAMP_FLOAT_COLOR_VALUE(f)		\
    ( (*(GLuint *)&f < IEEE_ONE)		\
      ? f					\
      : ((*(GLint *)&f >= 0) ? 1 : 0) )

/*
 * Convert the float color component f to a GLubyte and store it in b.
 *
 * The bits of f are inspected through a union: anything with the sign bit
 * set becomes 0 and anything at or above IEEE_ONE becomes 255.  Otherwise
 * the value is scaled by 255/256 and 32768.0F is added, after which the
 * low eight bits of the float's bit pattern are taken as the result.
 *
 * The scratch union has a deliberately unusual name so that an argument
 * expression mentioning a caller variable called 'tmp' is not captured,
 * and both arguments are parenthesized.  f is evaluated exactly once.
 *
 * This function/macro is sensitive to precision.  Test carefully
 * if you change it.
 */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                                   \
        do {                                                               \
           union { GLfloat r; GLuint i; } fc2ub_tmp_;                      \
           fc2ub_tmp_.r = (f);                                             \
           (b) = ((fc2ub_tmp_.i >= IEEE_ONE)                               \
               ? ((GLint)fc2ub_tmp_.i < 0) ? (GLubyte)0 : (GLubyte)255     \
               : (fc2ub_tmp_.r = fc2ub_tmp_.r*(255.0F/256.0F) + 32768.0F,  \
                  (GLubyte)fc2ub_tmp_.i));                                 \
        } while (0)


/* In this configuration the pre-clamped variant is the same conversion. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
         FLOAT_COLOR_TO_UBYTE_COLOR(b, f)

#else

/*
 * Portable versions of the color clamp/convert macros, built on CLAMP,
 * CLAMP_SELF and FloatToInt.  Every argument and every expansion is
 * parenthesized so that expression arguments (e.g. x + y) are scaled and
 * clamped as a whole.
 */

/* Clamp the lvalue f to [0, 1] in place; the result is discarded. */
#define CLAMP_FLOAT_COLOR(f) \
        ((void) CLAMP_SELF((f), 0, 1))

/* Yield f clamped to [0, 1] without modifying it. */
#define CLAMP_FLOAT_COLOR_VALUE(f) \
        (CLAMP((f), 0, 1))

/* Store in b the GLubyte obtained from f clamped to [0, 1] times 255. */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)			\
	((b) = ((GLubyte) FloatToInt(CLAMP((f), 0.0F, 1.0F) * 255.0F)))

/* Same conversion without the clamp; the caller passes f already in [0, 1]. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
	((b) = ((GLubyte) FloatToInt((f) * 255.0F)))

#endif


/*
 * 256-entry float tables indexed by a ubyte color value; they are defined
 * outside this header.
 * NOTE(review): presumably one holds c/255 and the other c as a float in
 * the 0..255 range -- confirm against the code that fills them.
 */
extern float gl_ubyte_to_float_color_tab[256];
extern float gl_ubyte_to_float_255_color_tab[256];

/* Table lookups: c is used directly as the index. */
#define UBYTE_COLOR_TO_FLOAT_COLOR(c)      (gl_ubyte_to_float_color_tab[c])
#define UBYTE_COLOR_TO_FLOAT_255_COLOR(c)  (gl_ubyte_to_float_255_color_tab[c])

/*
 * Copy entry c of the 255 table into the lvalue f, moving the value as an
 * int-sized bit pattern instead of through a float load/store.
 */
#define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \
    *(int *)&(f) = *((int *)gl_ubyte_to_float_255_color_tab + (c))


/*
 * Convert four ubyte components b[0..3] to floats f[0..3] through
 * UBYTE_COLOR_TO_FLOAT_COLOR.  f and b are parenthesized before being
 * subscripted so pointer expressions such as p + 4 can be passed; each is
 * evaluated four times.
 */
#define UBYTE_RGBA_TO_FLOAT_RGBA(f,b) 			\
do {							\
   (f)[0] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[0]);		\
   (f)[1] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[1]);		\
   (f)[2] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[2]);		\
   (f)[3] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[3]);		\
} while(0)


/*
 * Same as above, but through UBYTE_COLOR_TO_FLOAT_255_COLOR.
 */
#define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b) 		\
do {							\
   (f)[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[0]);	\
   (f)[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[1]);	\
   (f)[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[2]);	\
   (f)[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[3]);	\
} while(0)

/*
 * Convert four float components f[0..3] to ubytes b[0..3] through
 * FLOAT_COLOR_TO_UBYTE_COLOR.  b and f are parenthesized before being
 * subscripted so pointer expressions such as p + 4 can be passed; each is
 * evaluated once per component.
 */
#define FLOAT_RGBA_TO_UBYTE_RGBA(b,f) 			\
do {							\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);		\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);		\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);		\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[3], (f)[3]);		\
} while(0)

/*
 * Three-component (RGB) version of the conversion above.
 */
#define FLOAT_RGB_TO_UBYTE_RGB(b,f) 			\
do {							\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);		\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);		\
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);		\
} while(0)


/*
 * One-time setup entry point for this module; takes no arguments and
 * returns nothing.
 * NOTE(review): presumably this fills the ubyte->float lookup tables
 * declared in this header -- confirm against the definition.
 */
void gl_init_math(void);


#endif