Improve SSE2 performance, add SSE support. r=jrmuizel

This patch greatly improves the performance of QCMS transformations on x86 & x86_64 systems. Some notes: 0. On 32-bit x86 systems it does runtime selection between non-SIMD, SSE, and SSE2 code paths. 1. On x86_64 systems the SSE2 code path is always taken. The non-SIMD and SSE code paths are left intact, but contemporary versions of the GCC and MSVC compilers will see that they cannot be reached and optimize them away. 2. The execution time of the SSE2 code path is reduced by 67%, relative to the original Intel/Microsoft formatted ASM code. The relative performance is seen on a Pentium4 (Northwood) 2.4GHz CPU with DDR1 RAM. 3. The SSE code path provides an 80% reduction in execution time, relative to the non-SIMD code path. The relative performance is seen on a Pentium3 (Coppermine) 1.26GHz CPU with SDRAM. 4. The code has been split out into separate files so that it can be built with different cflags (-msse and -msse2) when using gcc.
author: Steve Snyder <swsnyder@snydernet.net> 2010-02-18 16:10:07 -0500
committer: Jeff Muizelaar <jmuizelaar@mozilla.com> 2010-02-18 16:10:07 -0500
commit: 99afd86d57d431dc3317c1123d8078cbf5afdb97 (patch)
tree: 7c2048b2016551563276c94b678e480373bd9f91 /transform.c
parent: d280a4b155590d07a9459d25c3acde144b96a6bc (diff)
1 files changed, 47 insertions, 369 deletions
diff --git a/transform.c b/transform.c
index 727622c..d9fd089 100644
--- a/transform.c
+++ b/transform.c
@@ -25,9 +25,10 @@
 #include <assert.h>
 #include "qcmsint.h"
 
-#if defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) || defined(_M_AMD64)
+/* for MSVC, GCC, and Intel compilers */
+#if defined(_M_IX86) || defined(__i386__) || defined(_M_AMD64) || defined(__x86_64__)
 #define X86
-#endif
+#endif /* _M_IX86 || __i386__ || _M_AMD64 || __x86_64__ */
 
 //XXX: could use a bettername
 typedef uint16_t uint16_fract_t;
@@ -734,352 +735,6 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
 	}
 }
 
-static const ALIGN float floatScale = 65536.0f;
-static const ALIGN float * const floatScaleAddr = &floatScale; // Win32 ASM doesn't know how to take addressOf inline
-
-static const ALIGN float clampMaxValue = ((float) (65536 - 1)) / 65536.0f;
-
-#ifdef X86
-#if 0
-#include <emmintrin.h>
-void qcms_transform_data_rgb_out_lut_sse_intrin(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
-{
-	int i;
-	float (*mat)[4] = transform->matrix;
-        char input_back[32];
-	/* Ensure we have a buffer that's 16 byte aligned regardless of the original
-	 * stack alignment. We can't use __attribute__((aligned(16))) or __declspec(align(32))
-	 * because they don't work on stack variables. gcc 4.4 does do the right thing 
-	 * on x86 but that's too new for us right now. For more info: gcc bug #16660 */
-        float *input = (float*)(((uintptr_t)&input_back[16]) & ~0xf);
-        /* share input and output locations to save having to keep the
-         * locations in separate registers */
-        uint32_t* output = (uint32_t*)input;
-	for (i=0; i<length; i++) {
-		const float *clampMax = &clampMaxValue;
-
-		unsigned char device_r = *src++;
-		unsigned char device_g = *src++;
-		unsigned char device_b = *src++;
-
-		__m128 xmm1 = _mm_load_ps(mat[0]);
-		__m128 xmm2 = _mm_load_ps(mat[1]);
-		__m128 xmm3 = _mm_load_ps(mat[2]);
-
-		__m128 vec_r = _mm_load_ss(&transform->input_gamma_table_r[device_r]);
-		vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
-		__m128 vec_g = _mm_load_ss(&transform->input_gamma_table_r[device_g]);
-		vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
-		__m128 vec_b = _mm_load_ss(&transform->input_gamma_table_r[device_b]);
-		vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
-
-		vec_r = _mm_mul_ps(vec_r, xmm1);
-		vec_g = _mm_mul_ps(vec_g, xmm2);
-		vec_b = _mm_mul_ps(vec_b, xmm3);
-
-		vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
-
-		__m128 max = _mm_load_ss(&clampMax);
-		max = _mm_shuffle_ps(max, max, 0);
-		__m128 min = _mm_setzero_ps();
-
-		vec_r = _mm_max_ps(min, vec_r);
-		vec_r = _mm_min_ps(max, vec_r);
-
-		__m128 scale = _mm_load_ss(&floatScale);
-		scale = _mm_shuffle_ps(scale, scale, 0);
-		__m128 result = _mm_mul_ps(vec_r, scale);
-
-		__m128i out = _mm_cvtps_epi32(result);
-		_mm_store_si128((__m128i*)input, out);
-
-		*dest++ = transform->output_table_r->data[output[0]];
-		*dest++ = transform->output_table_g->data[output[1]];
-		*dest++ = transform->output_table_b->data[output[2]];
-	}
-}
-#endif
-
-#if defined(_MSC_VER) && defined(_M_AMD64)
-#include <emmintrin.h>
-#endif
-
-static void qcms_transform_data_rgb_out_lut_sse(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
-{
-	unsigned int i;
-	float (*mat)[4] = transform->matrix;
-        char input_back[32];
-	/* Ensure we have a buffer that's 16 byte aligned regardless of the original
-	 * stack alignment. We can't use __attribute__((aligned(16))) or __declspec(align(32))
-	 * because they don't work on stack variables. gcc 4.4 does do the right thing 
-	 * on x86 but that's too new for us right now. For more info: gcc bug #16660 */
-        float *input = (float*)(((uintptr_t)&input_back[16]) & ~0xf);
-        /* share input and output locations to save having to keep the
-         * locations in separate registers */
-        uint32_t* output = (uint32_t*)input;
-	for (i = 0; i < length; i++) {
-		const float *clampMax = &clampMaxValue;
-
-		unsigned char device_r = *src++;
-		unsigned char device_g = *src++;
-		unsigned char device_b = *src++;
-
-		input[0] = transform->input_gamma_table_r[device_r];
-		input[1] = transform->input_gamma_table_g[device_g];
-		input[2] = transform->input_gamma_table_b[device_b];
-
-#ifdef __GNUC__
-		__asm(
-                      "movaps (%0), %%xmm1;\n\t"          // Move the first matrix column to xmm1
-                      "movaps 16(%0), %%xmm2;\n\t"        // Move the second matrix column to xmm2
-                      "movaps 32(%0), %%xmm3;\n\t"        // move the third matrix column to xmm3
-                      "movaps (%3), %%xmm0;\n\t"        // Move the vector to xmm0
-
-                                                          // Note - We have to copy and then shuffle because of the weird
-                                                          // semantics of shufps
-                                                          //
-                      "movaps %%xmm0, %%xmm4;\n\t"        // Copy the vector to xmm4
-                      "shufps $0, %%xmm4, %%xmm4;\n\t"    // Shuffle to repeat the first vector element repeated 4 times
-                      "mulps %%xmm4, %%xmm1;\n\t"         // Multiply the first vector element by the first matrix column
-                      "movaps %%xmm0, %%xmm5; \n\t"       // Copy the vector to xmm5
-                      "shufps $0x55, %%xmm5, %%xmm5;\n\t" // Shuffle to repeat the second vector element repeated 4 times
-                      "mulps %%xmm5, %%xmm2;\n\t"         // Multiply the second vector element by the seccond matrix column 
-                      "movaps %%xmm0, %%xmm6;\n\t"        // Copy the vector to xmm6
-                      "shufps $0xAA, %%xmm6, %%xmm6;\n\t" // Shuffle to repeat the third vector element repeated 4 times
-                      "mulps %%xmm6, %%xmm3;\n\t"         // Multiply the third vector element by the third matrix column
-
-                      "addps %%xmm3, %%xmm2;\n\t"         // Sum (second + third) columns
-                      "addps %%xmm2, %%xmm1;\n\t"         // Sum ((second + third) + first) columns
-
-                      "movss (%1), %%xmm7;\n\t"        // load the floating point representation of 65535/65536 
-                      "shufps $0, %%xmm7, %%xmm7;\n\t" // move it into all of the four slots
-                      "minps %%xmm7, %%xmm1;\n\t"      // clamp the vector to 1.0 max
-                      "xorps %%xmm6, %%xmm6;\n\t"       // get us cleared bitpatern, which is 0.0f
-                      "maxps %%xmm6, %%xmm1;\n\t"      // clamp the vector to 0.0 min
-                      "movss (%2), %%xmm5;\n\t"        // load the floating point scale factor
-                      "shufps $0, %%xmm5, %%xmm5;\n\t" // put it in all four slots
-                      "mulps %%xmm5, %%xmm1;\n\t"      // multiply by the scale factor
-                      "cvtps2dq %%xmm1, %%xmm1;\n\t"   // convert to integers
-                      "movdqa %%xmm1, (%3);\n\t"       // store
-
-                      : 
-                      : "r" (mat), "r" (clampMax), "r" (&floatScale), "r" (input)
-                      : "memory"
-/* older versions of gcc don't know about these registers so only include them as constraints
-   if gcc knows about them */
-#ifdef __SSE2__
-                        , "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
-#endif
-                      );
-#elif defined(_MSC_VER) && defined(_M_IX86)
-                __asm {
-                      mov      eax, mat
-                      mov      ecx, clampMax
-                      mov      edx, floatScaleAddr
-		      mov      ebx, input
-
-                      movaps   xmm1, [eax]
-                      movaps   xmm2, [eax + 16]
-                      movaps   xmm3, [eax + 32]
-                      movaps   xmm0, [ebx]
-
-                      movaps   xmm4, xmm0
-                      shufps   xmm4, xmm4, 0
-                      mulps    xmm1, xmm4
-                      movaps   xmm5, xmm0
-                      shufps   xmm5, xmm5, 0x55
-                      mulps    xmm2, xmm5
-                      movaps   xmm6, xmm0
-                      shufps   xmm6, xmm6, 0xAA
-                      mulps    xmm3, xmm6
-
-                      addps    xmm2, xmm3
-                      addps    xmm1, xmm2
-
-                      movss    xmm7, [ecx]
-                      shufps   xmm7, xmm7, 0
-                      minps    xmm1, xmm7
-                      xorps    xmm6, xmm6
-                      maxps    xmm1, xmm6
-                      movss    xmm5, [edx]
-                      shufps   xmm5, xmm5, 0
-                      mulps    xmm1, xmm5
-                      cvtps2dq xmm1, xmm1
-                      movdqa   [ebx], xmm1
-                }
-#elif defined(_MSC_VER) && defined(_M_AMD64)
-                {
-                        __m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6, xmm7;
-
-                        xmm1 = _mm_load_ps((__m128*)mat);
-                        xmm2 = _mm_load_ps(((__m128*)mat) + 1);
-                        xmm3 = _mm_load_ps(((__m128*)mat) + 2);
-                        xmm0 = _mm_load_ps((__m128*)input);
-
-                        xmm1 = _mm_mul_ps(xmm1, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0,0,0,0)));
-                        xmm2 = _mm_mul_ps(xmm2, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1,1,1,1)));
-                        xmm3 = _mm_mul_ps(xmm3, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2,2,2,2)));
-
-                        xmm1 = _mm_add_ps(xmm1, _mm_add_ps(xmm2, xmm3));
-
-                        xmm7 = _mm_load_ss(clampMax);
-                        xmm7 = _mm_shuffle_ps(xmm7, xmm7, _MM_SHUFFLE(0,0,0,0));
-                        xmm1 = _mm_min_ps(xmm1, xmm7);
-                        xmm6 = _mm_xor_ps(xmm6, xmm6);
-                        xmm1 = _mm_max_ps(xmm1, xmm6);
-                        xmm5 = _mm_load_ss(&floatScale);
-                        xmm5 = _mm_shuffle_ps(xmm5, xmm5, _MM_SHUFFLE(0,0,0,0));
-                        xmm1 = _mm_mul_ps(xmm1, xmm5);
-                        _mm_store_si128((__m128i*)input, _mm_cvtps_epi32(xmm1));
-                }
-#else
-#error "Unknown platform"
-#endif
-
-		*dest++ = transform->output_table_r->data[output[0]];
-		*dest++ = transform->output_table_g->data[output[1]];
-		*dest++ = transform->output_table_b->data[output[2]];
-	}
-}
-
-static void qcms_transform_data_rgba_out_lut_sse(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
-{
-	unsigned int i;
-	float (*mat)[4] = transform->matrix;
-        char input_back[32];
-	/* align input on 16 byte boundary */
-        float *input = (float*)(((uintptr_t)&input_back[16]) & ~0xf);
-        /* share input and output locations to save having to keep the
-         * locations in separate registers */
-        uint32_t* output = (uint32_t*)input;
-	for (i = 0; i < length; i++) {
-		const float *clampMax = &clampMaxValue;
-
-		unsigned char device_r = *src++;
-		unsigned char device_g = *src++;
-		unsigned char device_b = *src++;
-		unsigned char alpha = *src++;
-
-		input[0] = transform->input_gamma_table_r[device_r];
-		input[1] = transform->input_gamma_table_g[device_g];
-		input[2] = transform->input_gamma_table_b[device_b];
-
-#ifdef __GNUC__
-		__asm(
-                      "movaps (%0), %%xmm1;\n\t"          // Move the first matrix column to xmm1
-                      "movaps 16(%0), %%xmm2;\n\t"        // Move the second matrix column to xmm2
-                      "movaps 32(%0), %%xmm3;\n\t"        // move the third matrix column to xmm3
-                      "movaps (%3), %%xmm0;\n\t"        // Move the vector to xmm0
-
-                                                          // Note - We have to copy and then shuffle because of the weird
-                                                          // semantics of shufps
-                                                          //
-                      "movaps %%xmm0, %%xmm4;\n\t"        // Copy the vector to xmm4
-                      "shufps $0, %%xmm4, %%xmm4;\n\t"    // Shuffle to repeat the first vector element repeated 4 times
-                      "mulps %%xmm4, %%xmm1;\n\t"         // Multiply the first vector element by the first matrix column
-                      "movaps %%xmm0, %%xmm5; \n\t"       // Copy the vector to xmm5
-                      "shufps $0x55, %%xmm5, %%xmm5;\n\t" // Shuffle to repeat the second vector element repeated 4 times
-                      "mulps %%xmm5, %%xmm2;\n\t"         // Multiply the second vector element by the seccond matrix column 
-                      "movaps %%xmm0, %%xmm6;\n\t"        // Copy the vector to xmm6
-                      "shufps $0xAA, %%xmm6, %%xmm6;\n\t" // Shuffle to repeat the third vector element repeated 4 times
-                      "mulps %%xmm6, %%xmm3;\n\t"         // Multiply the third vector element by the third matrix column
-
-                      "addps %%xmm3, %%xmm2;\n\t"         // Sum (second + third) columns
-                      "addps %%xmm2, %%xmm1;\n\t"         // Sum ((second + third) + first) columns
-
-                      "movss (%1), %%xmm7;\n\t"        // load the floating point representation of 65535/65536 
-                      "shufps $0, %%xmm7, %%xmm7;\n\t" // move it into all of the four slots
-                      "minps %%xmm7, %%xmm1;\n\t"      // clamp the vector to 1.0 max
-                      "xorps %%xmm6, %%xmm6;\n\t"       // get us cleared bitpatern, which is 0.0f
-                      "maxps %%xmm6, %%xmm1;\n\t"      // clamp the vector to 0.0 min
-                      "movss (%2), %%xmm5;\n\t"        // load the floating point scale factor
-                      "shufps $0, %%xmm5, %%xmm5;\n\t" // put it in all four slots
-                      "mulps %%xmm5, %%xmm1;\n\t"      // multiply by the scale factor
-                      "cvtps2dq %%xmm1, %%xmm1;\n\t"   // convert to integers
-                      "movdqa %%xmm1, (%3);\n\t"       // store
-
-                      : 
-                      : "r" (mat), "r" (clampMax), "r" (&floatScale), "r" (input)
-                      : "memory"
-/* older versions of gcc don't know about these registers so only include them as constraints
-   if gcc knows about them */
-#ifdef __SSE2__
-                        , "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
-#endif
-                      );
-#elif defined(_MSC_VER) && defined(_M_IX86)
-                __asm {
-                      mov      eax, mat
-                      mov      ecx, clampMax
-                      mov      edx, floatScaleAddr
-		      mov      ebx, input
-
-                      movaps   xmm1, [eax]
-                      movaps   xmm2, [eax + 16]
-                      movaps   xmm3, [eax + 32]
-                      movaps   xmm0, [ebx]
-
-                      movaps   xmm4, xmm0
-                      shufps   xmm4, xmm4, 0
-                      mulps    xmm1, xmm4
-                      movaps   xmm5, xmm0
-                      shufps   xmm5, xmm5, 0x55
-                      mulps    xmm2, xmm5
-                      movaps   xmm6, xmm0
-                      shufps   xmm6, xmm6, 0xAA
-                      mulps    xmm3, xmm6
-
-                      addps    xmm2, xmm3
-                      addps    xmm1, xmm2
-
-                      movss    xmm7, [ecx]
-                      shufps   xmm7, xmm7, 0
-                      minps    xmm1, xmm7
-                      xorps    xmm6, xmm6
-                      maxps    xmm1, xmm6
-                      movss    xmm5, [edx]
-                      shufps   xmm5, xmm5, 0
-                      mulps    xmm1, xmm5
-                      cvtps2dq xmm1, xmm1
-                      movdqa   [ebx], xmm1
-                }
-#elif defined(_MSC_VER) && defined(_M_AMD64)
-                {
-                        __m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6, xmm7;
-
-                        xmm1 = _mm_load_ps((__m128*)mat);
-                        xmm2 = _mm_load_ps(((__m128*)mat) + 1);
-                        xmm3 = _mm_load_ps(((__m128*)mat) + 2);
-                        xmm0 = _mm_load_ps((__m128*)input);
-
-                        xmm1 = _mm_mul_ps(xmm1, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(0,0,0,0)));
-                        xmm2 = _mm_mul_ps(xmm2, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(1,1,1,1)));
-                        xmm3 = _mm_mul_ps(xmm3, _mm_shuffle_ps(xmm0, xmm0, _MM_SHUFFLE(2,2,2,2)));
-
-                        xmm1 = _mm_add_ps(xmm1, _mm_add_ps(xmm2, xmm3));
-
-                        xmm7 = _mm_load_ss(clampMax);
-                        xmm7 = _mm_shuffle_ps(xmm7, xmm7, _MM_SHUFFLE(0,0,0,0));
-                        xmm1 = _mm_min_ps(xmm1, xmm7);
-                        xmm6 = _mm_xor_ps(xmm6, xmm6);
-                        xmm1 = _mm_max_ps(xmm1, xmm6);
-                        xmm5 = _mm_load_ss(&floatScale);
-                        xmm5 = _mm_shuffle_ps(xmm5, xmm5, _MM_SHUFFLE(0,0,0,0));
-                        xmm1 = _mm_mul_ps(xmm1, xmm5);
-                        _mm_store_si128((__m128i*)input, _mm_cvtps_epi32(xmm1));
-                }
-#else
-#error "Unknown platform"
-#endif
-
-		*dest++ = transform->output_table_r->data[output[0]];
-		*dest++ = transform->output_table_g->data[output[1]];
-		*dest++ = transform->output_table_b->data[output[2]];
-		*dest++ = alpha;
-	}
-}
-#endif
-
 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 {
 	unsigned int i;
@@ -1380,7 +1035,7 @@ qcms_bool compute_precache(struct curveType *trc, uint8_t *output)
 	return true;
 }
 
-
+#ifdef X86
 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
 // mozilla/jpeg)
  // -------------------------------------------------------------------------
@@ -1423,31 +1078,43 @@ static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t
 }
 #endif
 
-// -------------------------Runtime SSE2 Detection-----------------------------
+// -------------------------Runtime SSEx Detection-----------------------------
 
+/* MMX is always supported per
+ *  Gecko v1.9.1 minimum CPU requirements */
+#define SSE1_EDX_MASK (1UL << 25)
 #define SSE2_EDX_MASK (1UL << 26)
-static qcms_bool sse2_available(void)
+#define SSE3_ECX_MASK (1UL <<  0)
+
+static int sse_version_available(void)
 {
 #if defined(__x86_64__) || defined(_M_AMD64)
-       return true;
+	/* we know at build time that 64-bit CPUs always have SSE2;
+	 * this tells the compiler that non-SSE2 branches will never be
+	 * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
+	return 2;
 #elif defined(HAS_CPUID)
-       static int has_sse2 = -1;
-       uint32_t a, b, c, d;
-       uint32_t function = 0x00000001;
-
-       if (has_sse2 == -1) {
-              has_sse2 = 0;
-	      cpuid(function, &a, &b, &c, &d);
-              if (d & SSE2_EDX_MASK)
-                     has_sse2 = 1;
-              else
-                     has_sse2 = 0;
-       }
+	static int sse_version = -1;
+	uint32_t a, b, c, d;
+	uint32_t function = 0x00000001;
+
+	if (sse_version == -1) {
+		sse_version = 0;
+		cpuid(function, &a, &b, &c, &d);
+		if (c & SSE3_ECX_MASK)
+			sse_version = 3;
+		else if (d & SSE2_EDX_MASK)
+			sse_version = 2;
+		else if (d & SSE1_EDX_MASK)
+			sse_version = 1;
+	}
 
-       return has_sse2;
+	return sse_version;
+#else
+	return 0;
 #endif
-       return false;
 }
+#endif
 
 void build_output_lut(struct curveType *trc,
 		uint16_t **output_gamma_lut, size_t *output_gamma_lut_length)
@@ -1553,11 +1220,18 @@ qcms_transform* qcms_transform_create(
             }
 	    if (precache) {
 #ifdef X86
-		    if (sse2_available()) {
+		    if (sse_version_available() >= 2) {
 			    if (in_type == QCMS_DATA_RGB_8)
-				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse;
+				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
 			    else
-				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse;
+				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
+
+		    } else
+		    if (sse_version_available() >= 1) {
+			    if (in_type == QCMS_DATA_RGB_8)
+				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
+			    else
+				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
 
 		    } else
 #endif
@@ -1639,6 +1313,10 @@ qcms_transform* qcms_transform_create(
 	return transform;
 }
 
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+/* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
+__attribute__((__force_align_arg_pointer__))
+#endif
 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
 {
 	transform->transform_fn(transform, src, dest, length);
author	Steve Snyder <swsnyder@snydernet.net>	2010-02-18 16:10:07 -0500
committer	Jeff Muizelaar <jmuizelaar@mozilla.com>	2010-02-18 16:10:07 -0500
commit	99afd86d57d431dc3317c1123d8078cbf5afdb97 (patch)
tree	7c2048b2016551563276c94b678e480373bd9f91 /transform.c
parent	d280a4b155590d07a9459d25c3acde144b96a6bc (diff)