diff options
Diffstat (limited to 'unpremultiply-invb.c')
-rw-r--r-- | unpremultiply-invb.c | 138 |
1 files changed, 138 insertions, 0 deletions
/* unpremultiply-invb.c
 *
 * An unpremultiplier using reciprocal multiplication.  It specialises
 * constant runs and solid runs of pixels with low overhead loops and
 * uses only a 1KB table of reciprocals. */
/* gcc -c -W -Wall -O3 -funroll-all-loops -fomit-frame-pointer -std=c99 unpremultiply-invb.c */
#include <stdint.h>
#include <stddef.h>

/* Pixel format config for a 32 bit pixel with 8 bit components.  Only
 * the location of alpha matters; the colour components occupy the
 * remaining three bytes in R, G, B order below alpha (modulo 32). */
#ifndef ASHIFT
# define ASHIFT 24
#endif
#define RSHIFT ((24 + ASHIFT) % 32)
#define GSHIFT ((16 + ASHIFT) % 32)
#define BSHIFT (( 8 + ASHIFT) % 32)

#define AMASK (255U << ASHIFT)
#define RMASK (255U << RSHIFT)
#define GMASK (255U << GSHIFT)
#define BMASK (255U << BSHIFT)

/* Set to 1 if the input can have superluminant pixels, i.e. pixels
 * with a colour component larger than their alpha. */
#define DO_CLAMP_INPUT 0

/* Shift x left by y bits.  Supports negative y for right shifts. */
#define SHIFT(x, y) ((y) < 0 ? (x) >> (-(y)) : (x) << (y))

/* Integer division of a by b, rounding up.  Fully parenthesised so
 * that it can be used safely inside larger expressions. */
#define ceil_div(a,b) (((a) + (b)-1) / (b))

/* The reciprocal_table[i] entries are defined by
 *
 *      0       when i = 0
 *      255 / i when i > 0
 *
 * represented in fixed point format with RECIPROCAL_BITS of
 * precision and errors rounded up. */
#define RECIPROCAL_BITS 16
static uint32_t const reciprocal_table[256] = {
# define R(i)  ((i) ? ceil_div(255*(1<<RECIPROCAL_BITS), (i)) : 0)
# define R1(i) R(i),  R(i+1),   R(i+2),  R(i+3)
# define R2(i) R1(i), R1(i+4),  R1(i+8), R1(i+12)
# define R3(i) R2(i), R2(i+16), R2(i+32), R2(i+48)
        R3(0), R3(64), R3(128), R3(192)
};

/* Unpremultiply a single pixel: every colour component is scaled by
 * 255/alpha using the fixed point reciprocal table, and the alpha
 * byte is passed through unchanged.  A pixel with zero alpha yields
 * zero colour components because reciprocal_table[0] is 0. */
static inline uint32_t
unpremultiply_pixel_invb(uint32_t rgba)
{
        uint32_t a = (rgba >> ASHIFT) & 255;
        uint32_t r = (rgba >> RSHIFT) & 255;
        uint32_t g = (rgba >> GSHIFT) & 255;
        uint32_t b = (rgba >> BSHIFT) & 255;
        uint32_t recip = reciprocal_table[a];
#if DO_CLAMP_INPUT
        r = r < a ? r : a;
        g = g < a ? g : a;
        b = b < a ? b : a;
#endif
        /* component * recip holds the result in fixed point with
         * RECIPROCAL_BITS fractional bits; shift its integer part
         * straight into the component's position in the pixel. */
        r = SHIFT(r * recip, RSHIFT - RECIPROCAL_BITS);
        g = SHIFT(g * recip, GSHIFT - RECIPROCAL_BITS);
        b = SHIFT(b * recip, BSHIFT - RECIPROCAL_BITS);
        return (r & RMASK) | (g & GMASK) | (b & BMASK) | (rgba & AMASK);
}

/* Transfer num_pixels unpremultiplied pixels from src[] to dst[].
 *
 * dst and src must not overlap and must each hold at least
 * num_pixels elements.  Nothing is read or written when num_pixels
 * is zero.
 *
 * This version uses a short probe period of two pixels to identify
 * runs of constant or solid pixels.  When a run is identified it
 * falls into a special case loop for the duration of the run. */
void
unpremultiply_with_invb(
        uint32_t * restrict dst,
        uint32_t const * restrict src,
        size_t num_pixels)
{
        size_t i = 0;
        while (i < num_pixels) {
                /* We want to identify long runs of constant input
                 * pixels and cache the unpremultiplied result. */
                uint32_t const const_in = src[i];
                uint32_t const const_out = unpremultiply_pixel_invb(const_in);
                uint32_t second;

                /* Bitwise difference between the two probe pixels.
                 * It is zero iff the probe pixels were identical. */
                uint32_t diff;

                /* Sum of the alphas of the probe pixels, biased by
                 * -2*255 (modulo 2^32) so that it is zero iff both
                 * probe pixels were fully opaque. */
                uint32_t accu;

                dst[i] = const_out;
                if (i + 1 == num_pixels)
                        return;

                second = src[i+1];
                dst[i+1] = unpremultiply_pixel_invb(second);

                diff = second ^ const_in;
                accu = ((const_in >> ASHIFT) & 255)
                     + ((second >> ASHIFT) & 255)
                     - 2u*255u;

                i += 2;

                /* Fall into special cases if we have special
                 * circumstances.  The and is zero whenever either
                 * operand is zero, so one test filters out the common
                 * case.  It can also be zero by coincidence; then
                 * neither branch below is taken, which is harmless. */
                if (0 != (accu & diff))
                        continue;

                /* The probe may have consumed the last pixels, so both
                 * run loops must check the bound before touching
                 * src[i]. */
                if (0 == accu) { /* a run of solid pixels. */
                        while (i < num_pixels && AMASK == (src[i] & AMASK)) {
                                /* Opaque pixels are already
                                 * unpremultiplied; copy them through. */
                                dst[i] = src[i];
                                i++;
                        }
                } else if (0 == diff) { /* a run of constant pixels. */
                        while (i < num_pixels && src[i] == const_in) {
                                dst[i++] = const_out;
                        }
                }
        }
}