diff options
author | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2012-11-24 19:43:41 +0200 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2012-11-25 12:47:03 +0200 |
commit | ad29fd4b8b6c718e98b38db21dba869ae1583046 (patch) | |
tree | 4dc0ab06ed8d4974d4ab1609e13215abd9a4ae50 | |
parent | aef5ac273da233b7502914a963af4ccf54c3ecf8 (diff) |
test: Added a better PRNG (pseudorandom number generator)
This adds a fast SIMD-optimized variant of a small noncryptographic
PRNG originally developed by Bob Jenkins:
http://www.burtleburtle.net/bob/rand/smallprng.html
The generated pseudorandom data is good enough to pass "Big Crush"
tests from TestU01 (http://en.wikipedia.org/wiki/TestU01).
SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
which is a GCC specific extension. There is also a slower alternative
code path, which should work with any C compiler.
The performance of filling buffer with random data:
Intel Core i7 @2.8GHz (SSE2) : ~5.9 GB/s
ARM Cortex-A15 @1.7GHz (NEON) : ~2.2 GB/s
IBM Cell PPU @3.2GHz (Altivec) : ~1.7 GB/s
-rw-r--r-- | test/Makefile.sources | 3 | ||||
-rw-r--r-- | test/prng-test.c | 172 | ||||
-rw-r--r-- | test/utils-prng.c | 238 | ||||
-rw-r--r-- | test/utils-prng.h | 168 |
4 files changed, 581 insertions, 0 deletions
diff --git a/test/Makefile.sources b/test/Makefile.sources index 0778971..8c0b505 100644 --- a/test/Makefile.sources +++ b/test/Makefile.sources @@ -1,5 +1,6 @@ # Tests (sorted by expected completion time) TESTPROGRAMS = \ + prng-test \ a1-trap-test \ pdf-op-test \ region-test \ @@ -33,8 +34,10 @@ BENCHMARKS = \ # Utility functions libutils_sources = \ utils.c \ + utils-prng.c \ $(NULL) libutils_headers = \ utils.h \ + utils-prng.h \ $(NULL) diff --git a/test/prng-test.c b/test/prng-test.c new file mode 100644 index 0000000..0a3ad5e --- /dev/null +++ b/test/prng-test.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include "utils-prng.h" +#include "utils.h" + +/* The original code from http://www.burtleburtle.net/bob/rand/smallprng.html */ + +typedef uint32_t u4; +typedef struct ranctx { u4 a; u4 b; u4 c; u4 d; } ranctx; + +#define rot(x,k) (((x)<<(k))|((x)>>(32-(k)))) +u4 ranval( ranctx *x ) { + u4 e = x->a - rot(x->b, 27); + x->a = x->b ^ rot(x->c, 17); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +void raninit( ranctx *x, u4 seed ) { + u4 i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i=0; i<20; ++i) { + (void)ranval(x); + } +} + +/*****************************************************************************/ + +#define BUFSIZE (8 * 1024 * 1024) +#define N 50 + +void bench (void) +{ + double t1, t2; + int i; + prng_t prng; + uint8_t *buf = aligned_malloc (16, BUFSIZE + 1); + + prng_srand_r (&prng, 1234); + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf, BUFSIZE, 0); + t2 = gettime(); + printf ("aligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf + 1, BUFSIZE, 0); + t2 = gettime(); + printf ("unaligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("aligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf + 1, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("unaligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + free (buf); +} + +#define SMALLBUFSIZE 100 + +int main (int argc, char *argv[]) +{ + const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] = + { + 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39 + }; + uint32_t crc1, crc2; + uint32_t ref, seed, seed0, seed1, seed2, seed3; + prng_rand_128_data_t buf; + uint8_t *bytebuf = aligned_malloc(16, SMALLBUFSIZE + 1); + ranctx x; + prng_t prng; + prng_randmemset_flags_t flags; + + if (argc > 1 && strcmp(argv[1], "-bench") == 0) + { + bench (); + return 0; + } + + /* basic test */ + raninit (&x, 0); + prng_srand_r (&prng, 0); + assert (ranval (&x) == prng_rand_r (&prng)); + + /* test for simd code */ + seed = 0; + prng_srand_r (&prng, seed); + seed0 = (seed = seed * 1103515245 + 12345); + seed1 = (seed = seed * 1103515245 + 12345); + seed2 = (seed = seed * 1103515245 + 12345); + seed3 = (seed = seed * 1103515245 + 12345); + prng_rand_128_r (&prng, &buf); + + raninit (&x, seed0); + ref = ranval (&x); + assert (ref == buf.w[0]); + + raninit (&x, seed1); + ref = ranval (&x); + assert (ref == buf.w[1]); + + raninit (&x, seed2); + ref = ranval (&x); + assert (ref == buf.w[2]); + + raninit (&x, seed3); + ref = ranval (&x); + assert (ref == buf.w[3]); + + /* test for randmemset */ + for (flags = 0; flags <= RANDMEMSET_MORE_00_AND_FF; flags++) + { + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf, 16, flags); + prng_randmemset_r (&prng, bytebuf + 16, SMALLBUFSIZE - 17, flags); + crc1 = compute_crc32 (0, bytebuf, SMALLBUFSIZE - 1); + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf + 1, SMALLBUFSIZE - 1, flags); + crc2 = compute_crc32 (0, bytebuf + 1, SMALLBUFSIZE - 1); + assert (ref_crc[flags] == crc1); + assert (ref_crc[flags] == crc2); + } + + free (bytebuf); + + return 0; +} diff --git a/test/utils-prng.c b/test/utils-prng.c new file mode 100644 index 0000000..7c2dd6a --- /dev/null +++ b/test/utils-prng.c @@ -0,0 +1,238 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include "utils-prng.h" + +void smallprng_srand_r (smallprng_t *x, uint32_t seed) +{ + uint32_t i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i = 0; i < 20; ++i) + smallprng_rand_r (x); +} + +/* + * Set a 32-bit seed for PRNG + * + * LCG is used here for generating independent seeds for different + * smallprng instances (in the case if smallprng is also used for + * generating these seeds, "Big Crush" test from TestU01 detects + * some problems in the glued 'prng_rand_128_r' output data). + * Actually we might be even better using some cryptographic + * hash for this purpose, but LCG seems to be also enough for + * passing "Big Crush". + */ +void prng_srand_r (prng_t *x, uint32_t seed) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + int i; + prng_rand_128_data_t dummy; + smallprng_srand_r (&x->p0, seed); + x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed; + x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345); + x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345); + x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345); + x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345); + for (i = 0; i < 20; ++i) + prng_rand_128_r (x, &dummy); +#else + smallprng_srand_r (&x->p0, seed); + smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345)); +#endif +} + +static force_inline void +store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + if (aligned) + { + *(uint8x16 *)addr = d->vb; + return; + } +#endif + /* we could try something better for unaligned writes (packed attribute), + * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */ + memcpy (addr, d, 16); +} + +/* + * Helper function and the actual code for "prng_randmemset_r" function + */ +static force_inline void +randmemset_internal (prng_t *prng, + uint8_t *buf, + size_t size, + prng_randmemset_flags_t flags, + int aligned) +{ + prng_t local_prng = *prng; + prng_rand_128_data_t randdata; + + while (size >= 16) + { + prng_rand_128_data_t t; + if (flags == 0) + { + prng_rand_128_r (&local_prng, &randdata); + } + else + { + prng_rand_128_r (&local_prng, &t); + prng_rand_128_r (&local_prng, &randdata); +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + if (flags & RANDMEMSET_MORE_FF) + { + const uint8x16 const_C0 = + { + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0 + }; + randdata.vb |= (t.vb >= const_C0); + } + if (flags & RANDMEMSET_MORE_00) + { + const uint8x16 const_40 = + { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 + }; + randdata.vb &= (t.vb >= const_40); + } +#else + #define PROCESS_ONE_LANE(i) \ + if (flags & RANDMEMSET_MORE_FF) \ + { \ + uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \ + mask_ff |= mask_ff >> 1; \ + mask_ff |= mask_ff >> 2; \ + mask_ff |= mask_ff >> 4; \ + randdata.w[i] |= mask_ff; \ + } \ + if (flags & RANDMEMSET_MORE_00) \ + { \ + uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \ + mask_00 |= mask_00 >> 1; \ + mask_00 |= mask_00 >> 2; \ + mask_00 |= mask_00 >> 4; \ + randdata.w[i] &= mask_00; \ + } + + PROCESS_ONE_LANE (0) + PROCESS_ONE_LANE (1) + PROCESS_ONE_LANE (2) + PROCESS_ONE_LANE (3) +#endif + } + if (is_little_endian ()) + { + store_rand_128_data (buf, &randdata, aligned); + buf += 16; + } + else + { +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + const uint8x16 bswap_shufflemask = + { + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + }; + randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask); + store_rand_128_data (buf, &randdata, aligned); + buf += 16; +#else + uint8_t t1, t2, t3, t4; + #define STORE_ONE_LANE(i) \ + t1 = randdata.b[i * 4 + 3]; \ + t2 = randdata.b[i * 4 + 2]; \ + t3 = randdata.b[i * 4 + 1]; \ + t4 = randdata.b[i * 4 + 0]; \ + *buf++ = t1; \ + *buf++ = t2; \ + *buf++ = t3; \ + *buf++ = t4; + + STORE_ONE_LANE (0) + STORE_ONE_LANE (1) + STORE_ONE_LANE (2) + STORE_ONE_LANE (3) +#endif + } + size -= 16; + } + while (size > 0) + { + uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; + if (flags != 0) + { + uint8_t t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0)) + randbyte = 0xFF; + if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) + randbyte = 0x00; + } + *buf++ = randbyte; + size--; + } + *prng = local_prng; +} + +/* + * Fill memory buffer with random data. Flags argument may be used + * to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + */ +void prng_randmemset_r (prng_t *prng, + void *voidbuf, + size_t size, + prng_randmemset_flags_t flags) +{ + uint8_t *buf = (uint8_t *)voidbuf; + if ((uintptr_t)buf & 15) + { + /* unaligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 0); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0); + else + randmemset_internal (prng, buf, size, flags, 0); + } + else + { + /* aligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 1); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1); + else + randmemset_internal (prng, buf, size, flags, 1); + } +} diff --git a/test/utils-prng.h b/test/utils-prng.h new file mode 100644 index 0000000..285107f --- /dev/null +++ b/test/utils-prng.h @@ -0,0 +1,168 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __UTILS_PRNG_H__ +#define __UTILS_PRNG_H__ + +/* + * This file provides a fast SIMD-optimized noncryptographic PRNG (pseudorandom + * number generator), with the output good enough to pass "Big Crush" tests + * from TestU01 (http://en.wikipedia.org/wiki/TestU01). + * + * SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + * which is a GCC specific extension. There is also a slower alternative + * code path, which should work with any C compiler. + * + * The "prng_t" structure keeps the internal state of the random number + * generator. It is possible to have multiple instances of the random number + * generator active at the same time, in this case each of them needs to have + * its own "prng_t". All the functions take a pointer to "prng_t" + * as the first argument. + * + * Functions: + * + * ---------------------------------------------------------------------------- + * void prng_srand_r (prng_t *prng, uint32_t seed); + * + * Initialize the pseudorandom number generator. The sequence of preudorandom + * numbers is deterministic and only depends on "seed". Any two generators + * initialized with the same seed will produce exactly the same sequence. + * + * ---------------------------------------------------------------------------- + * uint32_t prng_rand_r (prng_t *prng); + * + * Generate a single uniformly distributed 32-bit pseudorandom value. + * + * ---------------------------------------------------------------------------- + * void prng_randmemset_r (prng_t *prng, + * void *buffer, + * size_t size, + * prng_randmemset_flags_t flags); + * + * Fills the memory buffer "buffer" with "size" bytes of pseudorandom data. + * The "flags" argument may be used to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * The flags can be combined. This allows a bit better simulation of typical + * pixel data, which normally contains a lot of fully transparent or fully + * opaque pixels. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +/*****************************************************************************/ + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) +#define GCC_VECTOR_EXTENSIONS_SUPPORTED +typedef uint32_t uint32x4 __attribute__ ((vector_size(16))); +typedef uint8_t uint8x16 __attribute__ ((vector_size(16))); +#endif + +typedef struct +{ + uint32_t a, b, c, d; +} smallprng_t; + +typedef struct +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint32x4 a, b, c, d; +#else + smallprng_t p1, p2, p3, p4; +#endif + smallprng_t p0; +} prng_t; + +typedef union +{ + uint8_t b[16]; + uint32_t w[4]; +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint8x16 vb; + uint32x4 vw; +#endif +} prng_rand_128_data_t; + +/*****************************************************************************/ + +static force_inline uint32_t +smallprng_rand_r (smallprng_t *x) +{ + uint32_t e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +/* Generate 4 bytes (32-bits) of random data */ +static force_inline uint32_t +prng_rand_r (prng_t *x) +{ + return smallprng_rand_r (&x->p0); +} + +/* Generate 16 bytes (128-bits) of random data */ +static force_inline void +prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + data->vw = x->d; +#else + data->w[0] = smallprng_rand_r (&x->p1); + data->w[1] = smallprng_rand_r (&x->p2); + data->w[2] = smallprng_rand_r (&x->p3); + data->w[3] = smallprng_rand_r (&x->p4); +#endif +} + +typedef enum +{ + RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */ + RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */ + RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_FF) +} prng_randmemset_flags_t; + +/* Set the 32-bit seed for PRNG */ +void prng_srand_r (prng_t *prng, uint32_t seed); + +/* Fill memory buffer with random data */ +void prng_randmemset_r (prng_t *prng, + void *buffer, + size_t size, + prng_randmemset_flags_t flags); + +#endif |