2 files changed, 450 insertions, 0 deletions
diff --git a/test/Makefile.sources b/test/Makefile.sources
index dd7582a..5d55e67 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -26,6 +26,7 @@ TESTPROGRAMS =			      \
 	glyph-test		      \
 	solid-test		      \
 	stress-test		      \
+	cover-test		      \
 	blitters-test		      \
 	affine-test		      \
 	scaling-test		      \
diff --git a/test/cover-test.c b/test/cover-test.c
new file mode 100644
index 0000000..83e2972
--- /dev/null
+++ b/test/cover-test.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright © 2015 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  The copyright holders make no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Ben Avison (bavison@riscosopen.org)
+ *
+ */
+
+/*
+ * This test aims to verify both numerical correctness and the honouring of
+ * array bounds for scaled plots (both nearest-neighbour and bilinear) at or
+ * close to the boundary conditions for applicability of "cover" type fast paths
+ * and iter fetch routines.
+ *
+ * It has a secondary purpose: by setting the env var EXACT (to any value) it
+ * will only test plots that are exactly on the boundary condition. This makes
+ * it possible to ensure that "cover" routines are being used to the maximum,
+ * although this requires the use of a debugger or code instrumentation to
+ * verify.
+ */
+
+#include "utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Approximate limits for random scale factor generation - these ensure we can
+ * get at least 8x reduction and 8x enlargement.
+ */
+#define LOG2_MAX_FACTOR (3)
+
+/* 1/sqrt(2) (or sqrt(0.5), or 2^-0.5) as a 0.32 fixed-point number */
+#define INV_SQRT_2_0POINT32_FIXED (0xB504F334u)
+
+/* The largest increment that can be generated by random_scale_factor().
+ * This occurs when the "mantissa" part is 0xFFFFFFFF and the "exponent"
+ * part is -LOG2_MAX_FACTOR.
+ */
+#define MAX_INC ((pixman_fixed_t) \
+                 (INV_SQRT_2_0POINT32_FIXED >> (31 - 16 - LOG2_MAX_FACTOR)))
+
+/* Minimum source width (in pixels) based on a typical page size of 4K and
+ * maximum colour depth of 32bpp.
+ */
+#define MIN_SRC_WIDTH (4096 / 4)
+
+/* Derive the destination width so that at max increment we fit within source */
+#define DST_WIDTH (MIN_SRC_WIDTH * pixman_fixed_1 / MAX_INC)
+
+/* Calculate heights the other way round.
+ * No limits due to page alignment here.
+ */
+#define DST_HEIGHT 3
+#define SRC_HEIGHT ((DST_HEIGHT * MAX_INC + pixman_fixed_1 - 1) / pixman_fixed_1)
+
+/* At the time of writing, all the scaled fast paths use SRC, OVER or ADD
+ * Porter-Duff operators. XOR is included in the list to ensure good
+ * representation of iter scanline fetch routines.
+ */
+static const pixman_op_t op_list[] = {
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_XOR,
+};
+
+/* At the time of writing, all the scaled fast paths use a8r8g8b8, x8r8g8b8
+ * or r5g6b5, or red-blue swapped versions of the same. When a mask channel is
+ * used, it is always a8 (and so implicitly not component alpha). a1r5g5b5 is
+ * included because it is the only other format to feature in any iters. */
+static const pixman_format_code_t img_fmt_list[] = {
+    PIXMAN_a8r8g8b8,
+    PIXMAN_x8r8g8b8,
+    PIXMAN_r5g6b5,
+    PIXMAN_a1r5g5b5
+};
+
+/* This is a flag reflecting the environment variable EXACT. It can be used
+ * to ensure that source coordinates corresponding exactly to the "cover" limits
+ * are used, rather than any "near misses". This can, for example, be used in
+ * conjunction with a debugger to ensure that only COVER fast paths are used.
+ */
+static int exact;
+
+static pixman_image_t *
+create_src_image (pixman_format_code_t fmt)
+{
+    pixman_image_t *tmp_img, *img;
+
+    /* We need the left-most and right-most MIN_SRC_WIDTH pixels to have
+     * predictable values, even though fence_image_create_bits() may allocate
+     * an image somewhat larger than that, by an amount that varies depending
+     * upon the page size on the current platform. The solution is to create a
+     * temporary non-fenced image that is exactly MIN_SRC_WIDTH wide and blit it
+     * into the fenced image.
+     */
+    tmp_img = pixman_image_create_bits (fmt, MIN_SRC_WIDTH, SRC_HEIGHT,
+                                        NULL, 0);
+    if (tmp_img == NULL)
+        return NULL;
+
+    img = fence_image_create_bits (fmt, MIN_SRC_WIDTH, SRC_HEIGHT, TRUE);
+    if (img == NULL)
+    {
+        pixman_image_unref (tmp_img);
+        return NULL;
+    }
+
+    prng_randmemset (tmp_img->bits.bits,
+                     tmp_img->bits.rowstride * SRC_HEIGHT * sizeof (uint32_t),
+                     0);
+    image_endian_swap (tmp_img);
+
+    pixman_image_composite (PIXMAN_OP_SRC, tmp_img, NULL, img,
+                            0, 0, 0, 0, 0, 0,
+                            MIN_SRC_WIDTH, SRC_HEIGHT);
+    pixman_image_composite (PIXMAN_OP_SRC, tmp_img, NULL, img,
+                            0, 0, 0, 0, img->bits.width - MIN_SRC_WIDTH, 0,
+                            MIN_SRC_WIDTH, SRC_HEIGHT);
+
+    pixman_image_unref (tmp_img);
+
+    return img;
+}
+
+static pixman_fixed_t
+random_scale_factor(void)
+{
+    /* Get a random number with top bit set. */
+    uint32_t f = prng_rand () | 0x80000000u;
+
+    /* In log(2) space, this is still approximately evenly spread between 31
+     * and 32. Divide by sqrt(2) to centre the distribution on 2^31.
+     */
+    f = ((uint64_t) f * INV_SQRT_2_0POINT32_FIXED) >> 32;
+
+    /* Now shift right (ie divide by an integer power of 2) to spread the
+     * distribution between centres at 2^(16 +/- LOG2_MAX_FACTOR).
+     */
+    f >>= 31 - 16 + prng_rand_n (2 * LOG2_MAX_FACTOR + 1) - LOG2_MAX_FACTOR;
+
+    return f;
+}
+
+static pixman_fixed_t
+calc_translate (int            dst_size,
+                int            src_size,
+                pixman_fixed_t scale,
+                pixman_bool_t  low_align,
+                pixman_bool_t  bilinear)
+{
+    pixman_fixed_t ref_src, ref_dst, scaled_dst;
+
+    if (low_align)
+    {
+        ref_src = bilinear ? pixman_fixed_1 / 2 : pixman_fixed_e;
+        ref_dst = pixman_fixed_1 / 2;
+    }
+    else
+    {
+        ref_src = pixman_int_to_fixed (src_size) -
+                  bilinear * pixman_fixed_1 / 2;
+        ref_dst = pixman_int_to_fixed (dst_size) - pixman_fixed_1 / 2;
+    }
+
+    scaled_dst = ((uint64_t) ref_dst * scale + pixman_fixed_1 / 2) /
+                 pixman_fixed_1;
+
+    /* We need the translation to be set such that when ref_dst is fed through
+     * the transformation matrix, we get ref_src as the result.
+     */
+    return ref_src - scaled_dst;
+}
+
+static pixman_fixed_t
+random_offset (void)
+{
+    pixman_fixed_t offset = 0;
+
+    /* Ensure we test the exact case quite a lot */
+    if (prng_rand_n (2))
+        return offset;
+
+    /* What happens when we are close to the edge of the first
+     * interpolation step?
+     */
+    if (prng_rand_n (2))
+        offset += (pixman_fixed_1 >> BILINEAR_INTERPOLATION_BITS) - 16;
+
+    /* Try fine-grained variations */
+    offset += prng_rand_n (32);
+
+    /* Test in both directions */
+    if (prng_rand_n (2))
+        offset = -offset;
+
+    return offset;
+}
+
+static void
+check_transform (pixman_image_t     *dst_img,
+                 pixman_image_t     *src_img,
+                 pixman_transform_t *transform,
+                 pixman_bool_t       bilinear)
+{
+    pixman_vector_t v1, v2;
+
+    v1.vector[0] = pixman_fixed_1 / 2;
+    v1.vector[1] = pixman_fixed_1 / 2;
+    v1.vector[2] = pixman_fixed_1;
+    assert (pixman_transform_point (transform, &v1));
+
+    v2.vector[0] = pixman_int_to_fixed (dst_img->bits.width) -
+                   pixman_fixed_1 / 2;
+    v2.vector[1] = pixman_int_to_fixed (dst_img->bits.height) -
+                   pixman_fixed_1 / 2;
+    v2.vector[2] = pixman_fixed_1;
+    assert (pixman_transform_point (transform, &v2));
+
+    if (bilinear)
+    {
+        assert (v1.vector[0] >= pixman_fixed_1 / 2);
+        assert (v1.vector[1] >= pixman_fixed_1 / 2);
+        assert (v2.vector[0] <= pixman_int_to_fixed (src_img->bits.width) -
+                                    pixman_fixed_1 / 2);
+        assert (v2.vector[1] <= pixman_int_to_fixed (src_img->bits.height) -
+                                    pixman_fixed_1 / 2);
+    }
+    else
+    {
+        assert (v1.vector[0] >= pixman_fixed_e);
+        assert (v1.vector[1] >= pixman_fixed_e);
+        assert (v2.vector[0] <= pixman_int_to_fixed (src_img->bits.width));
+        assert (v2.vector[1] <= pixman_int_to_fixed (src_img->bits.height));
+    }
+}
+
+static uint32_t
+test_cover (int testnum, int verbose)
+{
+    pixman_fixed_t         x_scale, y_scale;
+    pixman_bool_t          left_align, top_align;
+    pixman_bool_t          bilinear;
+    pixman_filter_t        filter;
+    pixman_op_t            op;
+    size_t                 src_fmt_index;
+    pixman_format_code_t   src_fmt, dst_fmt, mask_fmt;
+    pixman_image_t        *src_img, *dst_img, *mask_img;
+    pixman_transform_t     src_transform, mask_transform;
+    pixman_fixed_t         fuzz[4];
+    uint32_t               crc32;
+
+    /* We allocate one fenced image for each pixel format up-front. This is to
+     * avoid spending a lot of time on memory management rather than on testing
+     * Pixman optimisations. We need one per thread because the transformation
+     * matrices and filtering are properties of the source and mask images.
+     */
+    static pixman_image_t *src_imgs[ARRAY_LENGTH (img_fmt_list)];
+    static pixman_image_t *mask_bits_img;
+    static pixman_bool_t   fence_images_created;
+#ifdef USE_OPENMP
+#pragma omp threadprivate (src_imgs)
+#pragma omp threadprivate (mask_bits_img)
+#pragma omp threadprivate (fence_images_created)
+#endif
+
+    if (!fence_images_created)
+    {
+        int i;
+
+        prng_srand (0);
+
+        for (i = 0; i < ARRAY_LENGTH (img_fmt_list); i++)
+            src_imgs[i] = create_src_image (img_fmt_list[i]);
+
+        mask_bits_img = create_src_image (PIXMAN_a8);
+
+        fence_images_created = TRUE;
+    }
+
+    prng_srand (testnum);
+
+    x_scale = random_scale_factor ();
+    y_scale = random_scale_factor ();
+    left_align = prng_rand_n (2);
+    top_align = prng_rand_n (2);
+    bilinear = prng_rand_n (2);
+    filter = bilinear ? PIXMAN_FILTER_BILINEAR : PIXMAN_FILTER_NEAREST;
+
+    op = op_list[prng_rand_n (ARRAY_LENGTH (op_list))];
+
+    dst_fmt = img_fmt_list[prng_rand_n (ARRAY_LENGTH (img_fmt_list))];
+    dst_img = pixman_image_create_bits (dst_fmt, DST_WIDTH, DST_HEIGHT,
+                                        NULL, 0);
+    prng_randmemset (dst_img->bits.bits,
+                     dst_img->bits.rowstride * DST_HEIGHT * sizeof (uint32_t),
+                     0);
+    image_endian_swap (dst_img);
+
+    src_fmt_index = prng_rand_n (ARRAY_LENGTH (img_fmt_list));
+    src_fmt = img_fmt_list[src_fmt_index];
+    src_img = src_imgs[src_fmt_index];
+    pixman_image_set_filter (src_img, filter, NULL, 0);
+    pixman_transform_init_scale (&src_transform, x_scale, y_scale);
+    src_transform.matrix[0][2] = calc_translate (dst_img->bits.width,
+                                                 src_img->bits.width,
+                                                 x_scale, left_align, bilinear);
+    src_transform.matrix[1][2] = calc_translate (dst_img->bits.height,
+                                                 src_img->bits.height,
+                                                 y_scale, top_align, bilinear);
+
+    if (prng_rand_n (2))
+    {
+        /* No mask */
+        mask_fmt = PIXMAN_null;
+        mask_img = NULL;
+    }
+    else if (prng_rand_n (2))
+    {
+        /* a8 bitmap mask */
+        mask_fmt = PIXMAN_a8;
+        mask_img = mask_bits_img;
+        pixman_image_set_filter (mask_img, filter, NULL, 0);
+        pixman_transform_init_scale (&mask_transform, x_scale, y_scale);
+        mask_transform.matrix[0][2] = calc_translate (dst_img->bits.width,
+                                                      mask_img->bits.width,
+                                                      x_scale, left_align,
+                                                      bilinear);
+        mask_transform.matrix[1][2] = calc_translate (dst_img->bits.height,
+                                                      mask_img->bits.height,
+                                                      y_scale, top_align,
+                                                      bilinear);
+    }
+    else
+    {
+        /* Solid mask */
+        pixman_color_t color;
+        memset (&color, 0xAA, sizeof color);
+        mask_fmt = PIXMAN_solid;
+        mask_img = pixman_image_create_solid_fill (&color);
+    }
+
+    if (!exact)
+    {
+        int i = 0;
+
+        while (i < 4)
+            fuzz[i++] = random_offset ();
+
+        src_transform.matrix[0][2] += fuzz[0];
+        src_transform.matrix[1][2] += fuzz[1];
+        mask_transform.matrix[0][2] += fuzz[2];
+        mask_transform.matrix[1][2] += fuzz[3];
+    }
+
+    pixman_image_set_transform (src_img, &src_transform);
+    if (mask_fmt == PIXMAN_a8)
+        pixman_image_set_transform (mask_img, &mask_transform);
+
+    if (verbose)
+    {
+        printf ("op=%s\n", operator_name (op));
+        printf ("src_fmt=%s, dst_fmt=%s, mask_fmt=%s\n",
+                format_name (src_fmt), format_name (dst_fmt),
+                format_name (mask_fmt));
+        printf ("x_scale=0x%08X, y_scale=0x%08X, align %s/%s, %s\n",
+                x_scale, y_scale,
+                left_align ? "left" : "right", top_align ? "top" : "bottom",
+                bilinear ? "bilinear" : "nearest");
+
+        if (!exact)
+        {
+            int i = 0;
+
+            printf ("fuzz factors");
+            while (i < 4)
+                printf (" %d", fuzz[i++]);
+            printf ("\n");
+        }
+    }
+
+    if (exact)
+    {
+        check_transform (dst_img, src_img, &src_transform, bilinear);
+        if (mask_fmt == PIXMAN_a8)
+            check_transform (dst_img, mask_img, &mask_transform, bilinear);
+    }
+
+    pixman_image_composite (op, src_img, mask_img, dst_img,
+                            0, 0, 0, 0, 0, 0,
+                            dst_img->bits.width, dst_img->bits.height);
+
+    if (verbose)
+        print_image (dst_img);
+
+    crc32 = compute_crc32_for_image (0, dst_img);
+
+    pixman_image_unref (dst_img);
+    if (mask_fmt == PIXMAN_solid)
+        pixman_image_unref (mask_img);
+
+    return crc32;
+}
+
+#if BILINEAR_INTERPOLATION_BITS == 7
+#define CHECKSUM_FUZZ  0x6B56F607
+#define CHECKSUM_EXACT 0xA669F4A3
+#elif BILINEAR_INTERPOLATION_BITS == 4
+#define CHECKSUM_FUZZ  0x83119ED0
+#define CHECKSUM_EXACT 0x0D3382CD
+#else
+#define CHECKSUM_FUZZ  0x00000000
+#define CHECKSUM_EXACT 0x00000000
+#endif
+
+int
+main (int argc, const char *argv[])
+{
+    unsigned long page_size;
+
+    page_size = fence_get_page_size ();
+    if (page_size == 0 || page_size > 16 * 1024)
+        return 77; /* automake SKIP */
+
+    exact = getenv ("EXACT") != NULL;
+    if (exact)
+        printf ("Doing plots that are exactly aligned to boundaries\n");
+
+    return fuzzer_test_main ("cover", 2000000,
+                             exact ? CHECKSUM_EXACT : CHECKSUM_FUZZ,
+                             test_cover, argc, argv);
+}