/* The two original system includes were lost in extraction; <stdlib.h> and
 * <glib.h> (for gboolean) are assumed here. */
#include <stdlib.h>
#include <glib.h>

#include "pixman-private.h"
#include "ir.h"

/* Describes a compositing operation */

#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0)

/* For now, we don't support
 *
 * - transformations
 * - filters
 * - repeats
 * - solid/gradients
 */
typedef struct op_info_t op_info_t;
struct op_info_t
{
    pixman_op_t          op;
    pixman_format_code_t src_fmt;
    pixman_format_code_t mask_fmt;
    pixman_format_code_t dest_fmt;
    pixman_format_code_t intermediate_fmt;
    gboolean             need_unpack;
    gboolean             two_strands;
    int                  vsize;
    int                  n_pixels;
};

static gboolean
verify_image_types (pixman_image_t *source,
                    pixman_image_t *mask,
                    pixman_image_t *dest)
{
    if (source->common.transform                       ||
        source->common.filter != PIXMAN_FILTER_NEAREST ||
        source->common.repeat != PIXMAN_REPEAT_NONE    ||
        source->common.type != BITS)
    {
        return FALSE;
    }

    if (mask)
    {
        if (mask->common.transform                       ||
            mask->common.filter != PIXMAN_FILTER_NEAREST ||
            mask->common.repeat != PIXMAN_REPEAT_NONE    ||
            mask->common.type != BITS)
        {
            return FALSE;
        }
    }

    return TRUE;
}

#define PIXMAN_a16r16g16b16 PIXMAN_FORMAT(64,PIXMAN_TYPE_ARGB,16,16,16,16)

static void
generate_inner_loop (IR *ir, const op_info_t *info, int n_pixels,
                     IRVar *w, IRVar *src, IRVar *mask, IRVar *dest)
{
    /* Note: this can almost deal with the case where n_pixels
     * intermediate expanded fits in a register completely. In that case,
     * unpacking will still be necessary, but we don't need the extra
     * register.
     *
     * One way to deal with this would be to not do any *packing* and
     * let a dead code eliminator do its job.
     */

    /* src1 = read (src, n_pixels); */
    /* src1 = convert (src1, sfmt, intermediate_fmt); */
    /* if (need_unpack) src1, src2 = unpack (src1); */

    /* mask1 = read (mask, n_pixels); */
    /* mask1 = convert (mask1, mfmt, intermediate_fmt); */
    /* if (need_unpack) mask1, mask2 = unpack (mask); */

    /* If component alpha,
     *     src1, alpha1 = combine_mask (src1, mask1);
     *     if (need_unpack) { src2, alpha2 = combine (src2, mask2); }
     * else
     *     alpha1 = expand (src1 alpha);
     *     if (need_unpack) { alpha2 = expand (src2 alpha); }
     */

    /* dest1 = read (dest1, n_pixels); */
    /* dest1 = convert (dest1, dfmt, intermediate_fmt); */
    /* if (need_unpack) dest1, dest2 = unpack (dest1); */

    /* res1 = combine (src1, alpha1, dest1) */
    /* if (need_unpack) { res2 = combine (src2, alpha2, dest2); } */

    /* if (need_unpack) { res1 = pack (res1, res2); } */

    /* write (dst, res1, n_pixels) */
}
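
/* A minimal sketch of what the pseudo-code above could emit for the
 * simplest case: no mask and no unpacking (e.g. PIXMAN_OP_SRC, or
 * PIXMAN_OP_ADD with matching formats).  The helpers ir_read (),
 * ir_convert () and ir_write () are hypothetical -- they do not appear
 * in the ir.h calls used elsewhere in this file -- so the sketch is
 * kept under #if 0.
 */
#if 0
static void
generate_inner_loop_simple (IR *ir, const op_info_t *info, int n_pixels,
                            IRVar *w, IRVar *src, IRVar *dest)
{
    /* Load n_pixels source pixels and widen them to the intermediate format */
    IRVar *s = ir_read (ir, src, n_pixels);
    s = ir_convert (ir, s, info->src_fmt, info->intermediate_fmt);

    /* For OP_SRC the combine step is the identity; OP_ADD would instead
     * add the converted destination pixels before writing back. */
    ir_write (ir, dest, s, n_pixels);

    /* Advance the pointers (in bytes) and the remaining-pixel counter */
    ir_add (ir, src, src,
            ir_iconst1 (ir, n_pixels * PIXMAN_FORMAT_BPP (info->src_fmt) / 8));
    ir_add (ir, dest, dest,
            ir_iconst1 (ir, n_pixels * PIXMAN_FORMAT_BPP (info->dest_fmt) / 8));
    ir_sub (ir, w, w, ir_iconst1 (ir, n_pixels));
}
#endif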

static IR *
generate_intermediate (const op_info_t *info)
{
#if 0
    /* This is the prototype of the function we generate.
     *
     * The "_skip" arguments are "_stride - (width * src_bpp) / 8"
     *
     * This is to minimize the amount of setup code that has to be jit
     * compiled.
     *
     * Note that this doesn't work for transformations (or source pictures
     * for that matter), so it will have to be revisited at some point
     *
     * height must be > 0, or bad things will happen.
     */
    void (* CompositeOp) (uint32_t *src_start,   /* arg 0 */
                          int32_t   src_skip,    /* arg 1 */
                          uint32_t *mask_start,  /* arg 2 */
                          int32_t   mask_skip,   /* arg 3 */
                          uint32_t *dest_start,  /* arg 4 */
                          int32_t   dest_skip,   /* arg 5 */
                          uint32_t  width,       /* arg 6 */
                          uint32_t  height);     /* arg 7 */
#endif

    IR *ir;
    IRVar *src_start, *src_skip;
    IRVar *mask_start, *mask_skip;
    IRVar *dest_start, *dest_skip;
    IRVar *width, *height, *h, *w;
    int n_pixels;

    ir = ir_new ();

    src_start  = ir_arg (ir, 0, IR_POINTER);
    src_skip   = ir_arg (ir, 1, IR_INT32);
    mask_start = ir_arg (ir, 2, IR_POINTER);
    mask_skip  = ir_arg (ir, 3, IR_INT32);
    dest_start = ir_arg (ir, 4, IR_POINTER);
    dest_skip  = ir_arg (ir, 5, IR_INT32);
    width      = ir_arg (ir, 6, IR_UINT32);
    height     = ir_arg (ir, 7, IR_UINT32);

    h = ir_var_new_similar (ir, height);
    w = ir_var_new_similar (ir, width);

    /* Number of pixels handled per iteration of the widest inner loop;
     * this must be set before it is used for the alignment mask below. */
    n_pixels = info->n_pixels;

    ir_mov (ir, h, height);
    ir_jump (ir, "outer_test");     /* Unnecessary if height > 0? */

    ir_label (ir, "outer_loop");
    {
        int mask = PIXMAN_FORMAT_BPP (info->dest_fmt) * n_pixels - 1;
        IRVar *v = ir_var_new_similar (ir, dest_start);
        int i;

        ir_mov (ir, w, width);
        ir_jump (ir, "inner_test"); /* Unnecessary if width > 0? */

        ir_label (ir, "inner_loop");

        ir_jump_nand (ir, dest_start, ir_iconst1 (ir, mask), "full_width");

        /* Generate inner loops */
        for (i = 1; i < n_pixels; i *= 2)
        {
            /* if (w >= 2 * i && !(dest & (2 * i - 1))) */
            /*     goto next */
        }

        while (n_pixels)
        {
            generate_inner_loop (ir, info, n_pixels,
                                 w, src_start, mask_start, dest_start);

            n_pixels = n_pixels / 2;
        }

        ir_label (ir, "inner_test");
        ir_jump_neq (ir, w, ir_iconst1 (ir, 0), "inner_loop");
    }

    /* Go to next line */
    ir_add (ir, src_start, src_start, src_skip);
    ir_add (ir, mask_start, mask_start, mask_skip);
    ir_add (ir, dest_start, dest_start, dest_skip);
    ir_sub (ir, h, h, ir_iconst1 (ir, 1));

    ir_label (ir, "outer_test");
    ir_jump_neq (ir, h, ir_iconst1 (ir, 0), "outer_loop");

    return ir;
}
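
/* For reference, the IR emitted above corresponds roughly to the C
 * control flow below.  Only the loop structure is shown -- the
 * per-pixel work belongs to generate_inner_loop () -- and the skip
 * values are assumed to be byte counts, following the
 * "_stride - (width * src_bpp) / 8" formula in the prototype comment.
 */
#if 0
static void
composite_op_reference (uint8_t *src_start,  int32_t src_skip,
                        uint8_t *mask_start, int32_t mask_skip,
                        uint8_t *dest_start, int32_t dest_skip,
                        uint32_t width, uint32_t height)
{
    uint32_t h = height;

    goto outer_test;                /* Unnecessary if height > 0 */

outer_loop:
    {
        uint32_t w = width;

        goto inner_test;            /* Unnecessary if width > 0 */

    inner_loop:
        /* ... body generated by generate_inner_loop (); it consumes
         * some pixels, advances src/mask/dest and decrements w ... */

    inner_test:
        if (w != 0)
            goto inner_loop;
    }

    /* Go to next line */
    src_start  += src_skip;
    mask_start += mask_skip;
    dest_start += dest_skip;
    h--;

outer_test:
    if (h != 0)
        goto outer_loop;
}
#endif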

static IR *
make_ir (int pref_vsize,
         pixman_op_t op,
         pixman_image_t *source,
         pixman_image_t *mask,
         pixman_image_t *dest)
{
    op_info_t info;
    int intermediate_size;
    int mult;

    if (!verify_image_types (source, mask, dest))
        return NULL;

    /*
     * Basic information
     */
    info.op = op;
    info.src_fmt = source->bits.format;
    info.mask_fmt = mask ? mask->bits.format : PIXMAN_null;
    info.dest_fmt = dest->bits.format;

    /*
     * Determine intermediate format
     */
    if (info.op == PIXMAN_OP_ADD        &&
        info.mask_fmt == PIXMAN_null    &&
        info.src_fmt == PIXMAN_a8       &&
        info.dest_fmt == PIXMAN_a8)
    {
        info.intermediate_fmt = PIXMAN_a8;
    }
    else if (PIXMAN_FORMAT_16BPC (info.src_fmt)  ||
             PIXMAN_FORMAT_16BPC (info.mask_fmt) ||
             PIXMAN_FORMAT_16BPC (info.dest_fmt))
    {
        info.intermediate_fmt = PIXMAN_a16r16g16b16;
    }
    else
    {
        info.intermediate_fmt = PIXMAN_a8r8g8b8;
    }

    /* Decide whether we will need multiplications (if so, the
     * pixels will have to be unpacked, otherwise they won't)
     */
    if (info.mask_fmt == PIXMAN_null                                      &&
        ((op == PIXMAN_OP_ADD)                                            ||
         (op == PIXMAN_OP_OVER && PIXMAN_FORMAT_A (info.src_fmt) == 0)    ||
         (op == PIXMAN_OP_SRC)))
    {
        info.need_unpack = FALSE;
    }
    else
    {
        info.need_unpack = TRUE;
    }

    mult = info.need_unpack ? 2 : 1;

    intermediate_size = PIXMAN_FORMAT_BPP (info.intermediate_fmt) / 8;
    intermediate_size = (intermediate_size + 3) & ~0x3;

    /* Now determine the vector size we will use, based on the
     * backend's preferred vector size. We need to make sure that
     * there is enough room for at least one complete intermediate,
     * taking into account whether we need multiplications.
     *
     * We will never deal with pixels that are split over more
     * than one register - if that would be the case, the backend
     * will just have to provide bigger registers.
     */
    if (pref_vsize < mult * intermediate_size)
    {
        info.n_pixels = 1;

        /* In this case, backends will need to deal with getting
         * vectors that are bigger than their preferred size */
        info.vsize = mult * intermediate_size;
    }
    else
    {
        /* We don't take mult into account here because the unpacking
         * will make use of an additional register. */
        info.n_pixels = pref_vsize / intermediate_size;
        info.vsize = pref_vsize;
    }

    return generate_intermediate (&info);
}

int
main (void)
{
    IR *ir;

    /* Note: verify_image_types () dereferences source and dest, so
     * calling make_ir () with NULL images will crash; see the sketch
     * with real images below. */
    ir = make_ir (16, PIXMAN_OP_OVER, NULL, NULL, NULL);

    return ir != NULL;
}
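
/* A sketch of how make_ir () could be driven with real images rather
 * than the NULL arguments above.  pixman_image_create_bits () and
 * pixman_image_unref () are the regular pixman entry points; the
 * formats and dimensions are arbitrary.  Kept under #if 0 since it is
 * only an illustration.
 */
#if 0
static IR *
make_test_ir (void)
{
    static uint32_t src_bits[8 * 8];
    static uint32_t dest_bits[8 * 8];
    pixman_image_t *src, *dest;
    IR *ir;

    src = pixman_image_create_bits (PIXMAN_a8r8g8b8, 8, 8, src_bits, 8 * 4);
    dest = pixman_image_create_bits (PIXMAN_x8r8g8b8, 8, 8, dest_bits, 8 * 4);

    /* No mask, but OVER with an alpha-carrying source still sets
     * need_unpack, so the generated loop will use the unpacked path. */
    ir = make_ir (16, PIXMAN_OP_OVER, src, NULL, dest);

    pixman_image_unref (src);
    pixman_image_unref (dest);

    return ir;
}
#endif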