#include #include #include #include #include #include #include #include #include #include "ir.h" static IRVar * load_mask (IR *ir, pixman_format_code_t code, IRVar *pointer) { IRVar *result = ir_var_new (ir, 4, IR_UINT8); IRVar *t = ir_var_new (ir, 4, IR_UINT8); if (code != PIXMAN_a8) { assert (0); return NULL; } ir_load8 (ir, result, pointer); ir_mov (ir, t, result); return result; } static IRVar * load (IR *ir, pixman_format_code_t code, IRVar *pointer) { IRVar *result = ir_var_new (ir, 4, IR_UINT8); if (code != PIXMAN_a8r8g8b8) { assert (0); return NULL; } ir_load32 (ir, result, pointer); return result; } /* Note: this function assumes its input are 0.8 * fixpoint numbers. */ static IRVar * pixel_multiply (IR *ir, IRVar *src, IRVar *mask) { IRVar *res = ir_var_new_similar (ir, src); IRVar *t = ir_var_new_similar (ir, src); ir_mul (ir, res, src, mask); ir_adds (ir, res, res, ir_iconst1 (ir, 0x0080)); ir_shrl (ir, t, res, ir_iconst1 (ir, 8)); ir_adds (ir, res, res, t); ir_shrl (ir, res, res, ir_iconst1 (ir, 8)); return res; } static IR * generate_func (pixman_op_t op, pixman_format_code_t src_format, pixman_format_code_t mask_format, pixman_format_code_t dest_format) { IR *ir = ir_new (); IRVar *width = ir_arg (ir, 0, IR_UINT32); IRVar *height = ir_arg (ir, 1, IR_UINT32); IRVar *src = ir_arg (ir, 2, IR_POINTER); IRVar *mask = ir_arg (ir, 3, IR_POINTER); IRVar *dest = ir_arg (ir, 4, IR_POINTER); IRVar *src_stride = ir_arg (ir, 5, IR_INT32); /* In bytes */ IRVar *mask_stride = ir_arg (ir, 6, IR_INT32); /* In bytes */ IRVar *dest_stride = ir_arg (ir, 7, IR_INT32); /* In bytes */ IRVar *h, *w; ir_mov (ir, h, height); ir_jump (ir, "outer_test"); { IRVar *src_line = ir_var_new (ir, 1, IR_POINTER); IRVar *mask_line = ir_var_new (ir, 1, IR_POINTER); IRVar *dest_line = ir_var_new (ir, 1, IR_POINTER); /* Outer loop */ ir_label (ir, "outer_loop"); ir_mov (ir, dest_line, dest); ir_add (ir, dest, dest, dest_stride); ir_mov (ir, src_line, src); ir_add (ir, src, src, src_stride); ir_mov (ir, mask_line, mask); ir_add (ir, mask, mask, mask_stride); ir_mov (ir, w, width); ir_jump (ir, "inner_test"); { IRVar *s, *m, *d; /* Inner loop */ ir_label (ir, "inner_loop"); m = load_mask (ir, PIXMAN_a8, mask_line); s = load (ir, PIXMAN_a8r8g8b8, src_line); d = load (ir, PIXMAN_a8r8g8b8, dest_line); if (op == PIXMAN_OP_OVER) { /* Assumptions made here: * * The _lo and _hi variables contain at least one full * expanded pixel. (Otherwise shuffle4 doesn't work) * This sort of implies that we have do two pixels at * a time, or at least that there is enough room for that. * Unfortunately, this doesn't work if the intermediate * format is 16bit and the backend is mmx. * * Channels are expanded to 16 bits. Otherwise * pixel_multiply() doesn't work. * * The mask is read as 8888, not as 8000. * * All of this is highly inconvenient for armv6 and * pre-mmx x86. * * For JIT compiling, do we simply assume that there * is a vector unit? Or do we have separate IR code * for non-vector arches? Or do we emulate a vector * unit in non-vector machine code? * * ARMs have many registers, so simply emulating a 64 * bit vector unit may not be too bad. * */ IRVar *alpha_lo = ir_var_new_wider (ir, m); IRVar *alpha_hi = ir_var_new_wider (ir, m); IRVar *src_lo = ir_var_new_wider (ir, s); IRVar *src_hi = ir_var_new_wider (ir, s); IRVar *dest_lo = ir_var_new_wider (ir, s); IRVar *dest_hi = ir_var_new_wider (ir, s); IRVar *neg_alpha_lo = ir_var_new_wider (ir, m); IRVar *neg_alpha_hi = ir_var_new_wider (ir, m); ir_unpack_low (ir, alpha_lo, m, ir_iconst1 (ir, 0)); ir_unpack_high (ir, alpha_hi, m, ir_iconst1 (ir, 0)); ir_unpack_low (ir, src_lo, s, ir_iconst1 (ir, 0)); ir_unpack_high (ir, src_hi, s, ir_iconst1 (ir, 0)); src_lo = pixel_multiply (ir, src_lo, alpha_lo); src_hi = pixel_multiply (ir, src_hi, alpha_hi); ir_pack (ir, s, src_lo, src_hi); ir_shuffle4 (ir, neg_alpha_lo, src_lo, 3, 3, 3, 3); ir_shuffle4 (ir, neg_alpha_hi, src_hi, 3, 3, 3, 3); ir_xor (ir, neg_alpha_lo, alpha_lo, ir_iconst1 (ir, 0x00ff)); ir_xor (ir, neg_alpha_hi, alpha_hi, ir_iconst1 (ir, 0x00ff)); ir_unpack_low (ir, dest_lo, dest, ir_iconst1 (ir, 0)); ir_unpack_high (ir, dest_hi, dest, ir_iconst1 (ir, 0)); dest_lo = pixel_multiply (ir, dest_lo, neg_alpha_lo); dest_hi = pixel_multiply (ir, dest_hi, neg_alpha_hi); ir_pack (ir, d, dest_lo, dest_hi); ir_adds (ir, s, s, d); ir_store (ir, s, dest_line); } else { assert (0); return NULL; } ir_sub (ir, w, w, ir_iconst1 (ir, 1)); ir_label (ir, "inner_test"); ir_jump_neq (ir, w, ir_iconst1 (ir, 0), "inner_loop"); } ir_sub (ir, h, h, ir_iconst1 (ir, 1)); ir_label (ir, "outer_test"); ir_jump_neq (ir, h, ir_iconst1 (ir, 0), "outer_loop"); } return ir; } int main () { IR *ir = generate_func (PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8); if (ir) g_print ("Generated IR\n"); return 0; }