/* The two original system includes were lost in extraction; <stdlib.h> and
 * <glib.h> (for gboolean) are assumed here. */
#include <stdlib.h>
#include <glib.h>

#include "pixman-private.h"
#include "ir.h"

/* Describes a compositing operation */

#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0)

/* For now, we don't support
 *
 * - transformations
 * - filters
 * - repeats
 * - solid/gradients
 */
typedef struct op_info_t op_info_t;
struct op_info_t
{
    pixman_op_t          op;
    pixman_format_code_t src_fmt;
    pixman_format_code_t mask_fmt;
    pixman_format_code_t dest_fmt;
    pixman_format_code_t intermediate_fmt;
    gboolean             need_unpack;
    gboolean             two_strands;
    int                  vsize;
    int                  n_pixels;
};

static gboolean
verify_image_types (pixman_image_t *source,
                    pixman_image_t *mask,
                    pixman_image_t *dest)
{
    if (source->common.transform                       ||
        source->common.filter != PIXMAN_FILTER_NEAREST ||
        source->common.repeat != PIXMAN_REPEAT_NONE    ||
        source->common.type != BITS)
    {
        return FALSE;
    }

    if (mask)
    {
        if (mask->common.transform                       ||
            mask->common.filter != PIXMAN_FILTER_NEAREST ||
            mask->common.repeat != PIXMAN_REPEAT_NONE    ||
            mask->common.type != BITS)
        {
            return FALSE;
        }
    }

    return TRUE;
}

#define PIXMAN_a16r16g16b16 PIXMAN_FORMAT(64,PIXMAN_TYPE_ARGB,16,16,16,16)

static void
generate_inner_loop (IR *ir, const op_info_t *info, int n_pixels,
                     IRVar *w, IRVar *src, IRVar *mask, IRVar *dest)
{
    /* Note: this can almost deal with the case where n_pixels
     * intermediate expanded fits in a register completely. In that case,
     * unpacking will still be necessary, but we don't need the extra
     * register.
     *
     * One way to deal with this would be to not do any *packing* and
     * let a dead code eliminator do its job.
     */

    /* src1 = read (src, n_pixels); */
    /* src1 = convert (src1, sfmt, intermediate_fmt); */
    /* if (need_unpack) src1, src2 = unpack (src1); */

    /* mask1 = read (mask, n_pixels); */
    /* mask1 = convert (mask1, mfmt, intermediate_fmt); */
    /* if (need_unpack) mask1, mask2 = unpack (mask); */

    /* If component alpha,
     *     src1, alpha1 = combine_mask (src1, mask1);
     *     if (need_unpack) { src2, alpha2 = combine (src2, mask2); }
     * else
     *     alpha1 = expand (src1 alpha);
     *     if (need_unpack) { alpha2 = expand (src2 alpha); }
     */

    /* dest1 = read (dest1, n_pixels); */
    /* dest1 = convert (dest1, dfmt, intermediate_fmt); */
    /* if (need_unpack) dest1, dest2 = unpack (dest1); */

    /* res1 = combine (src1, alpha1, dest1) */
    /* if (need_unpack) { res2 = combine (src2, alpha2, dest2); } */

    /* if (need_unpack) { res1 = pack (res1, res2); } */

    /* write (dst, res1, n_pixels) */
}
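
/* A minimal sketch of what the pseudo-code above could emit for the
 * simplest case: no mask and no unpacking (e.g. PIXMAN_OP_SRC, or
 * PIXMAN_OP_ADD with matching formats).  The helpers ir_read (),
 * ir_convert () and ir_write () are hypothetical -- they do not appear
 * in the ir.h calls used elsewhere in this file -- so the sketch is
 * kept under #if 0.
 */
#if 0
static void
generate_inner_loop_simple (IR *ir, const op_info_t *info, int n_pixels,
                            IRVar *w, IRVar *src, IRVar *dest)
{
    /* Load n_pixels source pixels and widen them to the intermediate format */
    IRVar *s = ir_read (ir, src, n_pixels);
    s = ir_convert (ir, s, info->src_fmt, info->intermediate_fmt);

    /* For OP_SRC the combine step is the identity; OP_ADD would instead
     * add the converted destination pixels before writing back. */
    ir_write (ir, dest, s, n_pixels);

    /* Advance the pointers (in bytes) and the remaining-pixel counter */
    ir_add (ir, src, src,
            ir_iconst1 (ir, n_pixels * PIXMAN_FORMAT_BPP (info->src_fmt) / 8));
    ir_add (ir, dest, dest,
            ir_iconst1 (ir, n_pixels * PIXMAN_FORMAT_BPP (info->dest_fmt) / 8));
    ir_sub (ir, w, w, ir_iconst1 (ir, n_pixels));
}
#endif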

static IR *
generate_intermediate (const op_info_t *info)
{
#if 0
    /* This is the prototype of the function we generate.
     *
     * The "_skip" arguments are "_stride - (width * src_bpp) / 8"
     *
     * This is to minimize the amount of setup code that has to be jit
     * compiled.
     *
     * Note that this doesn't work for transformations (or source pictures
     * for that matter), so it will have to be revisited at some point
     *
     * height must be > 0, or bad things will happen.
     */
    void (* CompositeOp) (uint32_t *src_start,   /* arg 0 */
                          int32_t   src_skip,    /* arg 1 */
                          uint32_t *mask_start,  /* arg 2 */
                          int32_t   mask_skip,   /* arg 3 */
                          uint32_t *dest_start,  /* arg 4 */
                          int32_t   dest_skip,   /* arg 5 */
                          uint32_t  width,       /* arg 6 */
                          uint32_t  height);     /* arg 7 */
#endif

    IR *ir;
    IRVar *src_start, *src_skip;
    IRVar *mask_start, *mask_skip;
    IRVar *dest_start, *dest_skip;
    IRVar *width, *height, *h, *w;
    int n_pixels;

    ir = ir_new ();

    src_start  = ir_arg (ir, 0, IR_POINTER);
    src_skip   = ir_arg (ir, 1, IR_INT32);
    mask_start = ir_arg (ir, 2, IR_POINTER);
    mask_skip  = ir_arg (ir, 3, IR_INT32);
    dest_start = ir_arg (ir, 4, IR_POINTER);
    dest_skip  = ir_arg (ir, 5, IR_INT32);
    width      = ir_arg (ir, 6, IR_UINT32);
    height     = ir_arg (ir, 7, IR_UINT32);

    h = ir_var_new_similar (ir, height);
    w = ir_var_new_similar (ir, width);

    /* Number of pixels handled per iteration of the widest inner loop;
     * this must be set before it is used for the alignment mask below. */
    n_pixels = info->n_pixels;

    ir_mov (ir, h, height);
    ir_jump (ir, "outer_test");     /* Unnecessary if height > 0? */

    ir_label (ir, "outer_loop");
    {
        int mask = PIXMAN_FORMAT_BPP (info->dest_fmt) * n_pixels - 1;
        IRVar *v = ir_var_new_similar (ir, dest_start);
        int i;

        ir_mov (ir, w, width);
        ir_jump (ir, "inner_test"); /* Unnecessary if width > 0? */

        ir_label (ir, "inner_loop");

        ir_jump_nand (ir, dest_start, ir_iconst1 (ir, mask), "full_width");

        /* Generate inner loops */
        for (i = 1; i < n_pixels; i *= 2)
        {
            /* if (w >= 2 * i && !(dest & (2 * i - 1))) */
            /*     goto next */
        }

        while (n_pixels)
        {
            generate_inner_loop (ir, info, n_pixels,
                                 w, src_start, mask_start, dest_start);

            n_pixels = n_pixels / 2;
        }

        ir_label (ir, "inner_test");
        ir_jump_neq (ir, w, ir_iconst1 (ir, 0), "inner_loop");
    }

    /* Go to next line */
    ir_add (ir, src_start, src_start, src_skip);
    ir_add (ir, mask_start, mask_start, mask_skip);
    ir_add (ir, dest_start, dest_start, dest_skip);
    ir_sub (ir, h, h, ir_iconst1 (ir, 1));

    ir_label (ir, "outer_test");
    ir_jump_neq (ir, h, ir_iconst1 (ir, 0), "outer_loop");

    return ir;
}
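
/* For reference, the IR emitted above corresponds roughly to the C
 * control flow below.  Only the loop structure is shown -- the
 * per-pixel work belongs to generate_inner_loop () -- and the skip
 * values are assumed to be byte counts, following the
 * "_stride - (width * src_bpp) / 8" formula in the prototype comment.
 */
#if 0
static void
composite_op_reference (uint8_t *src_start,  int32_t src_skip,
                        uint8_t *mask_start, int32_t mask_skip,
                        uint8_t *dest_start, int32_t dest_skip,
                        uint32_t width, uint32_t height)
{
    uint32_t h = height;

    goto outer_test;                /* Unnecessary if height > 0 */

outer_loop:
    {
        uint32_t w = width;

        goto inner_test;            /* Unnecessary if width > 0 */

    inner_loop:
        /* ... body generated by generate_inner_loop (); it consumes
         * some pixels, advances src/mask/dest and decrements w ... */

    inner_test:
        if (w != 0)
            goto inner_loop;
    }

    /* Go to next line */
    src_start  += src_skip;
    mask_start += mask_skip;
    dest_start += dest_skip;
    h--;

outer_test:
    if (h != 0)
        goto outer_loop;
}
#endif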

static IR *
make_ir (int pref_vsize,
         pixman_op_t op,
         pixman_image_t *source,
         pixman_image_t *mask,
         pixman_image_t *dest)
{
    op_info_t info;
    int intermediate_size;
    int mult;

    if (!verify_image_types (source, mask, dest))
        return NULL;

    /*
     * Basic information
     */
    info.op = op;
    info.src_fmt = source->bits.format;
    info.mask_fmt = mask ? mask->bits.format : PIXMAN_null;
    info.dest_fmt = dest->bits.format;

    /*
     * Determine intermediate format
     */
    if (info.op == PIXMAN_OP_ADD        &&
        info.mask_fmt == PIXMAN_null    &&
        info.src_fmt == PIXMAN_a8       &&
        info.dest_fmt == PIXMAN_a8)
    {
        info.intermediate_fmt = PIXMAN_a8;
    }
    else if (PIXMAN_FORMAT_16BPC (info.src_fmt)  ||
             PIXMAN_FORMAT_16BPC (info.mask_fmt) ||
             PIXMAN_FORMAT_16BPC (info.dest_fmt))
    {
        info.intermediate_fmt = PIXMAN_a16r16g16b16;
    }
    else
    {
        info.intermediate_fmt = PIXMAN_a8r8g8b8;
    }

    /* Decide whether we will need multiplications (if so, the
     * pixels will have to be unpacked, otherwise they won't)
     */
    if (info.mask_fmt == PIXMAN_null                                      &&
        ((op == PIXMAN_OP_ADD)                                            ||
         (op == PIXMAN_OP_OVER && PIXMAN_FORMAT_A (info.src_fmt) == 0)    ||
         (op == PIXMAN_OP_SRC)))
    {
        info.need_unpack = FALSE;
    }
    else
    {
        info.need_unpack = TRUE;
    }

    mult = info.need_unpack ? 2 : 1;

    intermediate_size = PIXMAN_FORMAT_BPP (info.intermediate_fmt) / 8;
    intermediate_size = (intermediate_size + 3) & ~0x3;

    /* Now determine the vector size we will use, based on the
     * backend's preferred vector size. We need to make sure that
     * there is enough room for at least one complete intermediate,
     * taking into account whether we need multiplications.
     *
     * We will never deal with pixels that are split over more
     * than one register - if that would be the case, the backend
     * will just have to provide bigger registers.
     */
    if (pref_vsize < mult * intermediate_size)
    {
        info.n_pixels = 1;

        /* In this case, backends will need to deal with getting
         * vectors that are bigger than their preferred size */
        info.vsize = mult * intermediate_size;
    }
    else
    {
        /* We don't take mult into account here because the unpacking
         * will make use of an additional register. */
        info.n_pixels = pref_vsize / intermediate_size;
        info.vsize = pref_vsize;
    }

    return generate_intermediate (&info);
}

int
main (void)
{
    IR *ir;

    /* Note: verify_image_types () dereferences source and dest, so
     * calling make_ir () with NULL images will crash; see the sketch
     * with real images below. */
    ir = make_ir (16, PIXMAN_OP_OVER, NULL, NULL, NULL);

    return ir != NULL;
}
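
/* A sketch of how make_ir () could be driven with real images rather
 * than the NULL arguments above.  pixman_image_create_bits () and
 * pixman_image_unref () are the regular pixman entry points; the
 * formats and dimensions are arbitrary.  Kept under #if 0 since it is
 * only an illustration.
 */
#if 0
static IR *
make_test_ir (void)
{
    static uint32_t src_bits[8 * 8];
    static uint32_t dest_bits[8 * 8];
    pixman_image_t *src, *dest;
    IR *ir;

    src = pixman_image_create_bits (PIXMAN_a8r8g8b8, 8, 8, src_bits, 8 * 4);
    dest = pixman_image_create_bits (PIXMAN_x8r8g8b8, 8, 8, dest_bits, 8 * 4);

    /* No mask, but OVER with an alpha-carrying source still sets
     * need_unpack, so the generated loop will use the unpacked path. */
    ir = make_ir (16, PIXMAN_OP_OVER, src, NULL, dest);

    pixman_image_unref (src);
    pixman_image_unref (dest);

    return ir;
}
#endif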