diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | iterjit.c | 240 | ||||
-rw-r--r-- | regnaive.c | 98 | ||||
-rw-r--r-- | regnaive.h | 44 |
4 files changed, 241 insertions, 145 deletions
@@ -32,8 +32,8 @@ blitter: $(COMMON) blitter.c testsuite: $(COMMON) testsuite.c $(CC) $(CFLAGS) -o testsuite $(COMMON_SOURCES) testsuite.c $(LDFLAGS) -iterjit: $(COMMON) iterjit.c - $(CC) $(CFLAGS) -o iterjit $(COMMON_SOURCES) iterjit.c reggroups.c $(LDFLAGS) +iterjit: $(COMMON) iterjit.c regnaive.c regnaive.h + $(CC) $(CFLAGS) -o iterjit $(COMMON_SOURCES) regnaive.c iterjit.c $(LDFLAGS) clean: rm -f genrender blitter testsuite iterjit @@ -4,9 +4,8 @@ #include <pixman.h> #include <string.h> #include "simplex86.h" -#include "stack-man.h" -#include "regscope.h" #include "crc32.h" +#include "regnaive.h" typedef struct { @@ -196,7 +195,6 @@ struct jit_t { assembler_t *assembler; fragment_t *fragment; - stack_man_t stack_man; reg_alloc_t gp_allocator; reg_alloc_t xmm_allocator; }; @@ -295,7 +293,7 @@ struct jit_dest_iter_t static const reg_pool_t xmm_pool = { - 16 /* n_registers */, 16 /* register size */, I_movdqa, + 16, { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, }, @@ -303,9 +301,9 @@ static const reg_pool_t xmm_pool = static const reg_pool_t gp64_pool = { - 14 /* n_registers */, 8 /* register size */, I_mov, - { rax, rbx, rcx, rdx, rsi, rdi, - r8, r9, r10, r11, r12, r13, r14, r15, + 14, + { rsi, rdi, r8, r9, r10, r11, rcx, rdx, + rax, rbx, r12, r13, r14, r15, } }; @@ -317,40 +315,47 @@ jit_new (void) jit->assembler = assembler_new ("pixman"); jit->fragment = fragment_new (jit->assembler); - stack_manager_init (&jit->stack_man); - - reg_alloc_init (&jit->xmm_allocator, jit->fragment, &jit->stack_man, &xmm_pool); - reg_alloc_init (&jit->gp_allocator, jit->fragment, &jit->stack_man, &gp64_pool); + reg_alloc_init (&jit->xmm_allocator, &xmm_pool); + reg_alloc_init (&jit->gp_allocator, &gp64_pool); return jit; } -void -jit_switch_group (jit_t *jit, const char *group) -{ - -} - reg_t jit_alloc_gp (jit_t *jit) { return reg_alloc_alloc (&jit->gp_allocator); } -reg_t jit_alloc_xmm (jit_t *jit); +reg_t +jit_alloc_xmm (jit_t *jit) +{ + return reg_alloc_alloc (&jit->xmm_allocator); +} -reg_t jit_preserve_gp (jit_t *jit, reg_t reg) +reg_t +jit_preserve_gp (jit_t *jit, reg_t reg) { return reg_alloc_alloc_preserve (&jit->gp_allocator, reg); } -void jit_free_gp (jit_t *jit, reg_t reg) +reg_t +jit_preserve_xmm (jit_t *jit, reg_t reg) +{ + return reg_alloc_alloc_preserve (&jit->xmm_allocator, reg); +} + +void +jit_free_gp (jit_t *jit, reg_t reg) { reg_alloc_free (&jit->gp_allocator, reg); } -void jit_free_gp (jit_t *jit, reg_t reg); -void jit_free_xmm (jit_t *jit, reg_t reg); +void +jit_free_xmm (jit_t *jit, reg_t reg) +{ + reg_alloc_free (&jit->xmm_allocator, reg); +} static void src_a8r8g8b8_begin (jit_src_iter_t *src, @@ -359,8 +364,7 @@ src_a8r8g8b8_begin (jit_src_iter_t *src, { reg_t image, tmp; - jit_switch_group (jit, "src"); - info = jit_preserve_gp (jit, info); + jit_preserve_gp (jit, info); src->line = jit_alloc_gp (jit); src->stride = jit_alloc_gp (jit); image = jit_alloc_gp (jit); @@ -386,11 +390,6 @@ src_a8r8g8b8_begin (jit_src_iter_t *src, jit_free_gp (jit, info); jit_free_gp (jit, image); jit_free_gp (jit, tmp); - - jit_save_gp (jit, - src->line, &src->line_save, - src->stride, &src->stride_save, - 0); } static void @@ -398,21 +397,10 @@ src_a8r8g8b8_begin_line (jit_src_iter_t *src, jit_t *jit) { src->s = jit_alloc_gp (jit); - jit_restore_gp (jit, - src->line, src->line_save, - src->stride, src->stride_save, - 0); - BEGIN_ASM (jit->fragment) I_mov, src->s, src->line, I_add, src->line, src->stride, END_ASM (); - - jit_save_gp (jit, - src->line, src->line_save, - src->stride, src->stride_save, - src->s, src->s_save, - 0); } static reg_t @@ -420,6 +408,7 @@ src_a8r8g8b8_load_pixels (jit_src_iter_t *src, jit_t *jit, int n_pixels) { instruction_t move; reg_t r; + int size; r = jit_alloc_xmm (jit); @@ -427,29 +416,24 @@ src_a8r8g8b8_load_pixels (jit_src_iter_t *src, jit_t *jit, int n_pixels) { case 1: move = I_movd; + size = DWORD_PTR; break; case 2: move = I_movq; + size = QWORD_PTR; break; case 4: move = I_movdqu; + size = 0; break; } - jit_restore_gp (jit, - src->s, src->s_save, - 0); - BEGIN_ASM (jit->fragment) - move, r, PTR (src->s), + move, r, size + PTR (src->s), END_ASM (); - jit_save_gp (jit, - src->s, src->s_save, - 0); - jit_free_xmm (jit, r); return r; @@ -460,40 +444,20 @@ src_a8r8g8b8_advance_pixels (jit_src_iter_t *src, jit_t *jit, int n_pixels) { int n_bytes = n_pixels * 4; - jit_reload_gp (jit, - src->s, src->s_save, - 0); - BEGIN_ASM (jit->fragment) I_add, src->s, IMM (n_bytes), END_ASM (); - - jit_save_gp (jit, - src->s, src->s_save, - 0); } static void src_a8r8g8b8_end_line (jit_src_iter_t *src, jit_t *jit) { - jit_switch_group (jit, "src"); - /* FIXME: This may result in a dead load from memory. - * We may need a "restore_and_throw_away" call that - * doesn't emit the load. - * Though, then we don't need the save either ... - */ - jit_restore_gp (jit, - src->s, src->s_save, - src->line, src->line_save, - src->stride, src->stride_save, - 0); jit_free_gp (jit, src->s); } static void src_a8r8g8b8_end (jit_src_iter_t *src, jit_t *jit) { - jit_switch_group (jit, "src"); jit_free_gp (jit, src->stride); jit_free_gp (jit, src->line); } @@ -521,7 +485,6 @@ dest_a8r8g8b8_begin (jit_dest_iter_t * dest, { reg_t image, tmp; - jit_switch_group (jit, "dest"); info = jit_preserve_gp (jit, info); dest->line = jit_alloc_gp (jit); dest->stride = jit_alloc_gp (jit); @@ -566,12 +529,8 @@ dest_a8r8g8b8_process_line (jit_dest_iter_t *dest, int i; src->begin_line (src, jit); - mask->begin_line (mask, jit); - - jit_switch_group (jit, "dest"); - - jit_reload_gp (jit, dest->line); - jit_reload_gp (jit, dest->stride); + if (mask) + mask->begin_line (mask, jit); dest->d = jit_alloc_gp (jit); dest->w = jit_alloc_gp (jit); @@ -590,9 +549,6 @@ dest_a8r8g8b8_process_line (jit_dest_iter_t *dest, asprintf (&test, "horz_%d_test", i); /* FIXME OOM */ asprintf (&done, "horz_%d_done", i); /* FIXME OOM */ - jit_switch_group (jit, "dest"); - jit_reload_gp (jit, dest->w); - BEGIN_ASM (jit->fragment) I_jmp, LABEL (test), END_ASM(); @@ -614,10 +570,8 @@ dest_a8r8g8b8_process_line (jit_dest_iter_t *dest, DEFINE_LABEL (test), END_ASM (); - jit_switch_group (jit, "dest"); if (i < 2) { - jit_reload_gp (jit, dest->d); BEGIN_ASM (jit->fragment) /* If aligned properly, skip to next block */ I_test, dest->d, IMM (n_pixels[i] * 4 * 2 - 1), @@ -625,7 +579,6 @@ dest_a8r8g8b8_process_line (jit_dest_iter_t *dest, END_ASM (); } - jit_reload_gp (jit, dest->w); BEGIN_ASM (jit->fragment) I_cmp, dest->w, IMM (n_pixels[i]), I_jge, LABEL (loop), @@ -645,29 +598,31 @@ static reg_t dest_a8r8g8b8_load_pixels (jit_dest_iter_t *dest, jit_t *jit, int n_pixels) { instruction_t move; + int size; reg_t r; - jit_switch_group (jit, "dest"); - jit_reload_gp (jit, dest->d); r = jit_alloc_xmm (jit); switch (n_pixels) { case 1: move = I_movd; + size = DWORD_PTR; break; case 2: move = I_movq; + size = QWORD_PTR; break; case 4: move = I_movdqa; + size = 0; break; } BEGIN_ASM (jit->fragment) - move, r, PTR (dest->d), + move, r, size + PTR (dest->d), END_ASM (); jit_free_xmm (jit, r); @@ -679,39 +634,35 @@ static void dest_a8r8g8b8_store_pixels (jit_dest_iter_t *dest, jit_t *jit, int n_pixels, reg_t reg) { instruction_t move; + int size; switch (n_pixels) { case 1: move = I_movd; + size = DWORD_PTR; break; case 2: move = I_movq; + size = QWORD_PTR; break; case 4: move = I_movdqa; + size = 0; break; } - jit_switch_group (jit, "dest"); - jit_reload_gp (jit, dest->d); - BEGIN_ASM (jit->fragment) - move, reg, PTR (dest->d), + move, reg, size + PTR (dest->d), END_ASM(); } static void dest_a8r8g8b8_advance_pixels (jit_dest_iter_t *dest, jit_t *jit, int n_pixels) { - int n_bytes; - - jit_switch_group (jit, "dest"); - jit_reload_gp (jit, dest->d); - - n_bytes = n_pixels * 4; + int n_bytes = n_pixels * 4; BEGIN_ASM (jit->fragment) I_add, dest->d, IMM (n_bytes), @@ -721,7 +672,6 @@ dest_a8r8g8b8_advance_pixels (jit_dest_iter_t *dest, jit_t *jit, int n_pixels) static void dest_a8r8g8b8_end (jit_dest_iter_t *dest, jit_t *jit) { - } jit_dest_iter_t * @@ -750,13 +700,10 @@ combine_over (jit_combiner_t *combiner, jit_t *jit, s = src->load_pixels (src, jit, n_pixels); - jit_switch_group (jit, "combiner"); - s = jit_preserve_gp (jit, s); + s = jit_preserve_xmm (jit, s); src->advance_pixels (src, jit, n_pixels); - jit_switch_group (jit, "combiner"); - m00ff = zero = jit_alloc_xmm (jit); m_hi = jit_alloc_xmm (jit); m_lo = jit_alloc_xmm (jit); @@ -765,15 +712,13 @@ combine_over (jit_combiner_t *combiner, jit_t *jit, /* Generate zero */ I_pxor, zero, zero, - /* Expand source */ + /* Expand alpha */ I_movdqa, m_hi, s, I_movdqa, m_lo, s, I_punpckhbw, m_hi, zero, I_punpcklbw, m_lo, zero, - I_pshuflw, m_hi, 0xff, - I_pshuflw, m_lo, 0xff, - I_pshufhw, m_hi, 0xff, - I_pshufhw, m_lo, 0xff, + I_pshuflw, m_hi, m_hi, UIMM (0xff), + I_pshuflw, m_lo, m_lo, UIMM (0xff), /* Negate mask */ I_pcmpeqw, m00ff, m00ff, @@ -786,8 +731,7 @@ combine_over (jit_combiner_t *combiner, jit_t *jit, d = dest->load_pixels (dest, jit, n_pixels); - jit_switch_group (jit, "combiner"); - d = jit_preserve_gp (jit, d); + d = jit_preserve_xmm (jit, d); m0080 = zero = jit_alloc_xmm (jit); m0101 = jit_alloc_xmm (jit); @@ -849,7 +793,7 @@ combiner_create_over (void) return combiner; } -void +uint8_t * generate_kernel (jit_t *jit, jit_src_iter_t *src, jit_src_iter_t *mask, @@ -857,26 +801,12 @@ generate_kernel (jit_t *jit, jit_combiner_t *combiner) { reg_t h, composite_info; - reg_t r_rbx, r_r12, r_r13, r_r14, r_r15; - - jit_switch_group (jit, "outer"); + fragment_t *prologue, *epilogue; composite_info = rsi; composite_info = jit_preserve_gp (jit, composite_info); - /* Preserve callee-save registers */ - r_rbx = jit_preserve_gp (jit, rbx); - r_r12 = jit_preserve_gp (jit, r12); - r_r13 = jit_preserve_gp (jit, r13); - r_r14 = jit_preserve_gp (jit, r14); - r_r15 = jit_preserve_gp (jit, r15); - - BEGIN_ASM (jit->fragment) - I_push, rbp, - I_mov, rbp, rsp, - END_ASM(); - h = jit_alloc_gp (jit); BEGIN_ASM (jit->fragment) @@ -901,8 +831,6 @@ generate_kernel (jit_t *jit, dest->process_line ( dest, jit, src, mask, combiner); - jit_switch_group (jit, "outer"); - jit_reload_gp (jit, h); BEGIN_ASM (jit->fragment) I_sub, h, IMM (1), I_jnz, LABEL ("vertical_loop"), @@ -916,31 +844,56 @@ generate_kernel (jit_t *jit, mask->end (mask, jit); src->end (src, jit); - jit_switch_group (jit, "outer"); + BEGIN_ASM (jit->fragment) + DEFINE_LABEL ("done"), + END_ASM(); + + /* Prologue */ + prologue = fragment_new (jit->assembler); - /* Restore callee-save registers */ -#define RESTORE(reg) \ - r_##reg = jit_reload_gp (jit, r_##reg); \ - if (r_##reg != reg) \ + BEGIN_ASM (prologue) + I_push, rbp, + I_mov, rbp, rsp, + END_ASM (); + +#define SAVE(reg) \ + if (reg_alloc_clobbered (&jit->gp_allocator, reg)) \ { \ - BEGIN_ASM (jit->fragment) \ - I_mov, reg, r_##reg, \ + BEGIN_ASM (prologue) \ + I_push, reg, \ END_ASM (); \ } + SAVE (rbx); + SAVE (r12); + SAVE (r13); + SAVE (r14); + SAVE (r15); - BEGIN_ASM (jit->fragment) - DEFINE_LABEL ("done"), - END_ASM(); + epilogue = fragment_new (jit->assembler); - RESTORE(rbx); - RESTORE(r12); - RESTORE(r13); - RESTORE(r14); - RESTORE(r15); +#define RESTORE(reg) \ + if (reg_alloc_clobbered (&jit->gp_allocator, reg)) \ + { \ + BEGIN_ASM (epilogue) \ + I_pop, reg, \ + END_ASM (); \ + } + RESTORE (r15); + RESTORE (r14); + RESTORE (r13); + RESTORE (r12); + RESTORE (rbx); - BEGIN_ASM (jit->fragment) + BEGIN_ASM (epilogue) I_pop, rbp, - END_ASM(); + I_ret, + END_ASM (); + + return assembler_link (jit->assembler, + prologue, + jit->fragment, + epilogue, + NULL); } int @@ -950,11 +903,12 @@ main () jit_combiner_t *combiner = combiner_create_over (); jit_src_iter_t *src = src_iter_create_a8r8g8b8 (); jit_t *jit = jit_new (); + uint8_t *code; /* n_8_8888() */ printf ("iter jit\n"); - generate_kernel (jit, src, NULL, dest, combiner); + code = generate_kernel (jit, src, NULL, dest, combiner); return 0; } diff --git a/regnaive.c b/regnaive.c new file mode 100644 index 0000000..c2c5fdc --- /dev/null +++ b/regnaive.c @@ -0,0 +1,98 @@ +#include <assert.h> +#include "regnaive.h" + +typedef struct reg_alloc_t reg_alloc_t; +typedef struct reg_pool_t reg_pool_t; + +#define MAX_REGISTERS (16) +#define MAX_SPILLS (16) + +static int +find_index (reg_alloc_t *ra, reg_t reg) +{ + int i; + + for (i = 0; i < ra->pool->n_registers; ++i) + { + if (ra->pool->registers[i] == reg) + return i; + } + + assert (0); + return -1; +} + +void +reg_alloc_init (reg_alloc_t *ra, + const reg_pool_t *reg_pool) +{ + int i; + + ra->pool = reg_pool; + for (i = 0; i < ra->pool->n_registers; ++i) + { + ra->allocated[i] = FALSE; + ra->clobbered[i] = FALSE; + } + ra->failed = FALSE; +} + +/* This resurrects a register that has been freed, but is known + * to still contain a useful value. + */ +reg_t +reg_alloc_alloc_preserve (reg_alloc_t *ra, reg_t reg) +{ + int idx = find_index (ra, reg); + + assert (!ra->allocated[idx]); + + ra->allocated[idx] = TRUE; + ra->clobbered[idx] = TRUE; + + return reg; +} + +reg_t +reg_alloc_alloc (reg_alloc_t *ra) +{ + int i; + + for (i = 0; i < ra->pool->n_registers; ++i) + { + if (!ra->allocated[i]) + { + ra->allocated[i] = TRUE; + ra->clobbered[i] = TRUE; + + return ra->pool->registers[i]; + } + } + + ra->failed = TRUE; + return (reg_t)0; +} + +void +reg_alloc_free (reg_alloc_t *ra, reg_t reg) +{ + int idx = find_index (ra, reg); + + assert (ra->allocated[idx]); + + ra->allocated[idx] = FALSE; +} + +int +reg_alloc_failed (reg_alloc_t *ra) +{ + return ra->failed; +} + +int +reg_alloc_clobbered (reg_alloc_t *ra, reg_t reg) +{ + int idx = find_index (ra, reg); + + return ra->clobbered[idx]; +} diff --git a/regnaive.h b/regnaive.h new file mode 100644 index 0000000..65730fe --- /dev/null +++ b/regnaive.h @@ -0,0 +1,44 @@ +#include "stack-man.h" +#include "simplex86.h" + +typedef struct reg_alloc_t reg_alloc_t; +typedef struct reg_pool_t reg_pool_t; + +#define MAX_REGISTERS (16) +#define MAX_SPILLS (16) + +struct reg_pool_t +{ + int n_registers; + reg_t registers[MAX_REGISTERS]; +}; + +struct reg_alloc_t +{ + const reg_pool_t * pool; + int allocated[MAX_REGISTERS]; + int clobbered[MAX_REGISTERS]; + int failed; +}; + +void +reg_alloc_init (reg_alloc_t *ra, + const reg_pool_t *reg_pool); + +/* This resurrects a register that has been freed, but is known + * to still contain a useful value. + */ +reg_t +reg_alloc_alloc_preserve (reg_alloc_t *ra, reg_t reg); + +reg_t +reg_alloc_alloc (reg_alloc_t *ra); + +void +reg_alloc_free (reg_alloc_t *ra, reg_t reg); + +int +reg_alloc_failed (reg_alloc_t *ra); + +int +reg_alloc_clobbered (reg_alloc_t *ra, reg_t reg); |