author     Søren Sandmann Pedersen <ssp@redhat.com>   2013-09-14 20:17:23 -0400
committer  Søren Sandmann Pedersen <ssp@redhat.com>   2013-09-14 20:17:56 -0400
commit     01b4b9f26232b1f95a339fae4ce6e681f7236272 (patch)
tree       04c34a8ba684bd97efed53fb96edf8637b3646f0
parent     a39a6bf773b6f6a86f13917ec764bbfe705e81b1 (diff)
todo
-rw-r--r--   iterjit.c   |   143
1 file changed, 143 insertions(+), 0 deletions(-)
diff --git a/iterjit.c b/iterjit.c
index e393567..3196acd 100644
--- a/iterjit.c
+++ b/iterjit.c
@@ -65,6 +65,149 @@
4. if that fails, then load both into registers
(this should be rare/impossible)
+ - When a variable absolutely *must* be in a register, do it like this:
+
+ mov t1, [t2 + 8 * t3]
+
+ t1, t2 and t3 here *must* be in registers. Replace instruction with
+
+ MOV tt1, t1
+ MOV tt2, t2
+ MOV tt3, t3
+ MOV tt1, [tt2 + 8 * tt3]
+ MOV t1, tt1
+
+   and mark tt1, tt2, and tt3 as "unspillable" and (since they
+   are not live at the same time as their counterparts) "hint"
+   that each of them should be stored in the same register as
+   its counterpart.  A sketch of this rewrite follows the list
+   below.
+
+ Implications:
+ - live ranges must be more exact (ie., must be able to
+ contain holes)
+
+ - variables can now be unspillable; as long as such
+ variables can only occur as the result of the
+ above construction, we should be fine.
+
+      - when the linear scan allocator encounters unspillable
+        variables, it processes them first, assigning each one
+        to its hinted register provided it has a hint, the
+        hinted variable was not itself spilled, and that
+        register is available.
+
+      - mov elision: a mov whose source and destination are the
+        same register should be discarded.
+
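+A minimal sketch of how such a rewrite step could look.  The var_t
+type, the make_unspillable_temp() helper and the emit callbacks are
+made-up names for this note, not existing API:
+
+    /* Hypothetical types: a variable with an "unspillable" flag and a
+     * coalescing hint, and a load of the form mov dst, [base + scale * index]. */
+    typedef struct { int id; int unspillable; int hint; } var_t;
+    typedef struct { var_t dst, base, index; int scale; } load_t;
+
+    static int next_var_id = 1000;
+
+    /* Fresh temporary that must stay in a register and that the
+     * allocator should try to place in the same register as 'counterpart'. */
+    static var_t
+    make_unspillable_temp (var_t counterpart)
+    {
+        var_t t = { next_var_id++, 1, counterpart.id };
+        return t;
+    }
+
+    static void
+    rewrite_load (load_t *l,
+                  void (*emit_mov) (var_t dst, var_t src),
+                  void (*emit_load) (var_t dst, var_t base, var_t index, int scale))
+    {
+        var_t tt1 = make_unspillable_temp (l->dst);
+        var_t tt2 = make_unspillable_temp (l->base);
+        var_t tt3 = make_unspillable_temp (l->index);
+
+        emit_mov (tt1, l->dst);                 /* MOV tt1, t1 */
+        emit_mov (tt2, l->base);                /* MOV tt2, t2 */
+        emit_mov (tt3, l->index);               /* MOV tt3, t3 */
+        emit_load (tt1, tt2, tt3, l->scale);    /* MOV tt1, [tt2 + scale * tt3] */
+        emit_mov (l->dst, tt1);                 /* MOV t1, tt1 */
+    }
+
+Since each temporary is hinted to its counterpart and the two are never
+live at the same time, the mov-elision pass above can then delete most
+of the inserted moves.
+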
+Concepts:
+
+ - 'code': array of uint64, can be register allocated or not. If not,
+ it may contain OP_VAR and OP_VAR_MEM{8,16,32,64} operands.
+
+ - assembler: knows how to convert register allocated code to
+ machine code
+
+ - fragment: Can take 'code' and turn it into annotated machine
+   code. Should probably go away and become just 'code', since
+   the details of the annotation are irrelevant to the outside world.
+
+ - register allocator: will hand out OP_VARs, knows how to
+ convert non-register-allocated into register-allocated. Is
+ specific to a register class.
+
+ - stack manager: will hand out stack offsets based on size etc.
+ Will also tell how much stack space is needed in total
+
+ - code manager: will hand out blocks of writable/executable
+ memory that can be written into. Will (eventually) deal with
+ handling ELF files. This will need to be OS specific.
+
+ - BEGIN_ASM/END_ASM should become BEGIN_CODE/END_CODE and just
+ return a pointer; maybe even
+
+ BEGIN_CODE (&code)
+
+ END_CODE ();
+
+   which will copy the code into malloced memory and store a
+   pointer to it in code. (New code will be appended to whatever
+   is already there, so the memory management needs to be a
+   little more complex here).
+
+ - jit: Contains all the above, and will turn non-register
+ allocated code into a pointer to executable code.
+
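+One possible shape for these components, as hypothetical C
+declarations; every name and signature below is an assumption for
+illustration and would have to be adjusted to whatever iterjit.c
+actually grows:
+
+    #include <stddef.h>
+    #include <stdint.h>
+
+    typedef struct code_t code_t;                 /* array of uint64_t ops */
+    typedef struct register_allocator_t register_allocator_t;
+    typedef struct stack_manager_t stack_manager_t;
+    typedef struct code_manager_t code_manager_t;
+
+    /* register allocator: hands out OP_VARs for one register class and
+     * turns non-register-allocated code into register-allocated code */
+    uint64_t  register_alloc_new      (register_allocator_t *ra);
+    code_t   *register_alloc_allocate (register_allocator_t *ra, code_t *code);
+
+    /* stack manager: hands out stack offsets and reports the total size */
+    int       stack_manager_alloc (stack_manager_t *sm, size_t size, size_t align);
+    size_t    stack_manager_total (const stack_manager_t *sm);
+
+    /* code manager: hands out writable/executable memory (OS specific) */
+    void     *code_manager_alloc  (code_manager_t *cm, size_t size);
+
+    /* assembler: converts register-allocated code into machine code
+     * placed in memory obtained from the code manager */
+    void     *assembler_assemble  (code_t *code, code_manager_t *cm);
+
+Whether there is one allocator per register class or a single allocator
+taking a pool argument (as in the usage sketch further down) is left
+open here.
+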
+The 'code' data structure:
+
+ - needs ability to be appended to from an array
+
+ - needs ability to be 'rewritten', ie., a linear scan through it
+ where instructions are inserted and removed
+
+ - needs to be concatenatable
+
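+A rough sketch of the append and concatenate operations, assuming the
+code_t layout given a little further down (n_elements plus a trailing
+array); the in-place rewriting pass is the harder part and is left out
+here:
+
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    typedef struct
+    {
+        int n_elements;
+        uint64_t code[1];
+    } code_t;
+
+    /* Append n operations; may move the array, so only the returned
+     * pointer is valid afterwards. */
+    static code_t *
+    code_append (code_t *code, const uint64_t *ops, int n)
+    {
+        int old = code ? code->n_elements : 0;
+        code_t *c = realloc (code, sizeof (code_t) + (old + n - 1) * sizeof (uint64_t));
+
+        if (!c)
+            return NULL;
+        memcpy (c->code + old, ops, n * sizeof (uint64_t));
+        c->n_elements = old + n;
+        return c;
+    }
+
+    static code_t *
+    code_concat (code_t *a, const code_t *b)
+    {
+        return code_append (a, b->code, b->n_elements);
+    }
+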
+It's tempting to say that the various operations will free and allocate:
+
+ - code = register_allocate (code)
+
+will free the original code and return a new one, and
+
+ - executable = assembler_assemble (code, .... );
+
+will free all the code arrays. There should also be a regular
+code_free. So,
+
+ typedef struct
+ {
+ int n_elements;
+ uint64_t code[1];
+ } code_t;
+
+and
+
+ register_allocator_t allocator;
+
+ register_alloc_init (&allocator, &stack_man);
+
+ op1 = register_alloc_new (&allocator, gp_pool);
+ op2 = register_alloc_new (&allocator, xmm_pool);
+ op3 = register_alloc_new (&allocator, xmm_pool);
+
+ code_t *code = code_new();
+
+ BEGIN_CODE (&code)
+ ...,
+ END_CODE();
+
+ code = register_alloc_allocate (&gp_alloc, code);
+
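+
+The consume-and-return convention could then look roughly like this;
+linear_scan() is a made-up name for whatever pass does the actual
+allocation, and the declarations merely restate what is assumed above:
+
+    /* hypothetical linear-scan pass producing a new, register-allocated code_t */
+    code_t *linear_scan (register_allocator_t *ra, const code_t *code);
+    void    code_free   (code_t *code);
+
+    /* Takes ownership of 'code': frees it and hands back a freshly
+     * allocated, register-allocated array.  The caller must only use
+     * the returned pointer from here on. */
+    code_t *
+    register_alloc_allocate (register_allocator_t *ra, code_t *code)
+    {
+        code_t *allocated = linear_scan (ra, code);
+
+        code_free (code);
+        return allocated;
+    }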
+
+
+Note: An interesting fix to x86 craziness is to just turn
+
+ mov d, [b + x * k + i]
+
+ into
+
+    mov t1, x
+    shl t1, log2(k)
+    add t1, i
+    add t1, b
+    mov d, [t1]
+
+ and then have a peephole pass that recognizes the above and
+ turns it back into x86 addressing when possible.
+
+ Ie., recognize
+
+    live t1
+    mov t1, r1
+    [shl t1, {1,2,3}]
+    [add t1, imm]
+    add t1, r2
+    mov d, [t1]
+    dead t1
+
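+A sketch of how that peephole recognizer might be structured, over a
+made-up insn_t representation (the opcode names and fields are
+assumptions); a real pass would also have to check that t1 is dead
+after the load:
+
+    #include <stdbool.h>
+
+    typedef enum { INSN_MOV_REG, INSN_SHL_IMM, INSN_ADD_IMM,
+                   INSN_ADD_REG, INSN_LOAD } opcode_t;
+
+    typedef struct { opcode_t op; int dst, src; long imm; } insn_t;
+
+    /* Try to recognize, starting at insns[i],
+     *
+     *     mov t, x; [shl t, {1,2,3}]; [add t, imm]; add t, b; mov d, [t]
+     *
+     * and report the pieces of an x86 address (base, index, shift,
+     * displacement) if it matches. */
+    static bool
+    match_scaled_load (const insn_t *insns, int n, int i,
+                       int *base, int *index, int *shift, long *disp)
+    {
+        if (i >= n || insns[i].op != INSN_MOV_REG)
+            return false;
+
+        int t = insns[i].dst;           /* the temporary t1 */
+        *index = insns[i].src;          /* x */
+        *shift = 0;
+        *disp = 0;
+        i++;
+
+        /* optional: shl t1, {1,2,3} */
+        if (i < n && insns[i].op == INSN_SHL_IMM && insns[i].dst == t &&
+            insns[i].imm >= 1 && insns[i].imm <= 3)
+        {
+            *shift = (int) insns[i].imm;
+            i++;
+        }
+
+        /* optional: add t1, imm */
+        if (i < n && insns[i].op == INSN_ADD_IMM && insns[i].dst == t)
+        {
+            *disp = insns[i].imm;
+            i++;
+        }
+
+        /* required: add t1, b */
+        if (i >= n || insns[i].op != INSN_ADD_REG || insns[i].dst != t)
+            return false;
+        *base = insns[i].src;
+        i++;
+
+        /* required: mov d, [t1] */
+        return i < n && insns[i].op == INSN_LOAD && insns[i].src == t;
+    }
+
+On a match the whole sequence can be replaced by a single
+mov d, [base + (1 << shift) * index + disp].
+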
+
Flow:
- outer loop:
- generates outer loop