1 files changed, 143 insertions, 0 deletions
diff --git a/iterjit.c b/iterjit.c
index e393567..3196acd 100644
--- a/iterjit.c
+++ b/iterjit.c
@@ -65,6 +65,149 @@
 		     4. if that fails, then load both into registers
 		        (this should be rare/impossible)
 
+	- When a variable absolutely *must* be in a register, do it like this:
+
+		mov t1, [t2 + 8 * t3]
+
+	  t1, t2 and t3 here *must* be in registers. Replace instruction with
+
+		MOV tt1, t1
+		MOV tt2, t2
+		MOV tt3, t3
+		MOV tt1, [tt2 + 8 * tt3]
+		MOV t1, tt1
+
+	  and mark tt1, tt2, and tt3 as "unspillable" and (since they
+	  are not live at the same time as their counterparts) "hint"
+	  that each should be stored in the same register as its counterpart.
+
+	  Implications:
+		- live ranges must be more exact (i.e., must be able to
+		  contain holes)
+
+	        - variables can now be unspillable; as long as such
+		  variables can only occur as the result of the
+		  above construction, we should be fine.
+
+	        - when the linear scan allocator encounters a
+                  situation where there are unspillable variables, it
+                  processes the unspillable variables first, assigning
+                  them to their hints if they have one and their hint
+                  was not itself spilled, and if that register is
+                  available.
+
+		- mov elision: a mov between the same register should
+		  be discarded.
+
+Concepts:
+
+	- 'code': array of uint64, can be register allocated or not. If not,
+	  it may contain OP_VAR and OP_VAR_MEM{8,16,32,64} operands.
+
+	- assembler: knows how to convert register allocated code to
+          machine code
+
+	- fragment: Can take 'code' and turn it into annotated machine
+	  code.  Should probably go away and become just 'code' since
+	  the details of annotation are irrelevant to the outside.
+
+	- register allocator: will hand out OP_VARs, knows how to
+	  convert non-register-allocated into register-allocated. Is
+	  specific to a register class.
+
+	- stack manager: will hand out stack offsets based on size etc.
+	  Will also tell how much stack space is needed in total
+
+	- code manager: will hand out blocks of writable/executable
+	  memory that can be written into. Will (eventually) deal with
+	  handling ELF files. This will need to be OS specific.
+
+	- BEGIN_ASM/END_ASM should become BEGIN_CODE/END_CODE and just
+	  return a pointer; maybe even
+
+		BEGIN_CODE (&code)
+
+		END_CODE ();
+
+	  which will copy the code into malloced memory and store a
+	  pointer to it in code. (It will be appended, so the memory
+	  management needs to be a little more complex here).
+
+	- jit: Contains all the above, and will turn non-register
+	  allocated code into a pointer to executable code.
+
+The 'code' data structure:
+
+	- needs ability to be appended to from an array
+
+	- needs ability to be 'rewritten', i.e., a linear scan through it
+	  where instructions are inserted and removed
+
+	- needs to be concatenatable
+
+It's tempting to say that the various operations will free and allocate:
+
+	- code = register_allocate (code)
+
+will free the original code and return a new one, and 
+
+	- executable = assembler_assemble (code, .... );
+
+will free all the code arrays. There should also be a regular
+code_free. So,
+
+	typedef struct
+	{
+		int		n_elements;
+		uint64_t	code[1];
+	} code_t;
+
+and 
+
+	register_allocator_t allocator;
+
+	register_alloc_init (&allocator, &stack_man);
+
+	op1 = register_alloc_new (&allocator, gp_pool);
+	op2 = register_alloc_new (&allocator, xmm_pool);
+	op3 = register_alloc_new (&allocator, xmm_pool);
+
+	code_t *code = code_new();
+
+	BEGIN_CODE (&code)
+		...,
+	END_CODE();
+
+	code = register_alloc_allocate (&gp_alloc, code);
+
+
+
+Note: An interesting fix to x86 craziness is to just turn
+
+	mov d, [b + x * k + i]
+
+      into
+
+	mov t1, x
+	shl t1, log2(k)
+	add t1, i
+	add t1, b
+	mov d, [t1]
+
+      and then have a peephole pass that recognizes the above and
+      turns it back into x86 addressing when possible.
+
+      I.e., recognize
+
+        live t1
+	mov r0, r1,
+	[shl r0, {1,2,3}],
+	[add r0, imm],
+	add r0, r2,
+	mov d, [t1]
+	dead t1
+
+
   Flow:
   - outer loop:
 	- generates outer loop