summaryrefslogtreecommitdiff
path: root/reference/idiv-vert.asm
diff options
context:
space:
mode:
Diffstat (limited to 'reference/idiv-vert.asm')
-rw-r--r--reference/idiv-vert.asm154
1 files changed, 74 insertions, 80 deletions
diff --git a/reference/idiv-vert.asm b/reference/idiv-vert.asm
index 292cf79..bb5adce 100644
--- a/reference/idiv-vert.asm
+++ b/reference/idiv-vert.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r0.x) in0
+@in(r0.y) in1
+@in(r0.z) in2
+@in(r0.w) in3
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -12,90 +12,89 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)cov.s32f32 r0.x, c1.x
-cov.s32f32 r0.y, c1.y
-mov.f32f32 r0.z, c0.y
-mov.f32f32 r0.w, c0.x
-absneg.f r0.x, (abs)r0.x
-absneg.f r0.y, (abs)r0.y
-xor.b r0.z, r0.z, c1.y
-xor.b r0.w, r0.w, c1.x
+@const(c3.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c4.x) 0xfffffffe, 0x0000001f, 0x00000000, 0x00000000
+(sy)(ss)cov.s32f32 r1.x, c1.x
+cov.s32f32 r1.y, c1.y
+mov.f32f32 r1.z, c0.y
+mov.f32f32 r1.w, c0.x
+absneg.f r1.x, (abs)r1.x
+absneg.f r1.y, (abs)r1.y
+xor.b r1.z, r1.z, c1.y
+xor.b r1.w, r1.w, c1.x
cov.s32f32 r2.x, c0.y
cov.s32f32 r2.y, c0.x
mov.f32f32 r2.z, c3.y
-rcp r0.x, r0.x
-(ss)add.u r0.x, r0.x, c4.x
-rcp r0.y, r0.y
-(ss)add.u r2.w, r0.y, c4.x
-(ss)absneg.f r0.y, (abs)r2.y
+rcp r1.x, r1.x
+(ss)add.u r1.x, r1.x, c4.x
+rcp r1.y, r1.y
+(ss)add.u r1.y, r1.y, c4.x
+absneg.f r2.y, (abs)r2.y
absneg.f r2.x, (abs)r2.x
-shr.b r2.y, r0.z, c4.y
-shr.b r3.x, r0.w, c4.y
-mul.f r0.y, r0.y, r0.x
-mul.f r0.z, r2.x, r2.w
-mov.f32f32 r2.x, c3.y
-mov.f32f32 r3.y, c3.x
-cov.f32s32 r3.z, r0.y
-absneg.s r3.w, (abs)c1.x
-cov.f32s32 r4.x, r0.z
-absneg.s r4.y, (abs)c1.y
-mov.f32f32 r4.z, c3.x
-mull.u r0.y, r3.z, r3.w
-mov.f32f32 r4.w, c3.x
-madsh.m16 r0.y, r3.z, r3.w, r0.y
-mull.u r0.z, r4.x, r4.y
-madsh.m16 r0.y, r3.w, r3.z, r0.y
-absneg.s r5.x, (abs)c0.x
-madsh.m16 r0.z, r4.x, r4.y, r0.z
+shr.b r1.z, r1.z, c4.y
+shr.b r1.w, r1.w, c4.y
+mul.f r2.y, r2.y, r1.x
+mul.f r2.x, r2.x, r1.y
+mov.f32f32 r2.w, c3.y
+mov.f32f32 r3.x, c3.x
+cov.f32s32 r2.y, r2.y
+absneg.s r3.y, (abs)c1.x
+cov.f32s32 r2.x, r2.x
+absneg.s r3.z, (abs)c1.y
+mov.f32f32 r3.w, c3.x
+mull.u r4.x, r2.y, r3.y
+mov.f32f32 r4.y, c3.x
+madsh.m16 r4.x, r2.y, r3.y, r4.x
+mull.u r4.z, r2.x, r3.z
+madsh.m16 r4.x, r3.y, r2.y, r4.x
+absneg.s r4.w, (abs)c0.x
+madsh.m16 r4.z, r2.x, r3.z, r4.z
+mov.f32f32 r5.x, c3.y
mov.f32f32 r5.y, c3.y
-mov.f32f32 r5.z, c3.y
-sub.u r0.y, r5.x, r0.y
-madsh.m16 r5.w, r4.y, r4.x, r0.z
-absneg.s r6.x, (abs)c0.y
-mov.f32f32 r6.y, c3.x
-cov.u32f32 r6.z, r0.y
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.y, r1.y
-mul.f r1.y, r6.z, r0.x
-sub.u r1.z, r6.x, r5.w
-mov.f32f32 r0.x, r1.x
-nop
-cov.f32u32 r1.x, r1.y
-cov.u32f32 r1.y, r1.z
+sub.u r4.x, r4.w, r4.x
+madsh.m16 r4.z, r3.z, r2.x, r4.z
+absneg.s r5.z, (abs)c0.y
+mov.f32f32 r5.w, c3.x
+cov.u32f32 r4.x, r4.x
+(rpt2)nop
+mul.f r1.x, r4.x, r1.x
+sub.u r4.x, r5.z, r4.z
(rpt1)nop
-add.u r1.x, r3.z, r1.x
-mul.f r1.y, r1.y, r2.w
+cov.f32u32 r1.x, r1.x
+cov.u32f32 r4.x, r4.x
(rpt1)nop
-mull.u r1.z, r1.x, r3.w
+add.u r1.x, r2.y, r1.x
+mul.f r1.y, r4.x, r1.y
+(rpt1)nop
+mull.u r2.y, r1.x, r3.y
cov.f32u32 r1.y, r1.y
-madsh.m16 r1.z, r1.x, r3.w, r1.z
+madsh.m16 r2.y, r1.x, r3.y, r2.y
nop
-madsh.m16 r1.z, r3.w, r1.x, r1.z
-add.u r1.y, r4.x, r1.y
+madsh.m16 r2.y, r3.y, r1.x, r2.y
+add.u r1.y, r2.x, r1.y
(rpt1)nop
-sub.u r1.z, r5.x, r1.z
-mull.u r1.w, r1.y, r4.y
+sub.u r2.x, r4.w, r2.y
+mull.u r2.y, r1.y, r3.z
(rpt1)nop
-cmps.u.ge r1.z, r1.z, r3.w
-madsh.m16 r1.w, r1.y, r4.y, r1.w
+cmps.u.ge r2.x, r2.x, r3.y
+madsh.m16 r2.y, r1.y, r3.z, r2.y
(rpt1)nop
-add.u r1.x, r1.x, r1.z
-madsh.m16 r1.z, r4.y, r1.y, r1.w
+add.u r1.x, r1.x, r2.x
+madsh.m16 r2.x, r3.z, r1.y, r2.y
(rpt1)nop
-absneg.s r1.w, (neg)r1.x
-sub.u r1.z, r6.x, r1.z
+absneg.s r2.y, (neg)r1.x
+sub.u r2.x, r5.z, r2.x
(rpt1)nop
-sel.b32 r1.x, r1.w, r3.x, r1.x
-cmps.u.ge r1.z, r1.z, r4.y
+sel.b32 r1.x, r2.y, r1.w, r1.x
+cmps.u.ge r1.w, r2.x, r3.z
(rpt1)nop
cmps.u.eq r1.x, r1.x, c2.x
-add.u r1.y, r1.y, r1.z
+add.u r1.y, r1.y, r1.w
(rpt1)nop
absneg.s r1.x, (neg)r1.x
-absneg.s r1.z, (neg)r1.y
+absneg.s r1.w, (neg)r1.y
(rpt2)nop
-sel.b32 r1.y, r1.z, r2.y, r1.y
+sel.b32 r1.y, r1.w, r1.z, r1.y
(rpt2)nop
cmps.u.eq r1.y, r1.y, c2.y
(rpt2)nop
@@ -105,18 +104,13 @@ and.b r1.x, r1.x, r1.y
(rpt2)nop
cmps.u.ne r1.x, r1.x, c3.x
(rpt2)nop
-sel.b32 r1.y, r2.x, r1.x, r2.z
-sel.b32 r1.z, r4.z, r1.x, r3.y
-sel.b32 r2.x, r5.y, r1.x, r4.w
-sel.b32 r1.x, r6.y, r1.x, r5.z
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r1.x
+sel.b32 r1.w, r2.w, r1.x, r2.z
+sel.b32 r1.z, r3.w, r1.x, r3.x
+sel.b32 r1.y, r5.x, r1.x, r4.y
+sel.b32 r1.x, r5.w, r1.x, r5.y
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
-; VERT: 123 instructions, 0 half, 7 full
+; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0)
+; VERT: 119 instructions, 0 half, 6 full