diff options
Diffstat (limited to 'reference/idiv-vert.asm')
-rw-r--r-- | reference/idiv-vert.asm | 154 |
1 files changed, 74 insertions, 80 deletions
diff --git a/reference/idiv-vert.asm b/reference/idiv-vert.asm index 292cf79..bb5adce 100644 --- a/reference/idiv-vert.asm +++ b/reference/idiv-vert.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r0.x) in0 +@in(r0.y) in1 +@in(r0.z) in2 +@in(r0.w) in3 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -12,90 +12,89 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)cov.s32f32 r0.x, c1.x -cov.s32f32 r0.y, c1.y -mov.f32f32 r0.z, c0.y -mov.f32f32 r0.w, c0.x -absneg.f r0.x, (abs)r0.x -absneg.f r0.y, (abs)r0.y -xor.b r0.z, r0.z, c1.y -xor.b r0.w, r0.w, c1.x +@const(c3.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c4.x) 0xfffffffe, 0x0000001f, 0x00000000, 0x00000000 +(sy)(ss)cov.s32f32 r1.x, c1.x +cov.s32f32 r1.y, c1.y +mov.f32f32 r1.z, c0.y +mov.f32f32 r1.w, c0.x +absneg.f r1.x, (abs)r1.x +absneg.f r1.y, (abs)r1.y +xor.b r1.z, r1.z, c1.y +xor.b r1.w, r1.w, c1.x cov.s32f32 r2.x, c0.y cov.s32f32 r2.y, c0.x mov.f32f32 r2.z, c3.y -rcp r0.x, r0.x -(ss)add.u r0.x, r0.x, c4.x -rcp r0.y, r0.y -(ss)add.u r2.w, r0.y, c4.x -(ss)absneg.f r0.y, (abs)r2.y +rcp r1.x, r1.x +(ss)add.u r1.x, r1.x, c4.x +rcp r1.y, r1.y +(ss)add.u r1.y, r1.y, c4.x +absneg.f r2.y, (abs)r2.y absneg.f r2.x, (abs)r2.x -shr.b r2.y, r0.z, c4.y -shr.b r3.x, r0.w, c4.y -mul.f r0.y, r0.y, r0.x -mul.f r0.z, r2.x, r2.w -mov.f32f32 r2.x, c3.y -mov.f32f32 r3.y, c3.x -cov.f32s32 r3.z, r0.y -absneg.s r3.w, (abs)c1.x -cov.f32s32 r4.x, r0.z -absneg.s r4.y, (abs)c1.y -mov.f32f32 r4.z, c3.x -mull.u r0.y, r3.z, r3.w -mov.f32f32 r4.w, c3.x -madsh.m16 r0.y, r3.z, r3.w, r0.y -mull.u r0.z, r4.x, r4.y -madsh.m16 r0.y, r3.w, r3.z, r0.y -absneg.s r5.x, (abs)c0.x -madsh.m16 r0.z, r4.x, r4.y, r0.z +shr.b r1.z, r1.z, c4.y +shr.b r1.w, r1.w, c4.y +mul.f r2.y, r2.y, r1.x +mul.f r2.x, r2.x, r1.y +mov.f32f32 r2.w, c3.y +mov.f32f32 r3.x, c3.x +cov.f32s32 r2.y, r2.y +absneg.s r3.y, (abs)c1.x +cov.f32s32 r2.x, r2.x +absneg.s r3.z, (abs)c1.y +mov.f32f32 r3.w, c3.x +mull.u r4.x, r2.y, r3.y +mov.f32f32 r4.y, c3.x +madsh.m16 r4.x, r2.y, r3.y, r4.x +mull.u r4.z, r2.x, r3.z +madsh.m16 r4.x, r3.y, r2.y, r4.x +absneg.s r4.w, (abs)c0.x +madsh.m16 r4.z, r2.x, r3.z, r4.z +mov.f32f32 r5.x, c3.y mov.f32f32 r5.y, c3.y -mov.f32f32 r5.z, c3.y -sub.u r0.y, r5.x, r0.y -madsh.m16 r5.w, r4.y, r4.x, r0.z -absneg.s r6.x, (abs)c0.y -mov.f32f32 r6.y, c3.x -cov.u32f32 r6.z, r0.y -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.y, r1.y -mul.f r1.y, r6.z, r0.x -sub.u r1.z, r6.x, r5.w -mov.f32f32 r0.x, r1.x -nop -cov.f32u32 r1.x, r1.y -cov.u32f32 r1.y, r1.z +sub.u r4.x, r4.w, r4.x +madsh.m16 r4.z, r3.z, r2.x, r4.z +absneg.s r5.z, (abs)c0.y +mov.f32f32 r5.w, c3.x +cov.u32f32 r4.x, r4.x +(rpt2)nop +mul.f r1.x, r4.x, r1.x +sub.u r4.x, r5.z, r4.z (rpt1)nop -add.u r1.x, r3.z, r1.x -mul.f r1.y, r1.y, r2.w +cov.f32u32 r1.x, r1.x +cov.u32f32 r4.x, r4.x (rpt1)nop -mull.u r1.z, r1.x, r3.w +add.u r1.x, r2.y, r1.x +mul.f r1.y, r4.x, r1.y +(rpt1)nop +mull.u r2.y, r1.x, r3.y cov.f32u32 r1.y, r1.y -madsh.m16 r1.z, r1.x, r3.w, r1.z +madsh.m16 r2.y, r1.x, r3.y, r2.y nop -madsh.m16 r1.z, r3.w, r1.x, r1.z -add.u r1.y, r4.x, r1.y +madsh.m16 r2.y, r3.y, r1.x, r2.y +add.u r1.y, r2.x, r1.y (rpt1)nop -sub.u r1.z, r5.x, r1.z -mull.u r1.w, r1.y, r4.y +sub.u r2.x, r4.w, r2.y +mull.u r2.y, r1.y, r3.z (rpt1)nop -cmps.u.ge r1.z, r1.z, r3.w -madsh.m16 r1.w, r1.y, r4.y, r1.w +cmps.u.ge r2.x, r2.x, r3.y +madsh.m16 r2.y, r1.y, r3.z, r2.y (rpt1)nop -add.u r1.x, r1.x, r1.z -madsh.m16 r1.z, r4.y, r1.y, r1.w +add.u r1.x, r1.x, r2.x +madsh.m16 r2.x, r3.z, r1.y, r2.y (rpt1)nop -absneg.s r1.w, (neg)r1.x -sub.u r1.z, r6.x, r1.z +absneg.s r2.y, (neg)r1.x +sub.u r2.x, r5.z, r2.x (rpt1)nop -sel.b32 r1.x, r1.w, r3.x, r1.x -cmps.u.ge r1.z, r1.z, r4.y +sel.b32 r1.x, r2.y, r1.w, r1.x +cmps.u.ge r1.w, r2.x, r3.z (rpt1)nop cmps.u.eq r1.x, r1.x, c2.x -add.u r1.y, r1.y, r1.z +add.u r1.y, r1.y, r1.w (rpt1)nop absneg.s r1.x, (neg)r1.x -absneg.s r1.z, (neg)r1.y +absneg.s r1.w, (neg)r1.y (rpt2)nop -sel.b32 r1.y, r1.z, r2.y, r1.y +sel.b32 r1.y, r1.w, r1.z, r1.y (rpt2)nop cmps.u.eq r1.y, r1.y, c2.y (rpt2)nop @@ -105,18 +104,13 @@ and.b r1.x, r1.x, r1.y (rpt2)nop cmps.u.ne r1.x, r1.x, c3.x (rpt2)nop -sel.b32 r1.y, r2.x, r1.x, r2.z -sel.b32 r1.z, r4.z, r1.x, r3.y -sel.b32 r2.x, r5.y, r1.x, r4.w -sel.b32 r1.x, r6.y, r1.x, r5.z -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r1.x +sel.b32 r1.w, r2.w, r1.x, r2.z +sel.b32 r1.z, r3.w, r1.x, r3.x +sel.b32 r1.y, r5.x, r1.x, r4.y +sel.b32 r1.x, r5.w, r1.x, r5.y end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) -; VERT: 123 instructions, 0 half, 7 full +; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) +; VERT: 119 instructions, 0 half, 6 full |