diff options
Diffstat (limited to 'reference/stk/stk0601.asm')
-rw-r--r-- | reference/stk/stk0601.asm | 239 |
1 files changed, 112 insertions, 127 deletions
diff --git a/reference/stk/stk0601.asm b/reference/stk/stk0601.asm index aadaff7..5f20141 100644 --- a/reference/stk/stk0601.asm +++ b/reference/stk/stk0601.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r0.w) in8 -@in(r1.x) in9 -@in(r1.y) in10 -@in(r1.z) in11 -@in(r3.x) in12 -@in(r3.y) in13 -@in(r3.z) in14 -@in(r3.w) in15 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r1.w, r2.x, c13.x +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r3.x, c13.x mul.f r4.x, r0.x, c4.x -mad.f32 r1.w, c14.x, r2.y, r1.w +mad.f32 r0.w, c14.x, r3.y, r0.w mad.f32 r4.x, c4.y, r0.y, r4.x -mad.f32 r1.w, c15.x, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.y, c16.x, r2.w, r1.w -mov.f32f32 r1.w, r4.x -mov.f32f32 r4.x, c10.z +mad.f32 r0.w, c15.x, r3.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r0.w, c16.x, r3.w, r0.w +mov.f32f32 r4.y, c10.z mov.f32f32 r4.z, c10.y -mul.f r4.w, r4.y, r4.y -mul.f r5.x, r2.x, c13.y -mad.f32 r5.y, c4.z, r0.z, r1.w -mad.f32 r1.w, c14.y, r2.y, r5.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, c15.y, r2.z, r1.w -mul.f r5.x, r5.y, c11.x -mad.f32 r5.z, c16.y, r2.w, r1.w -mul.f r1.w, r0.x, c5.x -max.f r1.z, r1.z, c20.x -mad.f32 r4.x, c8.z, r4.x, c9.z -mad.f32 r4.w, r5.z, r5.z, r4.w -mad.f32 r5.w, c5.y, r0.y, r1.w -min.f r1.w, r1.z, c20.y -add.f r1.z, c17.z, r4.x -mov.f32f32 r4.x, r4.w -mul.f r4.w, r2.x, c13.z -mov.f32f32 r5.w, r5.w -mad.f32 r4.w, c14.z, r2.y, r4.w +mov.f32f32 r4.w, c10.x +mul.f r5.x, r0.w, r0.w +mul.f r5.y, r3.x, c13.y +mul.f r5.z, r4.x, c11.x +mad.f32 r5.y, c14.y, r3.y, r5.y +mul.f r5.w, r0.x, c5.x +mad.f32 r5.y, c15.y, r3.z, r5.y +mad.f32 r5.w, c5.y, r0.y, r5.w +mad.f32 r5.y, c16.y, r3.w, r5.y mad.f32 r5.w, c5.z, r0.z, r5.w -mad.f32 r4.w, c15.z, r2.z, r4.w +mad.f32 r4.y, c8.z, r4.y, c9.z mad.f32 r4.z, c8.y, r4.z, c9.y -mad.f32 r4.w, c16.z, r2.w, r4.w -mad.f32 r5.x, c11.y, r5.w, r5.x -mov.f32f32 r6.x, c10.x +mad.f32 r5.x, r5.y, r5.y, r5.x +mul.f r6.x, r3.x, c13.z +mad.f32 r5.z, c11.y, r5.w, r5.z +mad.f32 r6.x, c14.z, r3.y, r6.x mul.f r0.x, r0.x, c6.x -mad.f32 r4.x, r4.w, r4.w, r4.x -mov.f32f32 r5.x, r5.x +mad.f32 r6.x, c15.z, r3.z, r6.x mad.f32 r0.x, c6.y, r0.y, r0.x -add.f r0.y, c17.y, r4.z -mad.f32 r4.z, c8.x, r6.x, c9.x -mul.f r6.x, r2.x, c0.w -mul.f r6.y, r2.x, c0.z -rsq r4.x, (abs)r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r0.x, r0.x -add.f r4.z, c17.x, r4.z +mad.f32 r0.y, c16.z, r3.w, r6.x mad.f32 r0.x, c6.z, r0.z, r0.x -mul.f r0.z, r4.y, r4.x -mul.f r4.y, r5.z, r4.x -mul.f r4.x, r4.w, r4.x -mad.f32 r4.w, c11.z, r0.x, r5.x -add.f r0.z, c11.x, (neg)r0.z -add.f r4.y, c11.y, (neg)r4.y -add.f r4.x, c11.z, (neg)r4.x -max.f r5.x, r4.w, c20.x -mul.f r5.z, r0.z, r0.z -cmps.f.lt r4.w, (neg)r4.w, c20.x -mad.f32 r5.z, r4.y, r4.y, r5.z -mov.f32f32 r5.x, r5.x -mul.f r1.x, c18.y, r1.x -mul.f r0.w, c18.x, r0.w -mov.f32f32 r5.z, r5.z -mul.f r1.y, c18.z, r1.y -mad.f32 r5.z, r4.x, r4.x, r5.z -mad.f32 r1.x, r5.x, r1.x, r0.y -mad.f32 r4.z, r5.x, r0.w, r4.z -mad.f32 r0.y, c1.w, r2.y, r6.x -mad.f32 r0.w, c1.z, r2.y, r6.y -mul.f r6.x, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -rsq r5.z, (abs)r5.z -(ss)mov.f32f32 r5.z, r5.z -mad.f32 r1.y, r5.x, r1.y, r1.z -mad.f32 r0.y, c2.w, r2.z, r0.y -mad.f32 r1.z, c2.z, r2.z, r0.w -mul.f r0.z, r0.z, r5.z -mul.f r4.y, r4.y, r5.z -mul.f r4.x, r4.x, r5.z -mad.f32 r0.w, c3.w, r2.w, r0.y -mul.f r0.y, r5.y, r0.z -mad.f32 r0.z, c3.z, r2.w, r1.z -mad.f32 r0.y, r5.w, r4.y, r0.y -mad.f32 r1.z, c1.y, r2.y, r6.x -mad.f32 r2.x, c1.x, r2.y, r2.x -mov.f32f32 r2.y, c7.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c2.y, r2.z, r1.z -mad.f32 r0.x, r0.x, r4.x, r0.y -mad.f32 r0.y, c3.y, r2.w, r1.z -mad.f32 r1.z, c2.x, r2.z, r2.x -min.f r4.x, r2.y, c20.z -max.f r4.y, r0.x, c20.x -mad.f32 r0.x, c3.x, r2.w, r1.z -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x -nop -log2 r1.z, r4.y -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -mul.f r1.z, r4.x, r1.z +add.f r0.z, c17.z, r4.y +add.f r4.y, c17.y, r4.z +mad.f32 r4.z, r0.y, r0.y, r5.x +mad.f32 r5.x, c11.z, r0.x, r5.z +mad.f32 r4.w, c8.x, r4.w, c9.x +mul.f r5.z, r3.x, c0.w +mul.f r6.x, r3.x, c0.z +mul.f r6.y, r3.x, c0.y +mul.f r3.x, r3.x, c0.x +rsq r4.z, (abs)r4.z +(ss)mov.f32f32 r6.z, r4.z +mul.f r0.y, r0.y, r4.z +(ss)max.f r4.z, r5.x, c20.x +add.f r4.w, c17.x, r4.w +mul.f r0.w, r0.w, r6.z +mul.f r5.y, r5.y, r6.z +(rpt1)nop +add.f r0.w, c11.x, (neg)r0.w +add.f r5.y, c11.y, (neg)r5.y +add.f r0.y, c11.z, (neg)r0.y +mov.f32f32 r6.z, r4.z +mul.f r6.w, r0.w, r0.w +mul.f r1.z, c18.z, r1.z +mad.f32 r6.w, r5.y, r5.y, r6.w +mul.f r1.y, c18.y, r1.y +mad.f32 r6.w, r0.y, r0.y, r6.w +mad.f32 r1.z, r6.z, r1.z, r0.z +mul.f r0.z, c18.x, r1.x +cmps.f.lt r1.x, (neg)r5.x, c20.x +mad.f32 r5.x, c1.w, r3.y, r5.z +mad.f32 r5.z, c1.z, r3.y, r6.x +mad.f32 r6.x, c1.y, r3.y, r6.y +rsq r6.y, (abs)r6.w +(ss)mov.f32f32 r6.w, r6.y +mul.f r0.y, r0.y, r6.y +mad.f32 r1.y, r6.z, r1.y, r4.y +mad.f32 r4.y, r4.z, r0.z, r4.w +mul.f r0.z, r0.w, r6.w +mul.f r4.z, r5.y, r6.w +mad.f32 r0.w, c2.w, r3.z, r5.x +mad.f32 r4.w, c2.z, r3.z, r5.z +mul.f r0.z, r4.x, r0.z +mad.f32 r0.w, c3.w, r3.w, r0.w +mad.f32 r4.x, r5.w, r4.z, r0.z +mad.f32 r0.z, c3.z, r3.w, r4.w +mad.f32 r0.x, r0.x, r0.y, r4.x +mad.f32 r0.y, c2.y, r3.z, r6.x +mad.f32 r3.x, c1.x, r3.y, r3.x +max.f r1.w, r1.w, c20.x +max.f r0.x, r0.x, c20.x +mad.f32 r0.y, c3.y, r3.w, r0.y +mad.f32 r3.x, c2.x, r3.z, r3.x +min.f r1.w, r1.w, c20.y +mov.f32f32 r3.y, c7.x +(rpt1)nop +log2 r3.z, r0.x +(ss)mad.f32 r0.x, c3.x, r3.w, r3.x +min.f r3.x, r3.y, c20.z (rpt2)nop -mov.f32f32 r1.z, r1.z +(ss)mul.f r3.x, r3.x, r3.z (rpt5)nop -exp2 r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -sel.b32 r1.z, r1.z, r4.w, c20.x -(rpt2)nop -mov.f32f32 r1.z, r1.z -(rpt2)nop -mov.f32f32 r1.z, r1.z +exp2 r3.x, r3.x +(ss)sel.b32 r1.x, r3.x, r1.x, c20.x (rpt2)nop -mad.f32 r1.y, c19.z, r1.z, r1.y -mad.f32 r1.x, c19.y, r1.z, r1.x -mad.f32 r1.z, c19.x, r1.z, r4.z -nop -max.f r1.y, r1.y, c20.x +(ss)mov.f32f32 r3.x, r1.x +mad.f32 r1.x, c19.x, r1.x, r4.y +(rpt1)nop +mov.f32f32 r3.x, r3.x max.f r1.x, r1.x, c20.x -max.f r3.x, r1.z, c20.x +(rpt1)nop +mad.f32 r1.z, c19.z, r3.x, r1.z +mad.f32 r1.y, c19.y, r3.x, r1.y +min.f r1.x, r1.x, c20.y nop -min.f r1.z, r1.y, c20.y -min.f r1.y, r1.x, c20.y -min.f r1.x, r3.x, c20.y +max.f r1.z, r1.z, c20.x +max.f r1.y, r1.y, c20.x +(rpt1)nop +min.f r1.z, r1.z, c20.y +min.f r1.y, r1.y, c20.y end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) -; VERT: 144 instructions, 0 half, 7 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 124 instructions, 0 half, 7 full |