diff options
Diffstat (limited to 'reference/stk-mines/stk-mines-22.asm')
-rw-r--r-- | reference/stk-mines/stk-mines-22.asm | 412 |
1 files changed, 164 insertions, 248 deletions
diff --git a/reference/stk-mines/stk-mines-22.asm b/reference/stk-mines/stk-mines-22.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-22.asm +++ b/reference/stk-mines/stk-mines-22.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full |