diff options
Diffstat (limited to 'reference/xon7.asm')
-rw-r--r-- | reference/xon7.asm | 327 |
1 files changed, 131 insertions, 196 deletions
diff --git a/reference/xon7.asm b/reference/xon7.asm index 3846190..faa1d33 100644 --- a/reference/xon7.asm +++ b/reference/xon7.asm @@ -6,215 +6,150 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x3f000000, 0xbf000000, 0x40000000, 0xbf800000 +@const(c11.x) 0x00000000, 0x3f800000, 0x3e800000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r2.y, r0.w -mul.f r2.z, r1.x, r1.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.y, r1.w -bary.f r3.w, 1, r0.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r3.x, r1.z -bary.f r1.z, 5, r0.x -mov.f32f32 r1.w, r3.w -mov.f32f32 r2.y, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mad.f32 r2.z, r1.z, r1.z, r2.z -sam (f32)(xyz)r4.z, r2.w, s#4, t#4 -(sy)(ss)mad.f32 r2.w, c10.z, r4.z, c10.w -mov.f32f32 r3.z, r1.w -mov.f32f32 r4.y, r2.y -mov.f32f32 r1.w, r2.z -mov.f32f32 r2.y, r2.w -bary.f r2.z, 12, r0.x -bary.f r2.w, 16, r0.x -bary.f r3.x, 8, r0.x -sam (f32)(xyz)r5.y, r3.y, s#0, t#0 -(sy)(ss)add.f r3.y, r5.y, c10.y -mul.f r2.z, r2.y, r2.z -mad.f32 r3.z, c10.z, r4.w, c10.w -mul.f r2.w, r2.y, r2.w -mov.f32f32 r3.y, r3.y -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.x, r3.z -bary.f r3.z, 13, r0.x -mul.f r4.z, r3.y, r3.y -add.f r4.w, r5.z, c10.y -bary.f r5.y, 17, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -bary.f r3.z, 9, r0.x -mov.f32f32 r4.w, r4.w -mad.f32 r2.w, r3.x, r5.y, r2.w -mov.f32f32 r2.z, r2.z -mad.f32 r5.x, c10.z, r5.x, c10.w -mad.f32 r4.z, r4.w, r4.w, r4.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r3.x, r3.z, r2.y -mov.f32f32 r3.x, r5.x -bary.f r3.z, 14, r0.x -mov.f32f32 r4.z, r4.z -add.f r5.x, r5.w, c10.y -bary.f r5.y, 18, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.z, r5.x -bary.f r5.x, 10, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r2.w, r3.x, r5.y, r2.w -mad.f32 r4.z, r3.z, r3.z, r4.z -mad.f32 r2.y, r3.x, r5.x, r2.y +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r2.y, r0.z +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.w, r1.x +bary.f r1.y, 1, r0.x +sam (f32)(xyz)r3.y, r0.z, s#4, t#4 +(sy)(ss)mad.f32 r0.z, c10.z, r3.y, c10.w +mov.f32f32 r4.x, r1.x +bary.f r0.w, 16, r0.x +mul.f r1.w, r1.z, r1.z +mov.f32f32 r3.y, r0.z +bary.f r4.z, 12, r0.x +mov.f32f32 r4.y, r1.y +mov.f32f32 r3.x, r1.y +bary.f r4.w, 8, r0.x +mul.f r4.z, r3.y, r4.z +mad.f32 r3.z, c10.z, r3.z, c10.w +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.y, r4.w +bary.f r3.y, 17, r0.x +mov.f32f32 r4.w, r3.z +bary.f r5.x, 13, r0.x +sam (f32)(xyz)r5.y, r4.x, s#0, t#0 +(sy)(ss)add.f r4.x, r5.y, c10.y +sam (f32)(xyzw)r6.x, r2.w, s#1, t#1 +(ss)bary.f r2.w, 9, r0.x +(sy)cmps.f.lt r3.x, r6.w, c10.x +mad.f32 r4.y, r4.w, r5.x, r4.z +mad.f32 r3.w, c10.z, r3.w, c10.w +mov.f32f32 r4.z, r4.x +cov.u32f32 r3.x, r3.x +mad.f32 r0.w, r4.w, r2.w, r0.w +mov.f32f32 r2.w, r3.w +bary.f r4.w, 14, r0.x +mul.f r4.x, r4.x, r4.z +add.f r5.x, r5.z, c10.y +mov.f32f32 r5.y, (0.000000) +mad.f32 r4.y, r2.w, r4.w, r4.y +bary.f r4.w, 10, r0.x +mad.f32 r0.z, r3.z, r3.y, r0.z +bary.f r3.y, 5, r0.x +mov.f32f32 r3.z, r4.y +mad.f32 r0.w, r2.w, r4.w, r0.w +mov.f32f32 r2.w, r5.x +cmps.f.ne p0.x, r3.x, r5.y +bary.f r3.x, 18, r0.x +mul.f r4.w, r0.w, r0.w +mad.f32 r4.x, r5.x, r2.w, r4.x +mad.f32 r4.y, r4.y, r3.z, r4.w +mad.f32 r0.z, r3.w, r3.x, r0.z +add.f r3.x, r5.w, c10.y bary.f (ei)r0.x, 6, r0.x -sam (f32)(xyzw)r5.x, r4.x, s#1, t#1 -mov.f32f32 r0.y, r2.w -(sy)cmps.f.lt r2.w, r5.w, c10.x -mul.f r3.x, r2.y, r2.y -mad.f32 r1.w, r0.x, r0.x, r1.w -mad.f32 r3.x, r2.z, r2.z, r3.x -(ss)rsq r4.x, r4.z -(ss)mov.f32f32 r4.x, r4.x +kill p0.x +mov.f32f32 r0.y, r0.z +mov.f32f32 r3.w, r3.x +mad.f32 r4.w, r3.y, r3.y, r1.w +mul.f r1.w, r6.w, c9.x mov.f32f32 r0.y, r0.y -cov.u32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mul.f r3.y, r3.y, r4.x -mad.f32 r3.x, r0.y, r0.y, r3.x -mul.f r4.y, r4.w, r4.x -mul.f r3.z, r3.z, r4.x -(rpt3)nop -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, r4.y +mad.f32 r3.w, r3.w, r3.w, r4.x +mad.f32 r4.x, r0.x, r0.x, r4.w +mul.f r4.w, r6.z, c5.z +mad.f32 r0.y, r0.y, r0.y, r4.y +mul.f r4.y, r6.z, c6.z +mul.f r5.x, r6.y, c6.y +mul.f r5.y, r6.x, c6.x +rsq r3.w, r3.w +(ss)mov.f32f32 r5.z, r3.w +mul.f r3.x, r3.x, r3.w +(ss)rsq r3.w, r4.x +(ss)mov.f32f32 r4.x, r3.w +rsq r0.y, r0.y +(ss)mov.f32f32 r5.w, r0.y +(ss)mul.f r0.y, r0.z, r0.y +mul.f r0.z, r4.z, r5.z +mul.f r2.w, r2.w, r5.z +mul.f r0.w, r0.w, r5.w +mov.f32f32 r4.z, r0.y +mul.f r3.z, r3.z, r5.w +mov.f32f32 r5.z, r0.z +mul.f r0.z, r0.z, r0.w +mov.f32f32 r0.w, r0.w +mad.f32 r0.z, r2.w, r3.z, r0.z +max.f r5.w, c11.z, r4.z +mad.f32 r0.z, r3.x, r4.z, r0.z +mul.f r6.z, r5.z, r0.w mov.f32f32 r3.z, r3.z -mul.f r2.y, r2.y, r3.x -mul.f r0.y, r0.y, r3.x -mul.f r2.z, r2.z, r3.x -mov.f32f32 r3.x, (0.000000) -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -cmps.f.ne p0.x, r2.w, r3.x -mul.f r2.w, r3.y, r2.y -mul.f r3.x, r3.y, r2.y -mad.f32 r2.w, r4.x, r2.z, r2.w -mad.f32 r3.x, r4.x, r2.z, r3.x -max.f r4.y, c11.z, r0.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w mov.f32f32 r2.w, r2.w +mul.f r5.z, r0.z, r5.z mov.f32f32 r3.x, r3.x -mad.f32 r2.w, r3.z, r0.y, r2.w -mad.f32 r3.x, r3.z, r0.y, r3.x -mov.f32f32 r4.y, r4.y -kill p0.x -mul.f r3.y, r2.w, r3.y -mul.f r4.x, r2.w, r4.x -mul.f r2.w, r2.w, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r4.x -mov.f32f32 r2.w, r2.w -max.f r3.x, r3.x, c11.x -mul.f r3.y, c10.z, r3.y -mul.f r3.z, c10.z, r3.z +rcp r5.w, r5.w +(ss)mov.f32f32 r6.w, r5.w +mad.f32 r6.z, r2.w, r3.z, r6.z +mul.f r5.z, c10.z, r5.z +mad.f32 r4.z, r3.x, r4.z, r6.z +mul.f r2.w, r0.z, r2.w +mul.f r0.z, r0.z, r3.x +add.f r0.w, r0.w, (neg)r5.z +mul.f r1.z, r1.z, r4.x +max.f r3.x, r4.z, c11.x mul.f r2.w, c10.z, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mul.f r4.x, r5.z, c6.z -add.f r2.y, r2.y, (neg)r3.y -add.f r2.z, r2.z, (neg)r3.z -add.f r0.y, r0.y, (neg)r2.w -mul.f r2.w, r4.x, r3.x -mov.f32f32 r2.y, r2.y -mul.f r1.x, r1.x, r1.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.x, r1.x -mul.f r3.y, r5.y, c6.y -mul.f r3.z, r5.x, c6.x -rcp r4.x, r4.y -(ss)mov.f32f32 r4.x, r4.x -mul.f r1.x, r2.y, r1.x -mul.f r1.z, r1.z, r1.w -mul.f r2.y, r3.y, r3.x -mul.f r3.x, r3.z, r3.x -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r0.z -mad.f32 r0.z, r2.z, r1.z, r1.x -mov.f32f32 r1.x, r1.y -mul.f r0.x, r0.x, r1.w -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.w, c9.x -mul.f r1.y, r5.z, c5.z -mul.f r1.z, r5.y, c5.y -mad.f32 r0.x, r0.y, r0.x, r0.z -mov.f32f32 r0.y, r1.x -nop -sam (f32)(xyz)r5.y, r3.y, s#3, t#3 -(sy)mul.f r0.z, r5.w, r4.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.z, r4.x -mul.f r1.w, r5.y, r4.x -nop +mul.f r0.z, c10.z, r0.z +mul.f r0.w, r0.w, r1.z +mov.f32f32 r1.z, r3.x +add.f r2.w, r3.z, (neg)r2.w +mul.f r3.y, r3.y, r4.x +mul.f r3.x, r5.y, r3.x +add.f r0.y, r0.y, (neg)r0.z +sam (f32)(xyz)r7.x, r2.y, s#3, t#3 +mul.f r0.z, r4.y, r1.z +mad.f32 r0.w, r2.w, r3.y, r0.w +mul.f r0.x, r0.x, r3.w +mul.f r1.z, r5.x, r1.z +(sy)(ss)mul.f r2.y, r7.z, r6.w +mul.f r2.z, r7.y, r6.w +mad.f32 r0.x, r0.y, r0.x, r0.w +mul.f r0.y, r6.y, c5.y +mul.f r0.w, r6.x, c5.x +mul.f r2.w, r7.x, r5.w max.f r0.x, (neg)r0.x, c11.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.y, r1.z -mul.f r1.z, r5.x, c5.x -(ss)mov.f32f32 r3.y, r0.w -mov.f32f32 r0.w, r3.w -log2 r0.x, r0.x -(rpt2)nop -mov.f32f32 r3.z, r0.w -mov.f32f32 r0.w, r1.z -(rpt4)nop -sam (f32)(xyzw)r3.y, r3.y, s#2, t#2 -(sy)mad.f32 r1.z, c8.x, r4.x, c11.y -mul.f r3.w, r3.w, c7.z -(ss)mul.f r3.z, r3.z, c7.y +sam (f32)(xyzw)r3.y, r1.x, s#2, t#2 +(sy)(ss)mul.f r1.x, r3.w, c7.z +mul.f r1.y, r3.z, c7.y +mad.f32 r3.z, c8.x, r4.x, c11.y mul.f r3.y, r3.y, c7.x -mov.f32f32 r1.z, r1.z -(rpt2)nop -(ss)mul.f r0.x, r1.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(rpt1)nop +log2 r0.x, r0.x +(ss)mul.f r0.x, r3.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r1.z, r3.w, r0.x, r2.w -mad.f32 r2.y, r3.z, r0.x, r2.y +(ss)mad.f32 r0.z, r1.x, r0.x, r0.z +mad.f32 r1.x, r1.y, r0.x, r1.z (ss)mad.f32 r0.x, r3.y, r0.x, r3.x nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -nop -mad.f32 r0.y, r1.z, r0.z, r0.y -mad.f32 r0.z, r2.y, r1.x, r1.y -mad.f32 r0.x, r0.x, r2.z, r0.w -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, r0.z, r2.y, r4.w +mad.f32 r1.y, r1.x, r2.z, r0.y +mad.f32 r1.x, r0.x, r2.w, r0.w end ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 226 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 146 instructions, 0 half, 8 full |