summaryrefslogtreecommitdiff
path: root/reference/idiv-vert.asm
blob: bb5adce81fb16783edbfe023059a6cba46f28a82 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
; options:
; VERT: new compiler
; Declarations for the shader below: register bindings for inputs and outputs,
; followed by the immediate constants that the instruction stream reads.
; Four input slots in0..in3 are bound to r0.x..r0.w.
@in(r0.x)	in0
@in(r0.y)	in1
@in(r0.z)	in2
@in(r0.w)	in3
; Eight output slots: out0..out3 are bound to r0.x..r0.w, out4..out7 to r1.x..r1.w.
@out(r0.x)	out0
@out(r0.y)	out1
@out(r0.z)	out2
@out(r0.w)	out3
@out(r1.x)	out4
@out(r1.y)	out5
@out(r1.z)	out6
@out(r1.w)	out7
; Immediate constants. NOTE(review): presumably the four words fill .x/.y/.z/.w
; of the named constant register in order -- confirm against the assembler.
; c3: 0x00000000 and 0x3f800000 (the IEEE-754 single-precision encoding of 1.0).
; Only c3.x and c3.y are referenced by the instructions in this file.
@const(c3.x)	0x00000000, 0x3f800000, 0x00000000, 0x00000000
; c4: 0xfffffffe (used as an add.u operand) and 0x0000001f = 31 (used as a shift count).
; Only c4.x and c4.y are referenced by the instructions in this file.
@const(c4.x)	0xfffffffe, 0x0000001f, 0x00000000, 0x00000000
; Instruction stream (compiler output; the x and y lanes are interleaved).
; NOTE(review): read from the mnemonics, this looks like a two-lane signed
; integer division check: c0.x / c1.x and c0.y / c1.y are computed through a
; float reciprocal, compared with c2.x / c2.y, and r1.xyzw is set from the
; outcome. Opcode semantics are inferred from the mnemonics and operand flow;
; c0, c1 and c2 are not declared in this file -- confirm against the ISA docs
; and whatever supplies those constants.
; NOTE(review): the nop / (rptN)nop lines and the (sy)/(ss) flags are presumably
; scheduling/synchronisation inserted by the compiler; do not reorder.
;
; Convert c1.x / c1.y (the presumed divisors) to float in r1.x / r1.y.
(sy)(ss)cov.s32f32 r1.x, c1.x
cov.s32f32 r1.y, c1.y
; Raw copies of c0.y / c0.x (the presumed dividends), XORed with c1 just below.
mov.f32f32 r1.z, c0.y
mov.f32f32 r1.w, c0.x
; Float absolute value of the converted c1 values.
absneg.f r1.x, (abs)r1.x
absneg.f r1.y, (abs)r1.y
; r1.z = c0.y ^ c1.y, r1.w = c0.x ^ c1.x. Both are shifted right by c4.y (31)
; further down and later used as sel.b32 selectors.
; NOTE(review): presumably a "signs differ" flag per lane -- confirm.
xor.b r1.z, r1.z, c1.y
xor.b r1.w, r1.w, c1.x
; Convert c0.y / c0.x to float in r2.x / r2.y.
cov.s32f32 r2.x, c0.y
cov.s32f32 r2.y, c0.x
; r2.z, r2.w, r3.x, r3.w, r4.y, r5.x, r5.y and r5.w are loaded from c3.x (0x0)
; or c3.y (0x3f800000, 1.0f) at scattered points; they are read only by the
; four sel.b32 instructions at the end.
mov.f32f32 r2.z, c3.y
; Reciprocal of each float |c1| value, followed by add.u with c4.x (0xfffffffe)
; applied to the result register.
; NOTE(review): presumably lowers the reciprocal's bit pattern by 2 so the
; quotient estimate does not overshoot -- confirm.
rcp r1.x, r1.x
(ss)add.u r1.x, r1.x, c4.x
rcp r1.y, r1.y
(ss)add.u r1.y, r1.y, c4.x
; Float absolute value of the converted c0 values.
absneg.f r2.y, (abs)r2.y
absneg.f r2.x, (abs)r2.x
; Shift the XOR results right by c4.y (0x1f = 31).
shr.b r1.z, r1.z, c4.y
shr.b r1.w, r1.w, c4.y
; First estimate per lane: float |c0| times the adjusted reciprocal.
; r2.y holds the x lane (c0.x, c1.x); r2.x holds the y lane (c0.y, c1.y).
mul.f r2.y, r2.y, r1.x
mul.f r2.x, r2.x, r1.y
mov.f32f32 r2.w, c3.y
mov.f32f32 r3.x, c3.x
; Estimates converted back to integer; integer |c1.x| goes to r3.y and
; integer |c1.y| to r3.z.
cov.f32s32 r2.y, r2.y
absneg.s r3.y, (abs)c1.x
cov.f32s32 r2.x, r2.x
absneg.s r3.z, (abs)c1.y
mov.f32f32 r3.w, c3.x
; mull.u followed by two madsh.m16 with swapped sources on one destination.
; NOTE(review): presumably the three-instruction 32-bit integer multiply, giving
; estimate * |c1| in r4.x (x lane) and r4.z (y lane) -- confirm.
mull.u r4.x, r2.y, r3.y
mov.f32f32 r4.y, c3.x
madsh.m16 r4.x, r2.y, r3.y, r4.x
mull.u r4.z, r2.x, r3.z
madsh.m16 r4.x, r3.y, r2.y, r4.x
; r4.w = integer |c0.x|.
absneg.s r4.w, (abs)c0.x
madsh.m16 r4.z, r2.x, r3.z, r4.z
mov.f32f32 r5.x, c3.y
mov.f32f32 r5.y, c3.y
; x lane: r4.x = |c0.x| - r4.x (the presumed remainder after the first estimate).
sub.u r4.x, r4.w, r4.x
madsh.m16 r4.z, r3.z, r2.x, r4.z
; r5.z = integer |c0.y|.
absneg.s r5.z, (abs)c0.y
mov.f32f32 r5.w, c3.x
; x lane refinement: r4.x is converted to float, multiplied by the reciprocal
; in r1.x, converted back to unsigned, and added to the first estimate r2.y.
; The refined value ends up in r1.x.
cov.u32f32 r4.x, r4.x
(rpt2)nop
mul.f r1.x, r4.x, r1.x
; y lane: r4.x is reused for |c0.y| - r4.z once the x-lane value has been read.
sub.u r4.x, r5.z, r4.z
(rpt1)nop
cov.f32u32 r1.x, r1.x
cov.u32f32 r4.x, r4.x
(rpt1)nop
add.u r1.x, r2.y, r1.x
; y lane refinement, same steps; the refined value ends up in r1.y.
mul.f r1.y, r4.x, r1.y
(rpt1)nop
; x lane: same mull.u / madsh.m16 pattern on the refined value r1.x and |c1.x|,
; result in r2.y.
mull.u r2.y, r1.x, r3.y
cov.f32u32 r1.y, r1.y
madsh.m16 r2.y, r1.x, r3.y, r2.y
nop
madsh.m16 r2.y, r3.y, r1.x, r2.y
add.u r1.y, r2.x, r1.y
(rpt1)nop
; x lane: r2.x = |c0.x| - r2.y.
sub.u r2.x, r4.w, r2.y
; y lane: same multiply pattern on r1.y and |c1.y| (starts in r2.y, finishes in r2.x).
mull.u r2.y, r1.y, r3.z
(rpt1)nop
; x lane: unsigned compare of r2.x against |c1.x|; the result is added to r1.x.
; NOTE(review): assumes cmps yields 1 when true, making this a +1 correction
; when the remainder is still >= the divisor -- confirm.
cmps.u.ge r2.x, r2.x, r3.y
madsh.m16 r2.y, r1.y, r3.z, r2.y
(rpt1)nop
add.u r1.x, r1.x, r2.x
madsh.m16 r2.x, r3.z, r1.y, r2.y
(rpt1)nop
; x lane: r2.y = negated r1.x; the sel.b32 below chooses between r2.y and r1.x
; with r1.w (the shifted XOR) as selector.
; NOTE(review): presumably picks the negated value when the signs differed --
; confirm the sel.b32 operand order.
absneg.s r2.y, (neg)r1.x
; y lane: r2.x = |c0.y| - r2.x.
sub.u r2.x, r5.z, r2.x
(rpt1)nop
sel.b32 r1.x, r2.y, r1.w, r1.x
; y lane: unsigned compare of r2.x against |c1.y|; r1.w is reused as scratch
; here, after its last read as a selector, and is added to r1.y below.
cmps.u.ge r1.w, r2.x, r3.z
(rpt1)nop
; x lane: compare r1.x with c2.x, then negate the compare result.
; NOTE(review): c2.x / c2.y look like expected quotients supplied from outside
; this file -- confirm.
cmps.u.eq r1.x, r1.x, c2.x
add.u r1.y, r1.y, r1.w
(rpt1)nop
absneg.s r1.x, (neg)r1.x
; y lane: negate r1.y, select using r1.z, compare with c2.y, negate the result.
absneg.s r1.w, (neg)r1.y
(rpt2)nop
sel.b32 r1.y, r1.w, r1.z, r1.y
(rpt2)nop
cmps.u.eq r1.y, r1.y, c2.y
(rpt2)nop
absneg.s r1.y, (neg)r1.y
(rpt2)nop
; AND the two per-lane results, then compare the combination with c3.x (0x0).
and.b r1.x, r1.x, r1.y
(rpt2)nop
cmps.u.ne r1.x, r1.x, c3.x
(rpt2)nop
; Final r1.xyzw, each selected by r1.x between two registers loaded from c3:
;   r1.w: r2.w (1.0f) or r2.z (1.0f)
;   r1.z: r3.w (0x0)  or r3.x (0x0)
;   r1.y: r5.x (1.0f) or r4.y (0x0)
;   r1.x: r5.w (0x0)  or r5.y (1.0f)
; r1.x is overwritten only by the last sel.b32, after its final use as selector.
; NOTE(review): if sel.b32 takes its first source when the selector is nonzero,
; r1 becomes (0, 1, 0, 1) when both lanes matched and (1, 0, 0, 1) otherwise --
; confirm.
sel.b32 r1.w, r2.w, r1.x, r2.z
sel.b32 r1.z, r3.w, r1.x, r3.x
sel.b32 r1.y, r5.x, r1.x, r4.y
sel.b32 r1.x, r5.w, r1.x, r5.y
end
nop
; Compiler-emitted summary of outputs, inputs and instruction/register counts:
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0)
; VERT: 119 instructions, 0 half, 6 full