/* Source: root/r600_reg_auto_r6xx.h (blob d757030e155458c34a097a2f629d05eb2bd46c2f) */
/*
 * RadeonHD R6xx, R7xx Register documentation
 *
 * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 * Copyright (C) 2008-2009  Matthias Hopf
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * Include guard: the declarations that follow are skipped when _AUTOREGS is
 * already defined, so including this header more than once is harmless.
 * The matching #endif is expected at the end of the file (outside this
 * excerpt).
 *
 * NOTE(review): identifiers starting with an underscore followed by an
 * uppercase letter are reserved for the implementation in C. Renaming the
 * guard would be cleaner, but confirm first that nothing else tests
 * _AUTOREGS.
 */
#ifndef _AUTOREGS
#define _AUTOREGS

enum {

    VGT_VTX_VECT_EJECT_REG                                = 0x000088b0,	/* This register defines the number of primitives that are allowed to pass during the assembly of a single vertex vector. After this number of primitives have passed, the vertex vector is submitted to the shaders for processing even if it is not full. */
	PRIM_COUNT_mask                                   = 0x3ff << 0,	/* This is the count of primitives allowed to pass during the assembly of a single vertex vector. */
	PRIM_COUNT_shift                                  = 0,
    VGT_LAST_COPY_STATE                                   = 0x000088c0,	/* This register retains the data from the last GFX_COPY_STATE command. */
	SRC_STATE_ID_mask                                 = 0x07 << 0,	/* Source context from last GFX_COPY_STATE command. */
	SRC_STATE_ID_shift                                = 0,
	DST_STATE_ID_mask                                 = 0x07 << 16,	/* Destination context from last GFX_COPY_STATE command. */
	DST_STATE_ID_shift                                = 16,
    VGT_CACHE_INVALIDATION                                = 0x000088c4,	/* VGT cache invalidation */
	CACHE_INVALIDATION_mask                           = 0x03 << 0,	/* Indicates whether VC or TC is used for cache invalidation */
	CACHE_INVALIDATION_shift                          = 0,
	    VC_ONLY                                       = 0x00,	/* VC_ONLY: VC_ONLY */
	    TC_ONLY                                       = 0x01,	/* TC_ONLY: TC_ONLY */
	    VC_AND_TC                                     = 0x02,	/* VC_AND_TC: VC_AND_TC */
	VS_NO_EXTRA_BUFFER_bit                            = 1 << 5,
    VGT_GS_PER_ES                                         = 0x000088c8,	/* Maximum GS prims per ES thread */
	/* Maximum number of GS prims per ES thread */
    VGT_ES_PER_GS                                         = 0x000088cc,	/* Maximum ES vertices per GS thread */
	/* Maximum number of ES vertices per GS thread */
    VGT_GS_VERTEX_REUSE                                   = 0x000088d4,	/* Reusability for the GS path; it has nothing to do with the number of good SIMDs */
	VERT_REUSE_mask                                   = 0x1f << 0,	/* reuse number of GS block. Valid values are 0, 4-16. */
	VERT_REUSE_shift                                  = 0,
    VGT_MC_LAT_CNTL                                       = 0x000088d8,	/* Time Stamp Counter Resolution Select */
	MC_TIME_STAMP_RES_mask                            = 0x03 << 0,	/* Select the counter resolution for tracking memory controller latency */
	MC_TIME_STAMP_RES_shift                           = 0,
	    X_0_992_MAX_LATENCY                           = 0x00,	/* 0 -> 992 max latency, step of 32 */
	    X_0_496_MAX_LATENCY                           = 0x01,	/* 0 -> 496 max latency, step of 16 */
	    X_0_248_MAX_LATENCY                           = 0x02,	/* 0 -> 248 max latency, step of 8 */
	    X_0_124_MAX_LATENCY                           = 0x03,	/* 0 -> 124 max latency, step of 4 */
    VGT_GS_PER_VS                                         = 0x000088e8,	/* Maximum GS threads per VS thread */
	GS_PER_VS_mask                                    = 0x0f << 0,	/* Maximum number of GS threads per VS thread */
	GS_PER_VS_shift                                   = 0,
    VGT_CNTL_STATUS                                       = 0x000088f0,	/* Status Bits */
	VGT_OUT_INDX_BUSY_bit                             = 1 << 0,	/* If set, the Output Index block within the VGT is busy */
	VGT_OUT_BUSY_bit                                  = 1 << 1,	/* If set, the Output block within the VGT is busy */
	VGT_PT_BUSY_bit                                   = 1 << 2,	/* If set, the Pass-thru block within the VGT is busy */
	VGT_TE_BUSY_bit                                   = 1 << 3,	/* If set, the Tessellation Engine block within the VGT is busy */
	VGT_VR_BUSY_bit                                   = 1 << 4,	/* If set, the Vertex Reuse Block within the VGT is busy */
	VGT_GRP_BUSY_bit                                  = 1 << 5,	/* If set, the Grouper Block within the VGT is busy */
	VGT_DMA_REQ_BUSY_bit                              = 1 << 6,	/* If set, the VGT DMA is busy requesting */
	VGT_DMA_BUSY_bit                                  = 1 << 7,	/* If set, the VGT DMA is busy */
	VGT_GS_BUSY_bit                                   = 1 << 8,	/* If set, VGT GS is actively processing */
	VGT_BUSY_bit                                      = 1 << 9,	/* If set, VGT is Busy */
    VGT_PRIMITIVE_TYPE                                    = 0x00008958,	/* VGT Primitive Type */
	VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask                = 0x3f << 0,	/* Primitive Type */
	VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift               = 0,
	    DI_PT_NONE                                    = 0x00,	/* DI_PT_NONE: DI_PT_NONE None (does not create draw trigger) */
	    DI_PT_POINTLIST                               = 0x01,	/* DI_PT_POINTLIST: DI_PT_POINTLIST Point List */
	    DI_PT_LINELIST                                = 0x02,	/* DI_PT_LINELIST: DI_PT_LINELIST Line List */
	    DI_PT_LINESTRIP                               = 0x03,	/* DI_PT_LINESTRIP: DI_PT_LINESTRIP Line Strip */
	    DI_PT_TRILIST                                 = 0x04,	/* DI_PT_TRILIST: DI_PT_TRILIST Tri List */
	    DI_PT_TRIFAN                                  = 0x05,	/* DI_PT_TRIFAN: DI_PT_TRIFAN Tri Fan */
	    DI_PT_TRISTRIP                                = 0x06,	/* DI_PT_TRISTRIP: DI_PT_TRISTRIP Tri Strip */
	    DI_PT_UNUSED_0                                = 0x07,	/* DI_PT_UNUSED_0: DI_PT_UNUSED_0 Reserved 1 */
	    DI_PT_UNUSED_1                                = 0x08,	/* DI_PT_UNUSED_1: DI_PT_UNUSED_1 Reserved 2 */
	    DI_PT_UNUSED_2                                = 0x09,	/* DI_PT_UNUSED_2: DI_PT_UNUSED_2 Reserved 3 */
	    DI_PT_LINELIST_ADJ                            = 0x0a,	/* DI_PT_LINELIST_ADJ: DI_PT_LINELIST_ADJ Adjacent Line List */
	    DI_PT_LINESTRIP_ADJ                           = 0x0b,	/* DI_PT_LINESTRIP_ADJ: DI_PT_LINESTRIP_ADJ Adjacent Line Strip */
	    DI_PT_TRILIST_ADJ                             = 0x0c,	/* DI_PT_TRILIST_ADJ: DI_PT_TRILIST_ADJ Adjacent Tri List */
	    DI_PT_TRISTRIP_ADJ                            = 0x0d,	/* DI_PT_TRISTRIP_ADJ: DI_PT_TRISTRIP_ADJ Adjacent Tri Strip */
	    DI_PT_UNUSED_3                                = 0x0e,	/* DI_PT_UNUSED_3: DI_PT_UNUSED_3 Reserved 3 */
	    DI_PT_UNUSED_4                                = 0x0f,	/* DI_PT_UNUSED_4: DI_PT_UNUSED_4 Reserved 4 */
	    DI_PT_TRI_WITH_WFLAGS                         = 0x10,	/* DI_PT_TRI_WITH_WFLAGS: DI_PT_TRI_WITH_WFLAGS Tri List w/Flags (legacy R128) */
	    DI_PT_RECTLIST                                = 0x11,	/* DI_PT_RECTLIST: DI_PT_RECTLIST Rect List */
	    DI_PT_LINELOOP                                = 0x12,	/* DI_PT_LINELOOP: DI_PT_LINELOOP Line Loop */
	    DI_PT_QUADLIST                                = 0x13,	/* DI_PT_QUADLIST: DI_PT_QUADLIST Quad List */
	    DI_PT_QUADSTRIP                               = 0x14,	/* DI_PT_QUADSTRIP: DI_PT_QUADSTRIP Quad Strip */
	    DI_PT_POLYGON                                 = 0x15,	/* DI_PT_POLYGON: DI_PT_POLYGON Polygon */
	    DI_PT_2D_COPY_RECT_LIST_V0                    = 0x16,	/* DI_PT_2D_COPY_RECT_LIST_V0: DI_PT_2D_COPY_RECT_LIST_V0 2D Copy Rect List V0 */
	    DI_PT_2D_COPY_RECT_LIST_V1                    = 0x17,	/* DI_PT_2D_COPY_RECT_LIST_V1: DI_PT_2D_COPY_RECT_LIST_V1 2D Copy Rect List V1 */
	    DI_PT_2D_COPY_RECT_LIST_V2                    = 0x18,	/* DI_PT_2D_COPY_RECT_LIST_V2: DI_PT_2D_COPY_RECT_LIST_V2 2D Copy Rect List V2 */
	    DI_PT_2D_COPY_RECT_LIST_V3                    = 0x19,	/* DI_PT_2D_COPY_RECT_LIST_V3: DI_PT_2D_COPY_RECT_LIST_V3 2D Copy Rect List V3 */
	    DI_PT_2D_FILL_RECT_LIST                       = 0x1a,	/* DI_PT_2D_FILL_RECT_LIST: DI_PT_2D_FILL_RECT_LIST 2D Fill Rect List */
	    DI_PT_2D_LINE_STRIP                           = 0x1b,	/* DI_PT_2D_LINE_STRIP: DI_PT_2D_LINE_STRIP 2D Line Strip */
	    DI_PT_2D_TRI_STRIP                            = 0x1c,	/* DI_PT_2D_TRI_STRIP: DI_PT_2D_TRI_STRIP 2D Triangle Strip */
    VGT_INDEX_TYPE                                        = 0x0000895c,	/* VGT Index Type */
	INDEX_TYPE_mask                                   = 0x03 << 0,	/* Index Type (applicable to prim types 0-28 only). If the Source Select field is set to `Auto-increment Index` mode, then this field is ignored and the index type is 32 bits per index */
	INDEX_TYPE_shift                                  = 0,
	    DI_INDEX_SIZE_16_BIT                          = 0x00,	/* DI_INDEX_SIZE_16_BIT: DI_INDEX_SIZE_16_BIT 16 bits per index */
	    DI_INDEX_SIZE_32_BIT                          = 0x01,	/* DI_INDEX_SIZE_32_BIT: DI_INDEX_SIZE_32_BIT 32 bits per index */
    VGT_STRMOUT_BUFFER_FILLED_SIZE_0                      = 0x00008960,	/* Stream-out adjusted size. */
	/* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point where the BufDwordWritten counts are maintained. */
    VGT_STRMOUT_BUFFER_FILLED_SIZE_1                      = 0x00008964,	/* Stream-out adjusted size. */
	/* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point where the BufDwordWritten counts are maintained. */
    VGT_STRMOUT_BUFFER_FILLED_SIZE_2                      = 0x00008968,	/* Stream-out adjusted size. */
	/* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point where the BufDwordWritten counts are maintained. */
    VGT_STRMOUT_BUFFER_FILLED_SIZE_3                      = 0x0000896c,	/* Stream-out adjusted size. */
	/* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point where the BufDwordWritten counts are maintained. */
    VGT_NUM_INDICES                                       = 0x00008970,	/* VGT Number of Indices */
	/* This field indicates the number of indices to process for this draw initiator. Note this count is not necessarily the count of the primitives. It is also not the index buffer size in memory. */
    VGT_NUM_INSTANCES                                     = 0x00008974,	/* VGT Number of Instances */
	/* VGT Number of Instances */
    PA_CL_CNTL_STATUS                                     = 0x00008a10,	/* Status Bits */
	CL_BUSY_bit                                       = 1 << 31,	/* Busy Status Bit */
    PA_CL_ENHANCE                                         = 0x00008a14,	/* Used for Late Additions of Control Bits */
	CLIP_VTX_REORDER_ENA_bit                          = 1 << 0,	/* Enables vertex-order-independent clipping */
	NUM_CLIP_SEQ_mask                                 = 0x03 << 1,	/* Number of Clip Sequences Active (+1). Should be set to 3 (4 sequences) for best performance */
	NUM_CLIP_SEQ_shift                                = 1,
	CLIPPED_PRIM_SEQ_STALL_bit                        = 1 << 3,	/* Forces a faster clip path if NUM_CLIP_SEQ is set to 0 (which should only be done if 3 does not work) */
	VE_NAN_PROC_DISABLE_bit                           = 1 << 4,
    PA_SU_CNTL_STATUS                                     = 0x00008a50,	/* Status Bits */
	SU_BUSY_bit                                       = 1 << 31,	/* Busy Status Bit */
    PA_SC_LINE_STIPPLE_STATE                              = 0x00008b10,	/* Current values for Line Stipple */
	CURRENT_PTR_mask                                  = 0x0f << 0,	/* Indicates current state of pattern pointer (can be set w/ a register write). */
	CURRENT_PTR_shift                                 = 0,
	CURRENT_COUNT_mask                                = 0xff << 8,	/* Current state of the repeat counter (can be set w/a register write). */
	CURRENT_COUNT_shift                               = 8,
    PA_SC_MULTI_CHIP_CNTL                                 = 0x00008b20,	/* Controls the Screen Divisioning for Multi-Chip Configurations */
	LOG2_NUM_CHIPS_mask                               = 0x07 << 0,	/* Log2 of the number of chips in the multi-chip configuration. */
	LOG2_NUM_CHIPS_shift                              = 0,
	MULTI_CHIP_TILE_SIZE_mask                         = 0x03 << 3,	/* Size of the tile per chip within each super-tile. POSSIBLE VALUES: 00 - 16 x 16 pixel tile per chip. 01 - 32 x 32 pixel tile per chip. 02 - 64 x 64 pixel tile per chip. 03 - 128x128 pixel tile per chip. */
	MULTI_CHIP_TILE_SIZE_shift                        = 3,
	    X_16_X_16_PIXEL_TILE_PER_CHIP                 = 0x00,	/* 16 x 16 pixel tile per chip. */
	    X_32_X_32_PIXEL_TILE_PER_CHIP                 = 0x01,	/* 32 x 32 pixel tile per chip. */
	    X_64_X_64_PIXEL_TILE_PER_CHIP                 = 0x02,	/* 64 x 64 pixel tile per chip. */
	    X_128X128_PIXEL_TILE_PER_CHIP                 = 0x03,	/* 128x128 pixel tile per chip. */
	CHIP_TILE_X_LOC_mask                              = 0x07 << 5,	/* X Location of the chip within the super-tile. */
	CHIP_TILE_X_LOC_shift                             = 5,
	CHIP_TILE_Y_LOC_mask                              = 0x07 << 8,	/* Y Location of the chip within the super-tile. */
	CHIP_TILE_Y_LOC_shift                             = 8,
	CHIP_SUPER_TILE_B_bit                             = 1 << 11,	/* Must be 0 for even LOG2_NUM_CHIPS. For odd LOG2_NUM_CHIPS, this field specifies the second super tile. */
    PA_SC_AA_SAMPLE_LOCS_2S                               = 0x00008b40,	/* Multi-Sample Programmable Sample Locations for 2-Sample - Used by SC & CBs */
	S0_X_mask                                         = 0x0f << 0,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S0_X_shift                                        = 0,
	S0_Y_mask                                         = 0x0f << 4,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S0_Y_shift                                        = 4,
	S1_X_mask                                         = 0x0f << 8,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S1_X_shift                                        = 8,
	S1_Y_mask                                         = 0x0f << 12,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S1_Y_shift                                        = 12,
    PA_SC_AA_SAMPLE_LOCS_4S                               = 0x00008b44,	/* Multi-Sample Programmable Sample Locations for 4-Sample - Used by SC & CBs */
/* 	S0_X_mask                                         = 0x0f << 0, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_X_shift                                        = 0, */
/* 	S0_Y_mask                                         = 0x0f << 4, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_Y_shift                                        = 4, */
/* 	S1_X_mask                                         = 0x0f << 8, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_X_shift                                        = 8, */
/* 	S1_Y_mask                                         = 0x0f << 12, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_Y_shift                                        = 12, */
	S2_X_mask                                         = 0x0f << 16,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S2_X_shift                                        = 16,
	S2_Y_mask                                         = 0x0f << 20,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S2_Y_shift                                        = 20,
	S3_X_mask                                         = 0x0f << 24,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S3_X_shift                                        = 24,
	S3_Y_mask                                         = 0x0f << 28,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S3_Y_shift                                        = 28,
    /*
     * First word of the 8-sample positions (samples S0..S3). Only the register
     * address is a live enumerator here: every field below is commented out
     * because the same names, with the same mask/shift values, are already
     * defined for the preceding sample-location registers in this enum
     * (S1_*..S3_* are visible above; S0_* presumably earlier still).
     * Use those S<n>_{X,Y}_{mask,shift} enumerators for this register too.
     */
    PA_SC_AA_SAMPLE_LOCS_8S_WD0                           = 0x00008b48,	/* Multi-Sample Programmable Sample Locations for 8-Sample First Word - Used by SC & CB`s */
/* 	S0_X_mask                                         = 0x0f << 0, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_X_shift                                        = 0, */
/* 	S0_Y_mask                                         = 0x0f << 4, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_Y_shift                                        = 4, */
/* 	S1_X_mask                                         = 0x0f << 8, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_X_shift                                        = 8, */
/* 	S1_Y_mask                                         = 0x0f << 12, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_Y_shift                                        = 12, */
/* 	S2_X_mask                                         = 0x0f << 16, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S2_X_shift                                        = 16, */
/* 	S2_Y_mask                                         = 0x0f << 20, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S2_Y_shift                                        = 20, */
/* 	S3_X_mask                                         = 0x0f << 24, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S3_X_shift                                        = 24, */
/* 	S3_Y_mask                                         = 0x0f << 28, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S3_Y_shift                                        = 28, */
    /*
     * Second word of the 8-sample positions: one X and one Y nibble for each of
     * samples S4..S7, packed upward from bit 0 (S4 in bits [7:0] through S7 in
     * bits [31:24]).
     */
    PA_SC_AA_SAMPLE_LOCS_8S_WD1                           = 0x00008b4c,	/* Multi-Sample Programmable Sample Locations for 8-Sample Second Word - Used by SC & CB`s */
	S4_X_mask                                         = 0x0f << 0,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S4_X_shift                                        = 0,
	S4_Y_mask                                         = 0x0f << 4,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S4_Y_shift                                        = 4,
	S5_X_mask                                         = 0x0f << 8,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S5_X_shift                                        = 8,
	S5_Y_mask                                         = 0x0f << 12,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S5_Y_shift                                        = 12,
	S6_X_mask                                         = 0x0f << 16,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S6_X_shift                                        = 16,
	S6_Y_mask                                         = 0x0f << 20,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S6_Y_shift                                        = 20,
	S7_X_mask                                         = 0x0f << 24,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S7_X_shift                                        = 24,
	/* NOTE(review): 0x0f << 28 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield the negative bit pattern 0xf0000000. Confirm before using this mask in wider-than-32-bit arithmetic. */
	S7_Y_mask                                         = 0x0f << 28,	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
	S7_Y_shift                                        = 28,
    /* Status register; only one flag (bit 30) is described for it here. */
    PA_SC_CNTL_STATUS                                     = 0x00008be0,	/* Status Bits */
	MPASS_OVERFLOW_bit                                = 1 << 30,	/* If set, the Multipass Pixel Shader SC 32-bit PV counter overflowed. This bit is reset when the register is read. */
    /* Two 12-bit thresholds: a clock count in bits [11:0] and a tile count in bits [23:12]. */
    PA_SC_ENHANCE                                         = 0x00008bf0,	/* Used for Late Additions of Control Bits */
	FORCE_EOV_MAX_CLK_CNT_mask                        = 0xfff << 0,	/* Cycle count used to determine when to force out a pixel vector prematurely */
	FORCE_EOV_MAX_CLK_CNT_shift                       = 0,
	FORCE_EOV_MAX_TILE_CNT_mask                       = 0xfff << 12,	/* Tile count used to determine when to force out a pixel vector prematurely */
	FORCE_EOV_MAX_TILE_CNT_shift                      = 12,
    /*
     * Global SQ configuration: single-bit mode flags in bits [6:0], a 2-bit
     * CLAUSE_SEQ_PRIO field at bit 8, and four 2-bit per-shader-type priority
     * fields (PS, VS, GS, ES) in bits [31:24]. Fields without a trailing
     * comment carry no description in this file.
     */
    SQ_CONFIG                                             = 0x00008c00,	/* (1-state) SQ config options. The graphics pipe must be idle to change these. */
	VC_ENABLE_bit                                     = 1 << 0,	/* Vertex Cache (VC) is present; set to zero to disable VC. When VC is disabled, all vertex fetches go through the TC rather than VC regardless of the instruction bit which selects TC/VC. */
	EXPORT_SRC_C_bit                                  = 1 << 1,
	DX9_CONSTS_bit                                    = 1 << 2,	/* DX9 constant file mode. (0 = dx10 constant cache mode, 1 = dx9 constant file mode). This applies to all shaders. */
	ALU_INST_PREFER_VECTOR_bit                        = 1 << 3,	/* ALU clause instruction assignment. When a group has 4 or fewer instructions, there may be ambiguity whether to assign the last instruction to the vector pipe (according to the instruction`s dest-chan), or to the scalar pipe (trans). This bit controls that decision: 0 = send the last instruction word to the scalar (trans) pipe if possible, 1 = prefer to send it to the vector pipe. This bit is only used when the decision is ambiguous (it is not ambiguous for a vector-only or trans-only opcode, or when the last instruction writes to the same dest-chan as another instruction in the group). The shader-compiler must be aware of this bit setting and compile accordingly. Default is: 0 (prefer-scalar). */
	SQ_CONFIG__DX10_CLAMP_bit                         = 1 << 4,	/* R600: DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. NOT USED IN R6XX DERIVATIVES (see sq_pgm_resources_*) */
	ALU_PREFER_ONE_WATERFALL_bit                      = 1 << 5,
	ALU_MAX_ONE_WATERFALL_bit                         = 1 << 6,
	CLAUSE_SEQ_PRIO_mask                              = 0x03 << 8,
	CLAUSE_SEQ_PRIO_shift                             = 8,
	PS_PRIO_mask                                      = 0x03 << 24,
	PS_PRIO_shift                                     = 24,
	VS_PRIO_mask                                      = 0x03 << 26,
	VS_PRIO_shift                                     = 26,
	GS_PRIO_mask                                      = 0x03 << 28,
	GS_PRIO_shift                                     = 28,
	/* NOTE(review): 0x03 << 30 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield the negative bit pattern 0xc0000000. Confirm before using this mask in wider-than-32-bit arithmetic. */
	ES_PRIO_mask                                      = 0x03 << 30,
	ES_PRIO_shift                                     = 30,
    /* GPR budget for PS (bits [7:0]) and VS (bits [23:16]), plus the clause-temporary GPR count in bits [31:28]. */
    SQ_GPR_RESOURCE_MGMT_1                                = 0x00008c04,	/* (1-state) Defines how GPR space is divided among the 4 thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_GPRS or NUM_CLAUSE_TEMP_GPRS. */
	NUM_PS_GPRS_mask                                  = 0xff << 0,	/* Number of GPRs (per SIMD) assigned to the PS programs [0..255]. */
	NUM_PS_GPRS_shift                                 = 0,
	NUM_VS_GPRS_mask                                  = 0xff << 16,	/* Number of GPRs (per SIMD) assigned to the VS programs [0..255]. */
	NUM_VS_GPRS_shift                                 = 16,
	/* NOTE(review): the mask is 4 bits wide although the documented range is [0-7]. Separately, 0x0f << 28 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield the negative bit pattern 0xf0000000. */
	NUM_CLAUSE_TEMP_GPRS_mask                         = 0x0f << 28,	/* Number of GPRs reserved for clause temporaries [0-7]. This is the number of GPRs available to a single thread, so the hardware will reserve twice this many physical registers (for even & odd clauses). */
	NUM_CLAUSE_TEMP_GPRS_shift                        = 28,
    /* GPR budget for GS (bits [7:0]) and ES (bits [23:16]). */
    SQ_GPR_RESOURCE_MGMT_2                                = 0x00008c08,	/* (1-state) Defines how GPR space is divided among the 4 thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. */
	NUM_GS_GPRS_mask                                  = 0xff << 0,	/* Number of GPRs (per SIMD) assigned to the GS programs [0..255]. */
	NUM_GS_GPRS_shift                                 = 0,
	NUM_ES_GPRS_mask                                  = 0xff << 16,	/* Number of GPRs (per SIMD) assigned to the ES programs [0..255]. */
	NUM_ES_GPRS_shift                                 = 16,
    /*
     * Four 8-bit thread counts, one byte per shader type (PS, VS, GS, ES from
     * the low byte upward).
     * NOTE(review): each mask is 8 bits wide although the documented range of
     * every field is [1..127]; confirm whether the top bit of each byte is used.
     */
    SQ_THREAD_RESOURCE_MGMT                               = 0x00008c0c,	/* (1-state) Defines how thread space is divided among the thread types. In hardware, PS threads are [0, NUM_PS_THREADS-1], then VS, then GS and ES in the highest #s. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_THREADS. */
	NUM_PS_THREADS_mask                               = 0xff << 0,	/* Number of threads assigned to PS programs [1..127]. */
	NUM_PS_THREADS_shift                              = 0,
	NUM_VS_THREADS_mask                               = 0xff << 8,	/* Number of threads assigned to VS programs [1..127]. */
	NUM_VS_THREADS_shift                              = 8,
	NUM_GS_THREADS_mask                               = 0xff << 16,	/* Number of threads assigned to GS programs [1..127]. */
	NUM_GS_THREADS_shift                              = 16,
	/* NOTE(review): 0xff << 24 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield the negative bit pattern 0xff000000. Confirm before using this mask in wider-than-32-bit arithmetic. */
	NUM_ES_THREADS_mask                               = 0xff << 24,	/* Number of threads assigned to ES programs [1..127]. */
	NUM_ES_THREADS_shift                              = 24,
    /* Stack-entry budget for PS (bits [11:0]) and VS (bits [27:16]). */
    SQ_STACK_RESOURCE_MGMT_1                              = 0x00008c10,	/* (1-state) Defines how thread stack space is divided among the thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_STACK_ENTRIES. */
	NUM_PS_STACK_ENTRIES_mask                         = 0xfff << 0,	/* Number of stack entries allocated to PS programs [0..4095]. */
	NUM_PS_STACK_ENTRIES_shift                        = 0,
	NUM_VS_STACK_ENTRIES_mask                         = 0xfff << 16,	/* Number of stack entries allocated to VS programs [0..4095]. */
	NUM_VS_STACK_ENTRIES_shift                        = 16,
    /* Stack-entry budget for GS (bits [11:0]) and ES (bits [27:16]). */
    SQ_STACK_RESOURCE_MGMT_2                              = 0x00008c14,	/* (1-state) Defines how thread stack space is divided among the thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. */
	NUM_GS_STACK_ENTRIES_mask                         = 0xfff << 0,	/* Number of stack entries allocated to GS programs [0..4095]. */
	NUM_GS_STACK_ENTRIES_shift                        = 0,
	NUM_ES_STACK_ENTRIES_mask                         = 0xfff << 16,	/* Number of stack entries allocated to ES programs [0..4095]. */
	NUM_ES_STACK_ENTRIES_shift                        = 16,
    /*
     * Ring/temp buffer registers, in BASE/SIZE pairs at consecutive 4-byte
     * offsets from 0x8c40 to 0x8c7c. No mask/shift enumerators are defined for
     * these registers; the single field of each (MEM_BASE or MEM_SIZE) is only
     * named in the comment that follows the address.
     */
    SQ_ESGS_RING_BASE                                     = 0x00008c40,	/* (1-state) Memory base address of the ES->GS ring buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_ESGS_RING_SIZE                                     = 0x00008c44,	/* (1-state) Memory region size of the ES->GS ring buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_GSVS_RING_BASE                                     = 0x00008c48,	/* (1-state) Memory base address of the GS->VS ring buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_GSVS_RING_SIZE                                     = 0x00008c4c,	/* (1-state) Memory region size of the GS->VS ring buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_ESTMP_RING_BASE                                    = 0x00008c50,	/* (1-state) Memory base address of the ES Temp buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_ESTMP_RING_SIZE                                    = 0x00008c54,	/* (1-state) Memory region size of the ES Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_GSTMP_RING_BASE                                    = 0x00008c58,	/* (1-state) Memory base address of the GS Temp buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_GSTMP_RING_SIZE                                    = 0x00008c5c,	/* (1-state) Memory region size of the GS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_VSTMP_RING_BASE                                    = 0x00008c60,	/* (1-state) Memory base address of the VS Temp buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_VSTMP_RING_SIZE                                    = 0x00008c64,	/* (1-state) Memory region size of the VS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_PSTMP_RING_BASE                                    = 0x00008c68,	/* (1-state) Memory base address of the PS Temp buffer (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_PSTMP_RING_SIZE                                    = 0x00008c6c,	/* (1-state) Memory region size of the PS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_FBUF_RING_BASE                                     = 0x00008c70,	/* (1-state) Memory base address of the FBUFFER (PS only) (256-byte aligned) */
	/* MEM_BASE: Format is [39:8] */
    SQ_FBUF_RING_SIZE                                     = 0x00008c74,	/* (1-state) Memory region size of the FBUFFER. True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    SQ_REDUC_RING_BASE                                    = 0x00008c78,	/* (1-state) Memory base address of the Reduction Buffer */
	/* MEM_BASE: Format is [39:8] */
    SQ_REDUC_RING_SIZE                                    = 0x00008c7c,	/* (1-state) Memory region size of the Reduction Buffer. True size, not size -1. Setting to zero disables. */
	/* MEM_SIZE: Format is [39:8] */
    /*
     * Field layout of ALU instruction word 1, OP3 form.
     * The more deeply indented enumerators are the legal values of the field
     * defined just before them; they are given unshifted, so apply the
     * field's *_shift when building an instruction word.
     * NOTE(review): the value 0x00008dfc is also assigned to the other
     * instruction-word enumerators that follow (SQ_TEX_WORD2,
     * SQ_CF_ALLOC_EXPORT_WORD1, SQ_CF_ALU_WORD1, SQ_TEX_WORD1), so it looks
     * like a shared placeholder rather than a distinct register offset --
     * confirm before using it as an address.
     */
    SQ_ALU_WORD1_OP3                                      = 0x00008dfc,	/* ALU instruction word 1. This subencoding is used for OP3 instructions (instructions taking 3 operands). */
	SRC2_SEL_mask                                     = 0x1ff << 0,	/* Source for operands src2. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
	SRC2_SEL_shift                                    = 0,
	    SQ_ALU_SRC_0                                  = 0xf8,	/* SQ_ALU_SRC_0: special constant 0.0. */
	    SQ_ALU_SRC_1                                  = 0xf9,	/* SQ_ALU_SRC_1: special constant 1.0 float. */
	    SQ_ALU_SRC_1_INT                              = 0xfa,	/* SQ_ALU_SRC_1_INT: special constant 1 integer. */
	    SQ_ALU_SRC_M_1_INT                            = 0xfb,	/* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
	    SQ_ALU_SRC_0_5                                = 0xfc,	/* SQ_ALU_SRC_0_5: special constant 0.5 float. */
	    SQ_ALU_SRC_LITERAL                            = 0xfd,	/* SQ_ALU_SRC_LITERAL: literal constant. */
	    SQ_ALU_SRC_PV                                 = 0xfe,	/* SQ_ALU_SRC_PV: previous vector result. */
	    SQ_ALU_SRC_PS                                 = 0xff,	/* SQ_ALU_SRC_PS: previous scalar result. */
	SRC2_REL_bit                                      = 1 << 9,	/* If set, this operand uses relative addressing based on the INDEX_MODE. */
	SRC2_CHAN_mask                                    = 0x03 << 10,	/* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
	SRC2_CHAN_shift                                   = 10,
	    SQ_CHAN_X                                     = 0x00,	/* SQ_CHAN_X: Use X component. */
	    SQ_CHAN_Y                                     = 0x01,	/* SQ_CHAN_Y: Use Y component. */
	    SQ_CHAN_Z                                     = 0x02,	/* SQ_CHAN_Z: Use Z component. */
	    SQ_CHAN_W                                     = 0x03,	/* SQ_CHAN_W: Use W component. */
	SRC2_NEG_bit                                      = 1 << 12,	/* If set, negate the input for this operand. Should only be set for floating point inputs. */
	SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,	/* Instruction opcode. Caution: opcode values do not begin at zero. POSSIBLE VALUES: 12 - SQ_OP3_INST_MUL_LIT 13 - SQ_OP3_INST_MUL_LIT_M2 14 - SQ_OP3_INST_MUL_LIT_M4 15 - SQ_OP3_INST_MUL_LIT_D2 16 - SQ_OP3_INST_MULADD 17 - SQ_OP3_INST_MULADD_M2 18 - SQ_OP3_INST_MULADD_M4 19 - SQ_OP3_INST_MULADD_D2 20 - SQ_OP3_INST_MULADD_IEEE 21 - SQ_OP3_INST_MULADD_IEEE_M2 22 - SQ_OP3_INST_MULADD_IEEE_M4 23 - SQ_OP3_INST_MULADD_IEEE_D2 24 - SQ_OP3_INST_CNDE 25 - SQ_OP3_INST_CNDGT 26 - SQ_OP3_INST_CNDGE 27 - Reserved 28 - SQ_OP3_INST_CNDE_INT 29 - SQ_OP3_INST_CNDGT_INT 30 - SQ_OP3_INST_CNDGE_INT 31 - Reserved */
	SQ_ALU_WORD1_OP3__ALU_INST_shift                  = 13,
	    SQ_OP3_INST_MUL_LIT                           = 0x0c,	/* SQ_OP3_INST_MUL_LIT */
	    SQ_OP3_INST_MUL_LIT_M2                        = 0x0d,	/* SQ_OP3_INST_MUL_LIT_M2 */
	    SQ_OP3_INST_MUL_LIT_M4                        = 0x0e,	/* SQ_OP3_INST_MUL_LIT_M4 */
	    SQ_OP3_INST_MUL_LIT_D2                        = 0x0f,	/* SQ_OP3_INST_MUL_LIT_D2 */
	    SQ_OP3_INST_MULADD                            = 0x10,	/* SQ_OP3_INST_MULADD */
	    SQ_OP3_INST_MULADD_M2                         = 0x11,	/* SQ_OP3_INST_MULADD_M2 */
	    SQ_OP3_INST_MULADD_M4                         = 0x12,	/* SQ_OP3_INST_MULADD_M4 */
	    SQ_OP3_INST_MULADD_D2                         = 0x13,	/* SQ_OP3_INST_MULADD_D2 */
	    SQ_OP3_INST_MULADD_IEEE                       = 0x14,	/* SQ_OP3_INST_MULADD_IEEE */
	    SQ_OP3_INST_MULADD_IEEE_M2                    = 0x15,	/* SQ_OP3_INST_MULADD_IEEE_M2 */
	    SQ_OP3_INST_MULADD_IEEE_M4                    = 0x16,	/* SQ_OP3_INST_MULADD_IEEE_M4 */
	    SQ_OP3_INST_MULADD_IEEE_D2                    = 0x17,	/* SQ_OP3_INST_MULADD_IEEE_D2 */
	    SQ_OP3_INST_CNDE                              = 0x18,	/* SQ_OP3_INST_CNDE */
	    SQ_OP3_INST_CNDGT                             = 0x19,	/* SQ_OP3_INST_CNDGT */
	    SQ_OP3_INST_CNDGE                             = 0x1a,	/* SQ_OP3_INST_CNDGE */
	    /* 0x1b is listed as Reserved in the opcode table above, hence the gap. */
	    SQ_OP3_INST_CNDE_INT                          = 0x1c,	/* SQ_OP3_INST_CNDE_INT */
	    SQ_OP3_INST_CNDGT_INT                         = 0x1d,	/* SQ_OP3_INST_CNDGT_INT */
	    SQ_OP3_INST_CNDGE_INT                         = 0x1e,	/* SQ_OP3_INST_CNDGE_INT */
    /*
     * Field layout of texture-fetch instruction word 2: three 5-bit texel
     * offsets (bits [14:0]), a 5-bit sampler id (bits [19:15]) and four 3-bit
     * source-component selectors (bits [31:20]).
     * The SQ_SEL_* values are defined once, under SRC_SEL_X, and apply to all
     * four selectors; the repeats under Y/Z/W are commented out because an
     * enumerator name cannot be defined twice. Values are unshifted.
     */
    SQ_TEX_WORD2                                          = 0x00008dfc,	/* Texture fetch clause instruction word 2. */
	OFFSET_X_mask                                     = 0x1f << 0,	/* Value added to X component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
	OFFSET_X_shift                                    = 0,
	OFFSET_Y_mask                                     = 0x1f << 5,	/* Value added to Y component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
	OFFSET_Y_shift                                    = 5,
	OFFSET_Z_mask                                     = 0x1f << 10,	/* Value added to Z component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
	OFFSET_Z_shift                                    = 10,
	SAMPLER_ID_mask                                   = 0x1f << 15,	/* Sampler ID to use (specifies filter options, etc.). Value in the range [0, 17]. */
	SAMPLER_ID_shift                                  = 15,
	SQ_TEX_WORD2__SRC_SEL_X_mask                      = 0x07 << 20,	/* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
	SQ_TEX_WORD2__SRC_SEL_X_shift                     = 20,
	    SQ_SEL_X                                      = 0x00,	/* SQ_SEL_X: use X component */
	    SQ_SEL_Y                                      = 0x01,	/* SQ_SEL_Y: use Y component */
	    SQ_SEL_Z                                      = 0x02,	/* SQ_SEL_Z: use Z component */
	    SQ_SEL_W                                      = 0x03,	/* SQ_SEL_W: use W component */
	    SQ_SEL_0                                      = 0x04,	/* SQ_SEL_0: use constant 0.0 */
	    SQ_SEL_1                                      = 0x05,	/* SQ_SEL_1: use constant 1.0 */
	SRC_SEL_Y_mask                                    = 0x07 << 23,	/* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
	SRC_SEL_Y_shift                                   = 23,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	SRC_SEL_Z_mask                                    = 0x07 << 26,	/* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
	SRC_SEL_Z_shift                                   = 26,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	/* NOTE(review): 0x07 << 29 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield the negative bit pattern 0xe0000000. Confirm before using this mask in wider-than-32-bit arithmetic. */
	SRC_SEL_W_mask                                    = 0x07 << 29,	/* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
	SRC_SEL_W_shift                                   = 29,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
    /*
     * Common part of control-flow word 1 for alloc/export instructions: fields
     * start at bit 17 (lower bits are not described in this group). The
     * indented SQ_CF_INST_* enumerators are the unshifted values of the 7-bit
     * CF_INST field at bits [29:23].
     */
    SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,	/* Word 1 of the control flow instruction for alloc/export is the bitwise OR of WORD1 | WORD1_{BUF,SWIZ}. This part contains fields that are always defined. */
	BURST_COUNT_mask                                  = 0x0f << 17,	/* Number of MRTs, positions, parameters, or logical export values to allocate and/or export, minus one. This field is interpreted as a value in [1,16]. */
	BURST_COUNT_shift                                 = 17,
	END_OF_PROGRAM_bit                                = 1 << 21,	/* If set, then this instruction is the last instruction of the CF program. Execution ends after this instruction is issued. */
	VALID_PIXEL_MODE_bit                              = 1 << 22,	/* If set, execute this instruction/clause as if invalid pixels are inactive. Antonym of WHOLE_QUAD_MODE. Caution: VALID_PIXEL_MODE is not the `default` mode; this bit should be set to 0 by default. */
	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0x7f << 23,	/* Type of instruction to evaluate in CF. This value MUST be one of the alloc/export instructions listed below. POSSIBLE VALUES: 32 - SQ_CF_INST_MEM_STREAM0: perform a memory operation on the stream buffer 0 (write-only). 33 - SQ_CF_INST_MEM_STREAM1: perform a memory operation on the stream buffer 1 (write-only). 34 - SQ_CF_INST_MEM_STREAM2: perform a memory operation on the stream buffer 2 (write-only). 35 - SQ_CF_INST_MEM_STREAM3: perform a memory operation on the stream buffer 3 (write-only). 36 - SQ_CF_INST_MEM_SCRATCH: perform a memory operation on the scratch buffer (read-write). 37 - SQ_CF_INST_MEM_REDUCTION: perform a memory operation on the reduction buffer (read-write). 38 - SQ_CF_INST_MEM_RING: perform a memory operation on the ring buffer (write-only). 39 - SQ_CF_INST_EXPORT: export only (not last). Used for PIXEL, POS, PARAM exports. 40 - SQ_CF_INST_EXPORT_DONE: export only (last export). Used for PIXEL, POS, PARAM exports. */
	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift           = 23,
	    SQ_CF_INST_MEM_STREAM0                        = 0x20,	/* SQ_CF_INST_MEM_STREAM0: perform a memory operation on the stream buffer 0 (write-only). */
	    SQ_CF_INST_MEM_STREAM1                        = 0x21,	/* SQ_CF_INST_MEM_STREAM1: perform a memory operation on the stream buffer 1 (write-only). */
	    SQ_CF_INST_MEM_STREAM2                        = 0x22,	/* SQ_CF_INST_MEM_STREAM2: perform a memory operation on the stream buffer 2 (write-only). */
	    SQ_CF_INST_MEM_STREAM3                        = 0x23,	/* SQ_CF_INST_MEM_STREAM3: perform a memory operation on the stream buffer 3 (write-only). */
	    SQ_CF_INST_MEM_SCRATCH                        = 0x24,	/* SQ_CF_INST_MEM_SCRATCH: perform a memory operation on the scratch buffer (read-write). */
	    SQ_CF_INST_MEM_REDUCTION                      = 0x25,	/* SQ_CF_INST_MEM_REDUCTION: perform a memory operation on the reduction buffer (read-write). */
	    SQ_CF_INST_MEM_RING                           = 0x26,	/* SQ_CF_INST_MEM_RING: perform a memory operation on the ring buffer (write-only). */
	    SQ_CF_INST_EXPORT                             = 0x27,	/* SQ_CF_INST_EXPORT: export only (not last). Used for PIXEL, POS, PARAM exports. */
	    SQ_CF_INST_EXPORT_DONE                        = 0x28,	/* SQ_CF_INST_EXPORT_DONE: export only (last export). Used for PIXEL, POS, PARAM exports. */
	WHOLE_QUAD_MODE_bit                               = 1 << 30,	/* If set, execute this instruction/clause as if all pixels were active and valid. */
	/* NOTE(review): 1 << 31 does not fit in a 32-bit signed int, so the shift formally overflows; compilers typically yield INT_MIN (bit pattern 0x80000000). Confirm before using this bit in wider-than-32-bit arithmetic. */
	BARRIER_bit                                       = 1 << 31,	/* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
    /*
     * Field layout of control-flow word 1 for ALU clauses. The indented
     * enumerators are the unshifted values of the field defined just before
     * them. WHOLE_QUAD_MODE_bit and BARRIER_bit are commented out at the end
     * because the same names with the same bit positions are already defined
     * under SQ_CF_ALLOC_EXPORT_WORD1 earlier in this enum; reuse those.
     */
    SQ_CF_ALU_WORD1                                       = 0x00008dfc,	/* Control flow instruction word 1. This word is used by ALU clause instructions. */
	KCACHE_MODE1_mask                                 = 0x03 << 0,	/* Mode for second set of locked cache lines. POSSIBLE VALUES: 00 - SQ_CF_KCACHE_NOP: do not lock any cache lines. 01 - SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. 02 - SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. 03 - SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
	KCACHE_MODE1_shift                                = 0,
	    SQ_CF_KCACHE_NOP                              = 0x00,	/* SQ_CF_KCACHE_NOP: do not lock any cache lines. */
	    SQ_CF_KCACHE_LOCK_1                           = 0x01,	/* SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. */
	    SQ_CF_KCACHE_LOCK_2                           = 0x02,	/* SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. */
	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03,	/* SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
	KCACHE_ADDR0_mask                                 = 0xff << 2,	/* Constant buffer address for first set of locked cache lines. In units of cache lines where a line holds 16 128-bit constants (byte addr[15:8]). */
	KCACHE_ADDR0_shift                                = 2,
	KCACHE_ADDR1_mask                                 = 0xff << 10,	/* Constant buffer address for second set of locked cache lines. */
	KCACHE_ADDR1_shift                                = 10,
	SQ_CF_ALU_WORD1__COUNT_mask                       = 0x7f << 18,	/* Number of instructions to execute in the clause, minus one. This is interpreted as the number of instruction slots (64-bit slots) in the range [1,128]. */
	SQ_CF_ALU_WORD1__COUNT_shift                      = 18,
	SQ_CF_ALU_WORD1__ALT_CONST_bit                    = 1 << 25,	/* If set, uses constants from alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
	SQ_CF_ALU_WORD1__CF_INST_mask                     = 0x0f << 26,	/* Type of ALU instruction to evaluate in CF. For this encoding, CF_INST must be set to one of the following values. POSSIBLE VALUES: 08 - SQ_CF_INST_ALU: each PRED_SET updates the active state but does not update the stack. 09 - SQ_CF_INST_ALU_PUSH_BEFORE: do CF_PUSH; then CF_INST_ALU 10 - SQ_CF_INST_ALU_POP_AFTER: do CF_INST_ALU; then do CF_INST_POP. 11 - SQ_CF_INST_ALU_POP2_AFTER: do CF_INST_ALU; then do CF_INST_POP twice. 13 - SQ_CF_INST_ALU_CONTINUE: each PRED_SET causes a continue operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. 14 - SQ_CF_INST_ALU_BREAK: each PRED_SET causes a break operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. 15 - SQ_CF_INST_ALU_ELSE_AFTER: do CF_INST_ALU; then do CF_INST_ELSE. */
	SQ_CF_ALU_WORD1__CF_INST_shift                    = 26,
	    SQ_CF_INST_ALU                                = 0x08,	/* SQ_CF_INST_ALU: each PRED_SET updates the active state but does not update the stack. */
	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09,	/* SQ_CF_INST_ALU_PUSH_BEFORE: do CF_PUSH; then CF_INST_ALU */
	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a,	/* SQ_CF_INST_ALU_POP_AFTER: do CF_INST_ALU; then do CF_INST_POP. */
	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b,	/* SQ_CF_INST_ALU_POP2_AFTER: do CF_INST_ALU; then do CF_INST_POP twice. */
	    /* 0x0c does not appear in the list of possible values above, hence the gap. */
	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d,	/* SQ_CF_INST_ALU_CONTINUE: each PRED_SET causes a continue operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. */
	    /* NOTE(review): the "Equivalent to" sequence below is word-for-word the one given for ALU_CONTINUE (it ends in CF_INST_CONTINUE); presumably CF_INST_BREAK was intended -- confirm against the ISA document. */
	    SQ_CF_INST_ALU_BREAK                          = 0x0e,	/* SQ_CF_INST_ALU_BREAK: each PRED_SET causes a break operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. */
	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f,	/* SQ_CF_INST_ALU_ELSE_AFTER: do CF_INST_ALU; then do CF_INST_ELSE. */
/* 	WHOLE_QUAD_MODE_bit                               = 1 << 30, */	/* If set, execute this instruction/clause as if all pixels are active and valid. Antonym of VALID_PIXEL_MODE. Set at most one of these bits. */
/* 	BARRIER_bit                                       = 1 << 31, */	/* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
    SQ_TEX_WORD1                                          = 0x00008dfc,	/* Texture fetch clause instruction word 1. */
	SQ_TEX_WORD1__DST_GPR_mask                        = 0x7f << 0,	/* Destination GPR address to write result to. */
	SQ_TEX_WORD1__DST_GPR_shift                       = 0,
	SQ_TEX_WORD1__DST_REL_bit                         = 1 << 7,	/* Indicate whether destination address is absolute or relative to an index. */
	SQ_TEX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_X_shift                     = 9,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	    SQ_SEL_MASK                                   = 0x07,	/* SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_Y_shift                     = 12,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_Z_shift                     = 15,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__DST_SEL_W_shift                     = 18,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_TEX_WORD1__LOD_BIAS_mask                       = 0x7f << 21,	/* Constant LOD bias to add to the computed bias for this lookup. Twos-complement S3.4 fixpoint value with range [-4, 4). */
	SQ_TEX_WORD1__LOD_BIAS_shift                      = 21,
	COORD_TYPE_X_bit                                  = 1 << 28,	/* Indicate the type of the src.X component. */
	COORD_TYPE_Y_bit                                  = 1 << 29,	/* Indicate the type of the src.Y component. */
	COORD_TYPE_Z_bit                                  = 1 << 30,	/* Indicate the type of the src.Z component. */
	COORD_TYPE_W_bit                                  = 1 << 31,	/* Indicate the type of the src.W component. NOTE(review): with a 32-bit int, 1 << 31 shifts into the sign bit, which the C standard leaves undefined (compilers commonly produce INT_MIN) - confirm this is acceptable for the toolchains in use before relying on the value's sign. */
    SQ_VTX_WORD0                                          = 0x00008dfc,	/* Vertex fetch clause instruction word 0. */
	VTX_INST_mask                                     = 0x1f << 0,	/* Opcode for this vertex fetch instruction. POSSIBLE VALUES: 00 - SQ_VTX_INST_FETCH: vertex fetch (X = uint32 index) 01 - SQ_VTX_INST_SEMANTIC: semantic vertex fetch */
	VTX_INST_shift                                    = 0,
	    SQ_VTX_INST_FETCH                             = 0x00,	/* SQ_VTX_INST_FETCH: vertex fetch (X = uint32 index) */
	    SQ_VTX_INST_SEMANTIC                          = 0x01,	/* SQ_VTX_INST_SEMANTIC: semantic vertex fetch */
	FETCH_TYPE_mask                                   = 0x03 << 5,	/* Specify which index offset to send to VC. POSSIBLE VALUES: 00 - SQ_VTX_FETCH_VERTEX_DATA 01 - SQ_VTX_FETCH_INSTANCE_DATA 02 - SQ_VTX_FETCH_NO_INDEX_OFFSET */
	FETCH_TYPE_shift                                  = 5,
	    SQ_VTX_FETCH_VERTEX_DATA                      = 0x00,	/* SQ_VTX_FETCH_VERTEX_DATA */
	    SQ_VTX_FETCH_INSTANCE_DATA                    = 0x01,	/* SQ_VTX_FETCH_INSTANCE_DATA */
	    SQ_VTX_FETCH_NO_INDEX_OFFSET                  = 0x02,	/* SQ_VTX_FETCH_NO_INDEX_OFFSET */
	FETCH_WHOLE_QUAD_bit                              = 1 << 7,	/* If set, texture instruction must fetch data for all pixels (result may be used as source coordinate of a dependent read). If cleared, texture instruction can ignore invalid pixels. */
	BUFFER_ID_mask                                    = 0xff << 8,	/* Constant ID to use for this vertex fetch (indicates the buffer address, size, and format). */
	BUFFER_ID_shift                                   = 8,
	SRC_GPR_mask                                      = 0x7f << 16,	/* Source GPR address to get fetch address from. */
	SRC_GPR_shift                                     = 16,
	SRC_REL_bit                                       = 1 << 23,	/* Indicate whether source address is absolute or relative to an index. */
	SQ_VTX_WORD0__SRC_SEL_X_mask                      = 0x03 << 24,	/* Indicate which component of src to use for the fetch address. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component */
	SQ_VTX_WORD0__SRC_SEL_X_shift                     = 24,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
	MEGA_FETCH_COUNT_mask                             = 0x3f << 26,	/* For a mega-fetch, number of bytes to fetch at once. For mini-fetch, number of bytes to fetch if SQ converts this instruction into a mega-fetch. This value's range is [1,64]. */
	MEGA_FETCH_COUNT_shift                            = 26,
    SQ_CF_ALLOC_EXPORT_WORD1_SWIZ                         = 0x00008dfc,	/* Word 1 of the control flow instruction. This subencoding is used by alloc/exports for PIXEL, POS, and PARAM. */
	SEL_X_mask                                        = 0x07 << 0,	/* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SEL_X_shift                                       = 0,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SEL_Y_mask                                        = 0x07 << 3,	/* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SEL_Y_shift                                       = 3,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SEL_Z_mask                                        = 0x07 << 6,	/* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SEL_Z_shift                                       = 6,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SEL_W_mask                                        = 0x07 << 9,	/* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SEL_W_shift                                       = 9,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
    SQ_ALU_WORD1                                          = 0x00008dfc,	/* ALU instruction word 1 is the bitwise OR of SQ_ALU_WORD1 | SQ_ALU_WORD1_OP[2,3]. SQ_ALU_WORD1 contains fields used by all encodings. */
	ENCODING_mask                                     = 0x07 << 15,	/* A read-only field used to determine whether OP2 or OP3 encoding is being used. If this field's value is 0, the instruction is using OP2. Otherwise, the instruction is using OP3. Do not write to this field directly. */
	ENCODING_shift                                    = 15,
	BANK_SWIZZLE_mask                                 = 0x07 << 18,	/* Specify how to load operands into the SP. POSSIBLE VALUES: 00 - SQ_ALU_VEC_012, SQ_ALU_SCL_210 01 - SQ_ALU_VEC_021, SQ_ALU_SCL_122 02 - SQ_ALU_VEC_120, SQ_ALU_SCL_212 03 - SQ_ALU_VEC_102, SQ_ALU_SCL_221 04 - SQ_ALU_VEC_201 05 - SQ_ALU_VEC_210 */
	BANK_SWIZZLE_shift                                = 18,
	    SQ_ALU_VEC_012                                = 0x00,	/* SQ_ALU_VEC_012, SQ_ALU_SCL_210 */
	    SQ_ALU_VEC_021                                = 0x01,	/* SQ_ALU_VEC_021, SQ_ALU_SCL_122 */
	    SQ_ALU_VEC_120                                = 0x02,	/* SQ_ALU_VEC_120, SQ_ALU_SCL_212 */
	    SQ_ALU_VEC_102                                = 0x03,	/* SQ_ALU_VEC_102, SQ_ALU_SCL_221 */
	    SQ_ALU_VEC_201                                = 0x04,	/* SQ_ALU_VEC_201 */
	    SQ_ALU_VEC_210                                = 0x05,	/* SQ_ALU_VEC_210 */
	SQ_ALU_WORD1__DST_GPR_mask                        = 0x7f << 21,	/* Destination address to write result to. Always a GPR address. */
	SQ_ALU_WORD1__DST_GPR_shift                       = 21,
	SQ_ALU_WORD1__DST_REL_bit                         = 1 << 28,	/* If set, this operand uses relative addressing based on the INDEX_MODE. */
	DST_CHAN_mask                                     = 0x03 << 29,	/* Specify which channel of DST_GPR to write the result to. POSSIBLE VALUES: 00 - CHAN_X: write to X channel of dest. 01 - CHAN_Y: write to Y channel of dest. 02 - CHAN_Z: write to Z channel of dest. 03 - CHAN_W: write to W channel of dest. */
	DST_CHAN_shift                                    = 29,
	    CHAN_X                                        = 0x00,	/* CHAN_X: write to X channel of dest. */
	    CHAN_Y                                        = 0x01,	/* CHAN_Y: write to Y channel of dest. */
	    CHAN_Z                                        = 0x02,	/* CHAN_Z: write to Z channel of dest. */
	    CHAN_W                                        = 0x03,	/* CHAN_W: write to W channel of dest. */
	SQ_ALU_WORD1__CLAMP_bit                           = 1 << 31,	/* If set, clamp the result to [0.0, 1.0]. Not mathematically defined for opcodes that produce integer results. */
    SQ_CF_ALU_WORD0                                       = 0x00008dfc,	/* Control flow instruction word 0. This word is used by ALU clause instructions. */
	SQ_CF_ALU_WORD0__ADDR_mask                        = 0x3fffff << 0,	/* Bits [24:3] of the byte offset (producing a QUAD-word-aligned value) of the clause to execute. The offset is relative to the byte address specified by PGM_START. */
	SQ_CF_ALU_WORD0__ADDR_shift                       = 0,
	KCACHE_BANK0_mask                                 = 0x0f << 22,	/* Bank (constant buffer number) for first set of locked cache lines. */
	KCACHE_BANK0_shift                                = 22,
	KCACHE_BANK1_mask                                 = 0x0f << 26,	/* Bank (constant buffer number) for second set of locked cache lines. */
	KCACHE_BANK1_shift                                = 26,
	KCACHE_MODE0_mask                                 = 0x03 << 30,	/* Mode for first set of locked cache lines. POSSIBLE VALUES: 00 - SQ_CF_KCACHE_NOP: do not lock any cache lines. 01 - SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. 02 - SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. 03 - SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
	KCACHE_MODE0_shift                                = 30,
/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */	/* SQ_CF_KCACHE_NOP: do not lock any cache lines. */
/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */	/* SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. */
/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */	/* SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. */
/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */	/* SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
    SQ_VTX_WORD2                                          = 0x00008dfc,	/* Vertex fetch clause instruction word 2. */
	SQ_VTX_WORD2__OFFSET_mask                         = 0xffff << 0,	/* Offset to begin reading from. Byte-aligned. */
	SQ_VTX_WORD2__OFFSET_shift                        = 0,
	SQ_VTX_WORD2__ENDIAN_SWAP_mask                    = 0x03 << 16,	/* Endian control (ignored if USE_CONST_FIELDS = 1). POSSIBLE VALUES: 00 - SQ_ENDIAN_NONE: no endian swap (XOR by 0) 01 - SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC 02 - SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
	SQ_VTX_WORD2__ENDIAN_SWAP_shift                   = 16,
	    SQ_ENDIAN_NONE                                = 0x00,	/* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
	    SQ_ENDIAN_8IN16                               = 0x01,	/* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
	    SQ_ENDIAN_8IN32                               = 0x02,	/* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
	CONST_BUF_NO_STRIDE_bit                           = 1 << 18,	/* If set, force stride to zero for constant buffer fetches that use absolute addresses. */
	MEGA_FETCH_bit                                    = 1 << 19,	/* If set, this instruction is a mega-fetch. Otherwise it is a mini-fetch. */
	SQ_VTX_WORD2__ALT_CONST_bit                       = 1 << 20,	/* If set, uses constants from the alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
    SQ_ALU_WORD1_OP2_V2                                   = 0x00008dfc,	/* ALU instruction word 1. This subencoding is used for OP2 instructions (instructions taking 0 to 2 operands). */
	SRC0_ABS_bit                                      = 1 << 0,	/* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
	SRC1_ABS_bit                                      = 1 << 1,	/* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
	UPDATE_EXECUTE_MASK_bit                           = 1 << 2,	/* If set, update the execute mask in the SQ after executing this instruction based on the current predicate. */
	UPDATE_PRED_bit                                   = 1 << 3,	/* If set, update the predicate in the SP based on the predicate operation computed here. */
	WRITE_MASK_bit                                    = 1 << 4,	/* If set, write this scalar result to the destination GPR channel. */
	SQ_ALU_WORD1_OP2_V2__OMOD_mask                    = 0x03 << 5,	/* Output modifier for this instruction. Must be set to SQ_ALU_OMOD_OFF for operations that produce an integer result. POSSIBLE VALUES: 00 - SQ_ALU_OMOD_OFF: identity. 01 - SQ_ALU_OMOD_M2: multiply by 2.0. 02 - SQ_ALU_OMOD_M4: multiply by 4.0. 03 - SQ_ALU_OMOD_D2: divide by 2.0. */
	SQ_ALU_WORD1_OP2_V2__OMOD_shift                   = 5,
	    SQ_ALU_OMOD_OFF                               = 0x00,	/* SQ_ALU_OMOD_OFF: identity. */
	    SQ_ALU_OMOD_M2                                = 0x01,	/* SQ_ALU_OMOD_M2: multiply by 2.0. */
	    SQ_ALU_OMOD_M4                                = 0x02,	/* SQ_ALU_OMOD_M4: multiply by 4.0. */
	    SQ_ALU_OMOD_D2                                = 0x03,	/* SQ_ALU_OMOD_D2: divide by 2.0. */
	SQ_ALU_WORD1_OP2_V2__ALU_INST_mask                = 0x7ff << 7,	/* Instruction opcode. The top 3 bits of this must be zero. Caution: gaps in opcode values are not marked in the table below. */
	SQ_ALU_WORD1_OP2_V2__ALU_INST_shift               = 7,
	    SQ_OP2_INST_ADD                               = 0x00,	/* SQ_OP2_INST_ADD */
	    SQ_OP2_INST_MUL                               = 0x01,	/* SQ_OP2_INST_MUL */
	    SQ_OP2_INST_MUL_IEEE                          = 0x02,	/* SQ_OP2_INST_MUL_IEEE */
	    SQ_OP2_INST_MAX                               = 0x03,	/* SQ_OP2_INST_MAX */
	    SQ_OP2_INST_MIN                               = 0x04,	/* SQ_OP2_INST_MIN */
	    SQ_OP2_INST_MAX_DX10                          = 0x05,	/* SQ_OP2_INST_MAX_DX10 */
	    SQ_OP2_INST_MIN_DX10                          = 0x06,	/* SQ_OP2_INST_MIN_DX10 */
	    SQ_OP2_INST_SETE                              = 0x08,	/* SQ_OP2_INST_SETE */
	    SQ_OP2_INST_SETGT                             = 0x09,	/* SQ_OP2_INST_SETGT */
	    SQ_OP2_INST_SETGE                             = 0x0a,	/* SQ_OP2_INST_SETGE */
	    SQ_OP2_INST_SETNE                             = 0x0b,	/* SQ_OP2_INST_SETNE */
	    SQ_OP2_INST_SETE_DX10                         = 0x0c,	/* SQ_OP2_INST_SETE_DX10 */
	    SQ_OP2_INST_SETGT_DX10                        = 0x0d,	/* SQ_OP2_INST_SETGT_DX10 */
	    SQ_OP2_INST_SETGE_DX10                        = 0x0e,	/* SQ_OP2_INST_SETGE_DX10 */
	    SQ_OP2_INST_SETNE_DX10                        = 0x0f,	/* SQ_OP2_INST_SETNE_DX10 */
	    SQ_OP2_INST_FRACT                             = 0x10,	/* SQ_OP2_INST_FRACT */
	    SQ_OP2_INST_TRUNC                             = 0x11,	/* SQ_OP2_INST_TRUNC */
	    SQ_OP2_INST_CEIL                              = 0x12,	/* SQ_OP2_INST_CEIL */
	    SQ_OP2_INST_RNDNE                             = 0x13,	/* SQ_OP2_INST_RNDNE */
	    SQ_OP2_INST_FLOOR                             = 0x14,	/* SQ_OP2_INST_FLOOR */
	    SQ_OP2_INST_MOVA                              = 0x15,	/* SQ_OP2_INST_MOVA */
	    SQ_OP2_INST_MOVA_FLOOR                        = 0x16,	/* SQ_OP2_INST_MOVA_FLOOR */
	    SQ_OP2_INST_MOVA_INT                          = 0x18,	/* SQ_OP2_INST_MOVA_INT */
	    SQ_OP2_INST_MOV                               = 0x19,	/* SQ_OP2_INST_MOV */
	    SQ_OP2_INST_NOP                               = 0x1a,	/* SQ_OP2_INST_NOP */
	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e,	/* SQ_OP2_INST_PRED_SETGT_UINT */
	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f,	/* SQ_OP2_INST_PRED_SETGE_UINT */
	    SQ_OP2_INST_PRED_SETE                         = 0x20,	/* SQ_OP2_INST_PRED_SETE */
	    SQ_OP2_INST_PRED_SETGT                        = 0x21,	/* SQ_OP2_INST_PRED_SETGT */
	    SQ_OP2_INST_PRED_SETGE                        = 0x22,	/* SQ_OP2_INST_PRED_SETGE */
	    SQ_OP2_INST_PRED_SETNE                        = 0x23,	/* SQ_OP2_INST_PRED_SETNE */
	    SQ_OP2_INST_PRED_SET_INV                      = 0x24,	/* SQ_OP2_INST_PRED_SET_INV */
	    SQ_OP2_INST_PRED_SET_POP                      = 0x25,	/* SQ_OP2_INST_PRED_SET_POP */
	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26,	/* SQ_OP2_INST_PRED_SET_CLR */
	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27,	/* SQ_OP2_INST_PRED_SET_RESTORE */
	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28,	/* SQ_OP2_INST_PRED_SETE_PUSH */
	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29,	/* SQ_OP2_INST_PRED_SETGT_PUSH */
	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a,	/* SQ_OP2_INST_PRED_SETGE_PUSH */
	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b,	/* SQ_OP2_INST_PRED_SETNE_PUSH */
	    SQ_OP2_INST_KILLE                             = 0x2c,	/* SQ_OP2_INST_KILLE */
	    SQ_OP2_INST_KILLGT                            = 0x2d,	/* SQ_OP2_INST_KILLGT */
	    SQ_OP2_INST_KILLGE                            = 0x2e,	/* SQ_OP2_INST_KILLGE */
	    SQ_OP2_INST_KILLNE                            = 0x2f,	/* SQ_OP2_INST_KILLNE */
	    SQ_OP2_INST_AND_INT                           = 0x30,	/* SQ_OP2_INST_AND_INT */
	    SQ_OP2_INST_OR_INT                            = 0x31,	/* SQ_OP2_INST_OR_INT */
	    SQ_OP2_INST_XOR_INT                           = 0x32,	/* SQ_OP2_INST_XOR_INT */
	    SQ_OP2_INST_NOT_INT                           = 0x33,	/* SQ_OP2_INST_NOT_INT */
	    SQ_OP2_INST_ADD_INT                           = 0x34,	/* SQ_OP2_INST_ADD_INT */
	    SQ_OP2_INST_SUB_INT                           = 0x35,	/* SQ_OP2_INST_SUB_INT */
	    SQ_OP2_INST_MAX_INT                           = 0x36,	/* SQ_OP2_INST_MAX_INT */
	    SQ_OP2_INST_MIN_INT                           = 0x37,	/* SQ_OP2_INST_MIN_INT */
	    SQ_OP2_INST_MAX_UINT                          = 0x38,	/* SQ_OP2_INST_MAX_UINT */
	    SQ_OP2_INST_MIN_UINT                          = 0x39,	/* SQ_OP2_INST_MIN_UINT */
	    SQ_OP2_INST_SETE_INT                          = 0x3a,	/* SQ_OP2_INST_SETE_INT */
	    SQ_OP2_INST_SETGT_INT                         = 0x3b,	/* SQ_OP2_INST_SETGT_INT */
	    SQ_OP2_INST_SETGE_INT                         = 0x3c,	/* SQ_OP2_INST_SETGE_INT */
	    SQ_OP2_INST_SETNE_INT                         = 0x3d,	/* SQ_OP2_INST_SETNE_INT */
	    SQ_OP2_INST_SETGT_UINT                        = 0x3e,	/* SQ_OP2_INST_SETGT_UINT */
	    SQ_OP2_INST_SETGE_UINT                        = 0x3f,	/* SQ_OP2_INST_SETGE_UINT */
	    SQ_OP2_INST_KILLGT_UINT                       = 0x40,	/* SQ_OP2_INST_KILLGT_UINT */
	    SQ_OP2_INST_KILLGE_UINT                       = 0x41,	/* SQ_OP2_INST_KILLGE_UINT */
	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42,	/* SQ_OP2_INST_PRED_SETE_INT */
	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43,	/* SQ_OP2_INST_PRED_SETGT_INT */
	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44,	/* SQ_OP2_INST_PRED_SETGE_INT */
	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45,	/* SQ_OP2_INST_PRED_SETNE_INT */
	    SQ_OP2_INST_KILLE_INT                         = 0x46,	/* SQ_OP2_INST_KILLE_INT */
	    SQ_OP2_INST_KILLGT_INT                        = 0x47,	/* SQ_OP2_INST_KILLGT_INT */
	    SQ_OP2_INST_KILLGE_INT                        = 0x48,	/* SQ_OP2_INST_KILLGE_INT */
	    SQ_OP2_INST_KILLNE_INT                        = 0x49,	/* SQ_OP2_INST_KILLNE_INT */
	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a,	/* SQ_OP2_INST_PRED_SETE_PUSH_INT */
	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b,	/* SQ_OP2_INST_PRED_SETGT_PUSH_INT */
	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c,	/* SQ_OP2_INST_PRED_SETGE_PUSH_INT */
	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d,	/* SQ_OP2_INST_PRED_SETNE_PUSH_INT */
	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e,	/* SQ_OP2_INST_PRED_SETLT_PUSH_INT */
	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f,	/* SQ_OP2_INST_PRED_SETLE_PUSH_INT */
	    SQ_OP2_INST_DOT4                              = 0x50,	/* SQ_OP2_INST_DOT4 */
	    SQ_OP2_INST_DOT4_IEEE                         = 0x51,	/* SQ_OP2_INST_DOT4_IEEE */
	    SQ_OP2_INST_CUBE                              = 0x52,	/* SQ_OP2_INST_CUBE */
	    SQ_OP2_INST_MAX4                              = 0x53,	/* SQ_OP2_INST_MAX4 */
	    SQ_OP2_INST_MOVA_GPR_INT                      = 0x60,	/* SQ_OP2_INST_MOVA_GPR_INT */
	    SQ_OP2_INST_EXP_IEEE                          = 0x61,	/* SQ_OP2_INST_EXP_IEEE */
	    SQ_OP2_INST_LOG_CLAMPED                       = 0x62,	/* SQ_OP2_INST_LOG_CLAMPED */
	    SQ_OP2_INST_LOG_IEEE                          = 0x63,	/* SQ_OP2_INST_LOG_IEEE */
	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x64,	/* SQ_OP2_INST_RECIP_CLAMPED */
	    SQ_OP2_INST_RECIP_FF                          = 0x65,	/* SQ_OP2_INST_RECIP_FF */
	    SQ_OP2_INST_RECIP_IEEE                        = 0x66,	/* SQ_OP2_INST_RECIP_IEEE */
	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67,	/* SQ_OP2_INST_RECIPSQRT_CLAMPED */
	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x68,	/* SQ_OP2_INST_RECIPSQRT_FF */
	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69,	/* SQ_OP2_INST_RECIPSQRT_IEEE */
	    SQ_OP2_INST_SQRT_IEEE                         = 0x6a,	/* SQ_OP2_INST_SQRT_IEEE */
	    SQ_OP2_INST_FLT_TO_INT                        = 0x6b,	/* SQ_OP2_INST_FLT_TO_INT */
	    SQ_OP2_INST_INT_TO_FLT                        = 0x6c,	/* SQ_OP2_INST_INT_TO_FLT */
	    SQ_OP2_INST_UINT_TO_FLT                       = 0x6d,	/* SQ_OP2_INST_UINT_TO_FLT */
	    SQ_OP2_INST_SIN                               = 0x6e,	/* SQ_OP2_INST_SIN */
	    SQ_OP2_INST_COS                               = 0x6f,	/* SQ_OP2_INST_COS */
	    SQ_OP2_INST_ASHR_INT                          = 0x70,	/* SQ_OP2_INST_ASHR_INT */
	    SQ_OP2_INST_LSHR_INT                          = 0x71,	/* SQ_OP2_INST_LSHR_INT */
	    SQ_OP2_INST_LSHL_INT                          = 0x72,	/* SQ_OP2_INST_LSHL_INT */
	    SQ_OP2_INST_MULLO_INT                         = 0x73,	/* SQ_OP2_INST_MULLO_INT */
	    SQ_OP2_INST_MULHI_INT                         = 0x74,	/* SQ_OP2_INST_MULHI_INT */
	    SQ_OP2_INST_MULLO_UINT                        = 0x75,	/* SQ_OP2_INST_MULLO_UINT */
	    SQ_OP2_INST_MULHI_UINT                        = 0x76,	/* SQ_OP2_INST_MULHI_UINT */
	    SQ_OP2_INST_RECIP_INT                         = 0x77,	/* SQ_OP2_INST_RECIP_INT */
	    SQ_OP2_INST_RECIP_UINT                        = 0x78,	/* SQ_OP2_INST_RECIP_UINT */
	    SQ_OP2_INST_FLT_TO_UINT                       = 0x79,	/* SQ_OP2_INST_FLT_TO_UINT */
    SQ_CF_ALLOC_EXPORT_WORD1_BUF                          = 0x00008dfc,	/* Word 1 of the control flow instruction. This subencoding is used by alloc/exports for all input/outputs to scratch/ring/stream/reduction buffers. */
	ARRAY_SIZE_mask                                   = 0xfff << 0,	/* Array size (elem-size units). Represents values [1,4096] when ELEMSIZE=0, [4,16384] when ELEMSIZE=3. */
	ARRAY_SIZE_shift                                  = 0,
	COMP_MASK_mask                                    = 0x0f << 12,	/* XYZW component mask (X is the LSB). Write the component iff the corresponding bit is 1. */
	COMP_MASK_shift                                   = 12,
    SQ_CF_WORD0                                           = 0x00008dfc,	/* Control flow instruction word 0. This word is the default representation for CF instructions. */
	/* ADDR: Bits [34:3] of the byte offset (producing a QUAD-word-aligned value) of the clause to execute (clause instructions only). Bits [34:3] of the byte offset (producing a QUAD-word-aligned value) of the control flow address to jump to (instructions that can jump). Offsets are relative to the byte address specified by PGM_START. Texture & Vertex clauses must start on 16-byte aligned addresses. */
    SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,	/* Word 0 of the control flow instruction for alloc/export. */
	ARRAY_BASE_mask                                   = 0x1fff << 0,	/* For scratch/reduction input/output, this is the base address of the array in multiples of 4 dwords [0,32764]. For stream/ring output, this is the base address of the array in multiples of 1 dword [0,8191]. For pixel/z output, this is the index of the first export (framebuffer 0..7; computed Z: 61). For parameter output, this is the parameter index of the first export [0,31]. For position output, this is the position index of the first export [60,63]. */
	ARRAY_BASE_shift                                  = 0,
	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask               = 0x03 << 13,	/* Type of allocation/export. In the table below, the first enumeration value listed (PIXEL, POS, PARAM) is used with CF_INST_EXPORT*. The second enumeration value listed (WRITE, WRITE_IND, WRITE_ACK, WRITE_IND_ACK) is used with CF_INST_MEM*. POSSIBLE VALUES: 00 - SQ_EXPORT_PIXEL: write pixel. SQ_EXPORT_WRITE: write to memory buffer. 01 - SQ_EXPORT_POS: write position. SQ_EXPORT_WRITE_IND: write to memory buffer, use offset in INDEX_GPR. 02 - SQ_EXPORT_PARAM: write parameter cache. SQ_EXPORT_WRITE_ACK: write to memory buffer, request an ACK when write is committed to memory. 03 - Unused for SX exports. SQ_EXPORT_WRITE_IND_ACK: write to memory buffer with offset in INDEX_GPR, get an ACK when done. */
	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift              = 13,
	    SQ_EXPORT_PIXEL                               = 0x00,	/* SQ_EXPORT_PIXEL: write pixel. SQ_EXPORT_WRITE: write to memory buffer. */
	    SQ_EXPORT_POS                                 = 0x01,	/* SQ_EXPORT_POS: write position. SQ_EXPORT_WRITE_IND: write to memory buffer, use offset in INDEX_GPR. */
	    SQ_EXPORT_PARAM                               = 0x02,	/* SQ_EXPORT_PARAM: write parameter cache. SQ_EXPORT_READ: read from memory buffer (scratch and reduction only). */
	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03,	/* Unused for SX exports. SQ_EXPORT_READ_IND: read from memory buffer, use offset in INDEX_GPR (scratch and reduction only). */
	RW_GPR_mask                                       = 0x7f << 15,	/* GPR register to write data to. */
	RW_GPR_shift                                      = 15,
	RW_REL_bit                                        = 1 << 22,	/* Indicates whether GPR is an absolute address, or relative to the loop index. */
	INDEX_GPR_mask                                    = 0x7f << 23,	/* For any indexed export, this GPR contains an index that will be used in the computation for determining the address of the first export. The index is multiplied by (ELEM_SIZE + 1). Only the X component is used (other components ignored, no swizzle allowed). */
	INDEX_GPR_shift                                   = 23,
	ELEM_SIZE_mask                                    = 0x03 << 30,	/* Number of DWORDs per element, minus one. This field is interpreted as a value in [1,2,4] (3 not supported). The value from INDEX_GPR and the loop counter are multiplied by this factor, if applicable. Also, BURST_COUNT is multiplied by this factor for CF_INST_MEM*. This field is ignored for CF_INST_EXPORT*. Normally, ELEMSIZE = 4 DWORDs for scratch & reduction, one DWORD for other types. */
	ELEM_SIZE_shift                                   = 30,
    SQ_VTX_WORD1                                          = 0x00008dfc,	/* Vertex fetch clause instruction word 1 is the bitwise OR of WORD1 | WORD1_{GPR,SEM}. This part contains fields shared by both subencodings. */
	SQ_VTX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_X_shift                     = 9,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_Y_shift                     = 12,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_Z_shift                     = 15,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,	/* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
	SQ_VTX_WORD1__DST_SEL_W_shift                     = 18,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
/* 	    SQ_SEL_MASK                                   = 0x07, */	/* SQ_SEL_MASK: mask out this component */
	USE_CONST_FIELDS_bit                              = 1 << 21,	/* If set, use format given in the fetch constant instead of in this instruction. */
	SQ_VTX_WORD1__DATA_FORMAT_mask                    = 0x3f << 22,	/* Indicate vertex data format (ignored if USE_CONST_FIELDS = 1). */
	SQ_VTX_WORD1__DATA_FORMAT_shift                   = 22,
	SQ_VTX_WORD1__NUM_FORMAT_ALL_mask                 = 0x03 << 28,	/* Format of returning data (N is the number of bits derived from DATA_FORMAT and gamma) (ignored if USE_CONST_FIELDS = 1). POSSIBLE VALUES: 00 - SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [- 1, 1] if signed. 01 - SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). 02 - SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
	SQ_VTX_WORD1__NUM_FORMAT_ALL_shift                = 28,
	    SQ_NUM_FORMAT_NORM                            = 0x00,	/* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [-1, 1] if signed. */
	    SQ_NUM_FORMAT_INT                             = 0x01,	/* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
	    SQ_NUM_FORMAT_SCALED                          = 0x02,	/* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
	SQ_VTX_WORD1__FORMAT_COMP_ALL_bit                 = 1 << 30,	/* Indicate sign of source components (ignored if USE_CONST_FIELDS = 1). */
	SQ_VTX_WORD1__SRF_MODE_ALL_bit                    = 1 << 31,	/* Mapping to use when converting from signed RF to float (ignored if USE_CONST_FIELDS = 1). */
    SQ_ALU_WORD1_OP2                                      = 0x00008dfc,	/* ALU instruction word 1. This subencoding is used for OP2 instructions (instructions taking 0 to 2 operands). */
/* 	SRC0_ABS_bit                                      = 1 << 0, */	/* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
/* 	SRC1_ABS_bit                                      = 1 << 1, */	/* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
/* 	UPDATE_EXECUTE_MASK_bit                           = 1 << 2, */	/* If set, update the execute mask in the SQ after executing this instruction based on the current predicate. */
/* 	UPDATE_PRED_bit                                   = 1 << 3, */	/* If set, update the predicate in the SP based on the predicate operation computed here. */
/* 	WRITE_MASK_bit                                    = 1 << 4, */	/* If set, write this scalar result to the destination GPR channel. */
	FOG_MERGE_bit                                     = 1 << 5,	/* If set, export fog value by merging the transcendental ALU result into the low-order bits of the vector destination. The vector results will lose some precision. This bit takes effect when set on the scalar instruction. */
	SQ_ALU_WORD1_OP2__OMOD_mask                       = 0x03 << 6,	/* Output modifier for this instruction. Must be set to ALU_OMOD_OFF for operations that produce an integer result. */
	SQ_ALU_WORD1_OP2__OMOD_shift                      = 6,
/* 	    SQ_ALU_OMOD_OFF                               = 0x00, */	/* SQ_ALU_OMOD_OFF: identity. */
/* 	    SQ_ALU_OMOD_M2                                = 0x01, */	/* SQ_ALU_OMOD_M2: multiply by 2.0. */
/* 	    SQ_ALU_OMOD_M4                                = 0x02, */	/* SQ_ALU_OMOD_M4: multiply by 4.0. */
/* 	    SQ_ALU_OMOD_D2                                = 0x03, */	/* SQ_ALU_OMOD_D2: divide by 2.0. */
	SQ_ALU_WORD1_OP2__ALU_INST_mask                   = 0x3ff << 8,	/* Instruction opcode. The top 3 bits of this must be zero. Caution: gaps in opcode values are not marked in the table below. */
	SQ_ALU_WORD1_OP2__ALU_INST_shift                  = 8,
/* 	    SQ_OP2_INST_ADD                               = 0x00, */	/* SQ_OP2_INST_ADD */
/* 	    SQ_OP2_INST_MUL                               = 0x01, */	/* SQ_OP2_INST_MUL */
/* 	    SQ_OP2_INST_MUL_IEEE                          = 0x02, */	/* SQ_OP2_INST_MUL_IEEE */
/* 	    SQ_OP2_INST_MAX                               = 0x03, */	/* SQ_OP2_INST_MAX */
/* 	    SQ_OP2_INST_MIN                               = 0x04, */	/* SQ_OP2_INST_MIN */
/* 	    SQ_OP2_INST_MAX_DX10                          = 0x05, */	/* SQ_OP2_INST_MAX_DX10 */
/* 	    SQ_OP2_INST_MIN_DX10                          = 0x06, */	/* SQ_OP2_INST_MIN_DX10 */
/* 	    SQ_OP2_INST_SETE                              = 0x08, */	/* SQ_OP2_INST_SETE */
/* 	    SQ_OP2_INST_SETGT                             = 0x09, */	/* SQ_OP2_INST_SETGT */
/* 	    SQ_OP2_INST_SETGE                             = 0x0a, */	/* SQ_OP2_INST_SETGE */
/* 	    SQ_OP2_INST_SETNE                             = 0x0b, */	/* SQ_OP2_INST_SETNE */
/* 	    SQ_OP2_INST_SETE_DX10                         = 0x0c, */	/* SQ_OP2_INST_SETE_DX10 */
/* 	    SQ_OP2_INST_SETGT_DX10                        = 0x0d, */	/* SQ_OP2_INST_SETGT_DX10 */
/* 	    SQ_OP2_INST_SETGE_DX10                        = 0x0e, */	/* SQ_OP2_INST_SETGE_DX10 */
/* 	    SQ_OP2_INST_SETNE_DX10                        = 0x0f, */	/* SQ_OP2_INST_SETNE_DX10 */
/* 	    SQ_OP2_INST_FRACT                             = 0x10, */	/* SQ_OP2_INST_FRACT */
/* 	    SQ_OP2_INST_TRUNC                             = 0x11, */	/* SQ_OP2_INST_TRUNC */
/* 	    SQ_OP2_INST_CEIL                              = 0x12, */	/* SQ_OP2_INST_CEIL */
/* 	    SQ_OP2_INST_RNDNE                             = 0x13, */	/* SQ_OP2_INST_RNDNE */
/* 	    SQ_OP2_INST_FLOOR                             = 0x14, */	/* SQ_OP2_INST_FLOOR */
/* 	    SQ_OP2_INST_MOVA                              = 0x15, */	/* SQ_OP2_INST_MOVA */
/* 	    SQ_OP2_INST_MOVA_FLOOR                        = 0x16, */	/* SQ_OP2_INST_MOVA_FLOOR */
/* 	    SQ_OP2_INST_MOVA_INT                          = 0x18, */	/* SQ_OP2_INST_MOVA_INT */
/* 	    SQ_OP2_INST_MOV                               = 0x19, */	/* SQ_OP2_INST_MOV */
/* 	    SQ_OP2_INST_NOP                               = 0x1a, */	/* SQ_OP2_INST_NOP */
/* 	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e, */	/* SQ_OP2_INST_PRED_SETGT_UINT */
/* 	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f, */	/* SQ_OP2_INST_PRED_SETGE_UINT */
/* 	    SQ_OP2_INST_PRED_SETE                         = 0x20, */	/* SQ_OP2_INST_PRED_SETE */
/* 	    SQ_OP2_INST_PRED_SETGT                        = 0x21, */	/* SQ_OP2_INST_PRED_SETGT */
/* 	    SQ_OP2_INST_PRED_SETGE                        = 0x22, */	/* SQ_OP2_INST_PRED_SETGE */
/* 	    SQ_OP2_INST_PRED_SETNE                        = 0x23, */	/* SQ_OP2_INST_PRED_SETNE */
/* 	    SQ_OP2_INST_PRED_SET_INV                      = 0x24, */	/* SQ_OP2_INST_PRED_SET_INV */
/* 	    SQ_OP2_INST_PRED_SET_POP                      = 0x25, */	/* SQ_OP2_INST_PRED_SET_POP */
/* 	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26, */	/* SQ_OP2_INST_PRED_SET_CLR */
/* 	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27, */	/* SQ_OP2_INST_PRED_SET_RESTORE */
/* 	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28, */	/* SQ_OP2_INST_PRED_SETE_PUSH */
/* 	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29, */	/* SQ_OP2_INST_PRED_SETGT_PUSH */
/* 	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a, */	/* SQ_OP2_INST_PRED_SETGE_PUSH */
/* 	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b, */	/* SQ_OP2_INST_PRED_SETNE_PUSH */
/* 	    SQ_OP2_INST_KILLE                             = 0x2c, */	/* SQ_OP2_INST_KILLE */
/* 	    SQ_OP2_INST_KILLGT                            = 0x2d, */	/* SQ_OP2_INST_KILLGT */
/* 	    SQ_OP2_INST_KILLGE                            = 0x2e, */	/* SQ_OP2_INST_KILLGE */
/* 	    SQ_OP2_INST_KILLNE                            = 0x2f, */	/* SQ_OP2_INST_KILLNE */
/* 	    SQ_OP2_INST_AND_INT                           = 0x30, */	/* SQ_OP2_INST_AND_INT */
/* 	    SQ_OP2_INST_OR_INT                            = 0x31, */	/* SQ_OP2_INST_OR_INT */
/* 	    SQ_OP2_INST_XOR_INT                           = 0x32, */	/* SQ_OP2_INST_XOR_INT */
/* 	    SQ_OP2_INST_NOT_INT                           = 0x33, */	/* SQ_OP2_INST_NOT_INT */
/* 	    SQ_OP2_INST_ADD_INT                           = 0x34, */	/* SQ_OP2_INST_ADD_INT */
/* 	    SQ_OP2_INST_SUB_INT                           = 0x35, */	/* SQ_OP2_INST_SUB_INT */
/* 	    SQ_OP2_INST_MAX_INT                           = 0x36, */	/* SQ_OP2_INST_MAX_INT */
/* 	    SQ_OP2_INST_MIN_INT                           = 0x37, */	/* SQ_OP2_INST_MIN_INT */
/* 	    SQ_OP2_INST_MAX_UINT                          = 0x38, */	/* SQ_OP2_INST_MAX_UINT */
/* 	    SQ_OP2_INST_MIN_UINT                          = 0x39, */	/* SQ_OP2_INST_MIN_UINT */
/* 	    SQ_OP2_INST_SETE_INT                          = 0x3a, */	/* SQ_OP2_INST_SETE_INT */
/* 	    SQ_OP2_INST_SETGT_INT                         = 0x3b, */	/* SQ_OP2_INST_SETGT_INT */
/* 	    SQ_OP2_INST_SETGE_INT                         = 0x3c, */	/* SQ_OP2_INST_SETGE_INT */
/* 	    SQ_OP2_INST_SETNE_INT                         = 0x3d, */	/* SQ_OP2_INST_SETNE_INT */
/* 	    SQ_OP2_INST_SETGT_UINT                        = 0x3e, */	/* SQ_OP2_INST_SETGT_UINT */
/* 	    SQ_OP2_INST_SETGE_UINT                        = 0x3f, */	/* SQ_OP2_INST_SETGE_UINT */
/* 	    SQ_OP2_INST_KILLGT_UINT                       = 0x40, */	/* SQ_OP2_INST_KILLGT_UINT */
/* 	    SQ_OP2_INST_KILLGE_UINT                       = 0x41, */	/* SQ_OP2_INST_KILLGE_UINT */
/* 	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42, */	/* SQ_OP2_INST_PRED_SETE_INT */
/* 	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43, */	/* SQ_OP2_INST_PRED_SETGT_INT */
/* 	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44, */	/* SQ_OP2_INST_PRED_SETGE_INT */
/* 	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45, */	/* SQ_OP2_INST_PRED_SETNE_INT */
/* 	    SQ_OP2_INST_KILLE_INT                         = 0x46, */	/* SQ_OP2_INST_KILLE_INT */
/* 	    SQ_OP2_INST_KILLGT_INT                        = 0x47, */	/* SQ_OP2_INST_KILLGT_INT */
/* 	    SQ_OP2_INST_KILLGE_INT                        = 0x48, */	/* SQ_OP2_INST_KILLGE_INT */
/* 	    SQ_OP2_INST_KILLNE_INT                        = 0x49, */	/* SQ_OP2_INST_KILLNE_INT */
/* 	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a, */	/* SQ_OP2_INST_PRED_SETE_PUSH_INT */
/* 	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b, */	/* SQ_OP2_INST_PRED_SETGT_PUSH_INT */
/* 	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c, */	/* SQ_OP2_INST_PRED_SETGE_PUSH_INT */
/* 	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d, */	/* SQ_OP2_INST_PRED_SETNE_PUSH_INT */
/* 	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e, */	/* SQ_OP2_INST_PRED_SETLT_PUSH_INT */
/* 	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f, */	/* SQ_OP2_INST_PRED_SETLE_PUSH_INT */
/* 	    SQ_OP2_INST_DOT4                              = 0x50, */	/* SQ_OP2_INST_DOT4 */
/* 	    SQ_OP2_INST_DOT4_IEEE                         = 0x51, */	/* SQ_OP2_INST_DOT4_IEEE */
/* 	    SQ_OP2_INST_CUBE                              = 0x52, */	/* SQ_OP2_INST_CUBE */
/* 	    SQ_OP2_INST_MAX4                              = 0x53, */	/* SQ_OP2_INST_MAX4 */
/* 	    SQ_OP2_INST_MOVA_GPR_INT                      = 0x60, */	/* SQ_OP2_INST_MOVA_GPR_INT */
/* 	    SQ_OP2_INST_EXP_IEEE                          = 0x61, */	/* SQ_OP2_INST_EXP_IEEE */
/* 	    SQ_OP2_INST_LOG_CLAMPED                       = 0x62, */	/* SQ_OP2_INST_LOG_CLAMPED */
/* 	    SQ_OP2_INST_LOG_IEEE                          = 0x63, */	/* SQ_OP2_INST_LOG_IEEE */
/* 	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x64, */	/* SQ_OP2_INST_RECIP_CLAMPED */
/* 	    SQ_OP2_INST_RECIP_FF                          = 0x65, */	/* SQ_OP2_INST_RECIP_FF */
/* 	    SQ_OP2_INST_RECIP_IEEE                        = 0x66, */	/* SQ_OP2_INST_RECIP_IEEE */
/* 	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67, */	/* SQ_OP2_INST_RECIPSQRT_CLAMPED */
/* 	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x68, */	/* SQ_OP2_INST_RECIPSQRT_FF */
/* 	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69, */	/* SQ_OP2_INST_RECIPSQRT_IEEE */
/* 	    SQ_OP2_INST_SQRT_IEEE                         = 0x6a, */	/* SQ_OP2_INST_SQRT_IEEE */
/* 	    SQ_OP2_INST_FLT_TO_INT                        = 0x6b, */	/* SQ_OP2_INST_FLT_TO_INT */
/* 	    SQ_OP2_INST_INT_TO_FLT                        = 0x6c, */	/* SQ_OP2_INST_INT_TO_FLT */
/* 	    SQ_OP2_INST_UINT_TO_FLT                       = 0x6d, */	/* SQ_OP2_INST_UINT_TO_FLT */
/* 	    SQ_OP2_INST_SIN                               = 0x6e, */	/* SQ_OP2_INST_SIN */
/* 	    SQ_OP2_INST_COS                               = 0x6f, */	/* SQ_OP2_INST_COS */
/* 	    SQ_OP2_INST_ASHR_INT                          = 0x70, */	/* SQ_OP2_INST_ASHR_INT */
/* 	    SQ_OP2_INST_LSHR_INT                          = 0x71, */	/* SQ_OP2_INST_LSHR_INT */
/* 	    SQ_OP2_INST_LSHL_INT                          = 0x72, */	/* SQ_OP2_INST_LSHL_INT */
/* 	    SQ_OP2_INST_MULLO_INT                         = 0x73, */	/* SQ_OP2_INST_MULLO_INT */
/* 	    SQ_OP2_INST_MULHI_INT                         = 0x74, */	/* SQ_OP2_INST_MULHI_INT */
/* 	    SQ_OP2_INST_MULLO_UINT                        = 0x75, */	/* SQ_OP2_INST_MULLO_UINT */
/* 	    SQ_OP2_INST_MULHI_UINT                        = 0x76, */	/* SQ_OP2_INST_MULHI_UINT */
/* 	    SQ_OP2_INST_RECIP_INT                         = 0x77, */	/* SQ_OP2_INST_RECIP_INT */
/* 	    SQ_OP2_INST_RECIP_UINT                        = 0x78, */	/* SQ_OP2_INST_RECIP_UINT */
/* 	    SQ_OP2_INST_FLT_TO_UINT                       = 0x79, */	/* SQ_OP2_INST_FLT_TO_UINT */
    SQ_CF_WORD1                                           = 0x00008dfc,	/* Control flow instruction word 1. This word is the default representation for CF instructions. */
	POP_COUNT_mask                                    = 0x07 << 0,	/* Specify the number of entries to pop from the stack, in [0..7]. Only used by certain CF instructions that pop the branch-loop stack. May be zero, to indicate no pop operation. */
	POP_COUNT_shift                                   = 0,
	CF_CONST_mask                                     = 0x1f << 3,	/* Specify the CF constant to use for flow control statements. For LOOP/ENDLOOP, this specifies the integer constant to use for the loop counter, loop index initializer, and increment. For instructions using COND, this specifies the index of the boolean constant to use. */
	CF_CONST_shift                                    = 3,
	COND_mask                                         = 0x03 << 8,	/* Specifies how to evaluate the condition test for each pixel. Not used by all instructions. May reference CF_CONST. POSSIBLE VALUES: 00 - SQ_CF_COND_ACTIVE: condition test passes for active pixels. 01 - SQ_CF_COND_FALSE: condition test fails for all pixels. 02 - SQ_CF_COND_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is true. 03 - SQ_CF_COND_NOT_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is false. */
	COND_shift                                        = 8,
	    SQ_CF_COND_ACTIVE                             = 0x00,	/* SQ_CF_COND_ACTIVE: condition test passes for active pixels. */
	    SQ_CF_COND_FALSE                              = 0x01,	/* SQ_CF_COND_FALSE: condition test fails for all pixels. */
	    SQ_CF_COND_BOOL                               = 0x02,	/* SQ_CF_COND_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is true. */
	    SQ_CF_COND_NOT_BOOL                           = 0x03,	/* SQ_CF_COND_NOT_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is false. */
	SQ_CF_WORD1__COUNT_mask                           = 0x07 << 10,	/* Number of instructions to execute in the clause, minus one (clause instructions only). This is interpreted as the number of instruction slots in the range [1,16]. MSB of count is COUNT_3 field. */
	SQ_CF_WORD1__COUNT_shift                          = 10,
	CALL_COUNT_mask                                   = 0x3f << 13,	/* Amount to increment call nesting counter by when executing a CALL statement; a CALL is skipped if the current nesting depth + call_count > 32. This field is interpreted in the range [0,31], and has no effect for other instruction types. */
	CALL_COUNT_shift                                  = 13,
	COUNT_3_bit                                       = 1 << 19,	/* MSB of COUNT field. */
/* 	END_OF_PROGRAM_bit                                = 1 << 21, */	/* If set, then this instruction is the last instruction of the CF program. Execution ends after this instruction is issued. */
/* 	VALID_PIXEL_MODE_bit                              = 1 << 22, */	/* If set, execute this instruction/clause as if invalid pixels are inactive. Antonym of WHOLE_QUAD_MODE. Caution: VALID_PIXEL_MODE is not the `default` mode; this bit should be set to 0 by default. */
	SQ_CF_WORD1__CF_INST_mask                         = 0x7f << 23,	/* Type of instruction to evaluate in CF. For this encoding, CF_INST must be set to one of the following values. POSSIBLE VALUES: 00 - SQ_CF_INST_NOP: perform no operation. 01 - SQ_CF_INST_TEX: execute texture fetch clause, through the texture cache. CF_COND=ACTIVE is required. 02 - SQ_CF_INST_VTX: execute vertex fetch clause, through the vertex-cache (if exists). CF_COND=ACTIVE is required. 03 - SQ_CF_INST_VTX_TC: execute vertex fetch clause through the texture cache. CF_COND=ACTIVE is required. 04 - SQ_CF_INST_LOOP_START: execute DX9 loop start instruction (push onto loop stack if loop body executes). 05 - SQ_CF_INST_LOOP_END: execute DX9 loop end instruction (pop loop stack if loop is finished). 06 - SQ_CF_INST_LOOP_START_DX10: execute DX10 loop start instruction (push onto loop stack if loop body executes). 07 - SQ_CF_INST_LOOP_START_NO_AL: same as LOOP_START but don`t push AL onto stack or update AL. 08 - SQ_CF_INST_LOOP_CONTINUE: execute continue statement (jump to end of loop if all pixels ready to continue). 09 - SQ_CF_INST_LOOP_BREAK: execute a break statement (pop loop stack if all pixels ready to break). 10 - SQ_CF_INST_JUMP: execute jump statement (may be conditional). 11 - SQ_CF_INST_PUSH: push current per-pixel active state onto stack OR jump and pop if no items would be active. 12 - SQ_CF_INST_PUSH_ELSE: push current per- pixel active state onto stack ND jump if no items would be active. 13 - SQ_CF_INST_ELSE: execute else statement (may be conditional) OR jump if no items would be active. 14 - SQ_CF_INST_POP: pop current per-pixel state from the stack. jump if no pixels were enabled prior to pop. 15 - SQ_CF_INST_POP_JUMP: pop current per- pixel state from the stack. then execute CF_INST_JUMP with pop count = 0. 16 - SQ_CF_INST_POP_PUSH: pop current per- pixel state from the stack. then execute CF_INST_PUSH with pop count = 0. 
17 - SQ_CF_INST_POP_PUSH_ELSE: pop current per-pixel state from the stack. then execute CF_INST_PUSH_ELSE. 18 - SQ_CF_INST_CALL: execute subroutine call instruction (push onto address stack). 19 - SQ_CF_INST_CALL_FS: call fetch shader. The address to call is stored in a state register in SQ. 20 - SQ_CF_INST_RETURN: execute subroutine return instruction (pop address stack). Pair with CF_INST_CALL only. 21 - SQ_CF_INST_EMIT_VERTEX: signal that GS has finished exporting a vertex to memory. CF_COND=ACTIVE is required. 22 - SQ_CF_INST_EMIT_CUT_VERTEX: emit a vertex and an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. 23 - SQ_CF_INST_CUT_VERTEX: emit an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. 24 - SQ_CF_INST_KILL: kill pixels that pass the condition test (may be conditional). jump if all pixels are killed. CF_COND=ACTIVE is required. */
	SQ_CF_WORD1__CF_INST_shift                        = 23,
	    SQ_CF_INST_NOP                                = 0x00,	/* SQ_CF_INST_NOP: perform no operation. */
	    SQ_CF_INST_TEX                                = 0x01,	/* SQ_CF_INST_TEX: execute texture fetch or constant fetch clause. CF_COND=ACTIVE is required. */
	    SQ_CF_INST_VTX                                = 0x02,	/* SQ_CF_INST_VTX: execute vertex fetch clause. CF_COND=ACTIVE is required. */
	    SQ_CF_INST_VTX_TC                             = 0x03,	/* SQ_CF_INST_VTX_TC: execute vertex fetch clause through the texture cache (for systems lacking VC). CF_COND=ACTIVE is required. */
	    SQ_CF_INST_LOOP_START                         = 0x04,	/* SQ_CF_INST_LOOP_START: execute DX9 loop start instruction (push onto loop stack if loop body executes). */
	    SQ_CF_INST_LOOP_END                           = 0x05,	/* SQ_CF_INST_LOOP_END: execute DX9 loop end instruction (pop loop stack if loop is finished). */
	    SQ_CF_INST_LOOP_START_DX10                    = 0x06,	/* SQ_CF_INST_LOOP_START_DX10: execute DX10 loop start instruction (push onto loop stack if loop body executes). */
	    SQ_CF_INST_LOOP_START_NO_AL                   = 0x07,	/* SQ_CF_INST_LOOP_START_NO_AL: same as LOOP_START but don`t push AL onto stack or update AL. */
	    SQ_CF_INST_LOOP_CONTINUE                      = 0x08,	/* SQ_CF_INST_LOOP_CONTINUE: execute continue statement (jump to end of loop if all pixels ready to continue). */
	    SQ_CF_INST_LOOP_BREAK                         = 0x09,	/* SQ_CF_INST_LOOP_BREAK: execute a break statement (pop loop stack if all pixels ready to break). */
	    SQ_CF_INST_JUMP                               = 0x0a,	/* SQ_CF_INST_JUMP: execute jump statement (may be conditional). */
	    SQ_CF_INST_PUSH                               = 0x0b,	/* SQ_CF_INST_PUSH: push current per-pixel active state onto stack OR jump and pop if no items would be active. */
	    SQ_CF_INST_PUSH_ELSE                          = 0x0c,	/* SQ_CF_INST_PUSH_ELSE: push current per-pixel active state onto stack AND jump if no items would be active. */
	    SQ_CF_INST_ELSE                               = 0x0d,	/* SQ_CF_INST_ELSE: execute else statement (may be conditional) OR jump if no items would be active. */
	    SQ_CF_INST_POP                                = 0x0e,	/* SQ_CF_INST_POP: pop current per-pixel state from the stack. jump if no pixels were enabled prior to pop. */
	    SQ_CF_INST_POP_JUMP                           = 0x0f,	/* SQ_CF_INST_POP_JUMP: pop current per-pixel state from the stack. then execute CF_INST_JUMP with pop count = 0. */
	    SQ_CF_INST_POP_PUSH                           = 0x10,	/* SQ_CF_INST_POP_PUSH: pop current per-pixel state from the stack. then execute CF_INST_PUSH with pop count = 0. */
	    SQ_CF_INST_POP_PUSH_ELSE                      = 0x11,	/* SQ_CF_INST_POP_PUSH_ELSE: pop current per-pixel state from the stack. then execute CF_INST_PUSH_ELSE. */
	    SQ_CF_INST_CALL                               = 0x12,	/* SQ_CF_INST_CALL: execute subroutine call instruction (push onto address stack). */
	    SQ_CF_INST_CALL_FS                            = 0x13,	/* SQ_CF_INST_CALL_FS: call fetch shader. The address to call is stored in a state register in SQ. */
	    SQ_CF_INST_RETURN                             = 0x14,	/* SQ_CF_INST_RETURN: execute subroutine return instruction (pop address stack). Pair with CF_INST_CALL only. */
	    SQ_CF_INST_EMIT_VERTEX                        = 0x15,	/* SQ_CF_INST_EMIT_VERTEX: signal that GS has finished exporting a vertex to memory. CF_COND=ACTIVE is required. */
	    SQ_CF_INST_EMIT_CUT_VERTEX                    = 0x16,	/* SQ_CF_INST_EMIT_CUT_VERTEX: emit a vertex and an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. */
	    SQ_CF_INST_CUT_VERTEX                         = 0x17,	/* SQ_CF_INST_CUT_VERTEX: emit an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. */
	    SQ_CF_INST_KILL                               = 0x18,	/* SQ_CF_INST_KILL: kill pixels that pass the condition test (may be conditional). jump if all pixels are killed. CF_COND=ACTIVE is required. */
/* 	WHOLE_QUAD_MODE_bit                               = 1 << 30, */	/* If set, execute this instruction/clause as if all pixels are active and valid. Antonym of VALID_PIXEL_MODE. Set at most one of these bits. */
/* 	BARRIER_bit                                       = 1 << 31, */	/* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
    SQ_VTX_WORD1_SEM                                      = 0x00008dfc,	/* Vertex fetch clause instruction word 1. This subencoding is used by semantic fetch instructions that specify the destination using a semantic table. */
	SEMANTIC_ID_mask                                  = 0xff << 0,	/* Specify the 8-bit semantic ID used to lookup the destination GPR from the semantic table. */
	SEMANTIC_ID_shift                                 = 0,
    SQ_TEX_WORD0                                          = 0x00008dfc,	/* Texture fetch clause instruction word 0. */
	TEX_INST_mask                                     = 0x1f << 0,	/* Opcode for this texture instruction. POSSIBLE VALUES: 00 - SQ_TEX_INST_VTX_FETCH: vertex fetch (X = uint32 index) 01 - SQ_TEX_INST_VTX_SEMANTIC: semantic vertex fetch 03 - SQ_TEX_INST_LD: fetch texel, XYZL are uint32 04 - SQ_TEX_INST_GET_TEXTURE_RESINFO: retrieve width, height, depth, number of mipmap levels 05 - SQ_TEX_INST_GET_NUMBER_OF_SAMPLES: retrieve width, height, depth, number of samples of an MSAA surface 06 - SQ_TEX_INST_GET_LOD: X = computed LOD for all pixels in quad 07 - SQ_TEX_INST_GET_GRADIENTS_H: slopes relative to horizontal: X = dx/dh, Y = dy/dh, Z = dz/dh, W = dw/dh 08 - SQ_TEX_INST_GET_GRADIENTS_V: slopes relative to vertical: X = dx/dv, Y = dy/dv, Z = dz/dv, W = dw/dv 09 - SQ_TEX_INST_GET_LERP: retrieve weights used for bilinear fetch, X = horizontal lerp, Y = vertical lerp, Z = volume slice lerp, W = mipmap lerp 11 - SQ_TEX_INST_SET_GRADIENTS_H: XYZ set horizontal gradients 12 - SQ_TEX_INST_SET_GRADIENTS_V: XYZ set vertical gradients 13 - SQ_TEX_INST_PASS: returns the address read in memory 14 - Z set index for array of cubemaps 16 - SQ_TEX_INST_SAMPLE 17 - SQ_TEX_INST_SAMPLE_L 18 - SQ_TEX_INST_SAMPLE_LB 19 - SQ_TEX_INST_SAMPLE_LZ 20 - SQ_TEX_INST_SAMPLE_G 21 - SQ_TEX_INST_SAMPLE_G_L 22 - SQ_TEX_INST_SAMPLE_G_LB 23 - SQ_TEX_INST_SAMPLE_G_LZ 24 - SQ_TEX_INST_SAMPLE_C 25 - SQ_TEX_INST_SAMPLE_C_L 26 - SQ_TEX_INST_SAMPLE_C_LB 27 - SQ_TEX_INST_SAMPLE_C_LZ 28 - SQ_TEX_INST_SAMPLE_C_G 29 - SQ_TEX_INST_SAMPLE_C_G_L 30 - SQ_TEX_INST_SAMPLE_C_G_LB 31 - SQ_TEX_INST_SAMPLE_C_G_LZ */
	TEX_INST_shift                                    = 0,
	    SQ_TEX_INST_VTX_FETCH                         = 0x00,	/* SQ_TEX_INST_VTX_FETCH: vertex fetch (X = uint32 index) */
	    SQ_TEX_INST_VTX_SEMANTIC                      = 0x01,	/* SQ_TEX_INST_VTX_SEMANTIC: semantic vertex fetch */
	    SQ_TEX_INST_LD                                = 0x03,	/* SQ_TEX_INST_LD: fetch texel, XYZL are uint32 */
	    SQ_TEX_INST_GET_TEXTURE_RESINFO               = 0x04,	/* SQ_TEX_INST_GET_TEXTURE_RESINFO: retrieve width, height, depth, number of mipmap levels */
	    SQ_TEX_INST_GET_NUMBER_OF_SAMPLES             = 0x05,	/* SQ_TEX_INST_GET_NUMBER_OF_SAMPLES: retrieve width, height, depth, number of samples of an MSAA surface */
	    SQ_TEX_INST_GET_LOD                           = 0x06,	/* SQ_TEX_INST_GET_LOD: X = computed LOD for all pixels in quad */
	    SQ_TEX_INST_GET_GRADIENTS_H                   = 0x07,	/* SQ_TEX_INST_GET_GRADIENTS_H: slopes relative to horizontal: X = dx/dh, Y = dy/dh, Z = dz/dh, W = dw/dh */
	    SQ_TEX_INST_GET_GRADIENTS_V                   = 0x08,	/* SQ_TEX_INST_GET_GRADIENTS_V: slopes relative to vertical: X = dx/dv, Y = dy/dv, Z = dz/dv, W = dw/dv */
	    SQ_TEX_INST_GET_LERP                          = 0x09,	/* SQ_TEX_INST_GET_LERP: retrieve weights used for bilinear fetch, X = horizontal lerp, Y = vertical lerp, Z = volume slice lerp, W = mipmap lerp */
	    SQ_TEX_INST_RESERVED_10                       = 0x0a,	/* SQ_TEX_INST_RESERVED_10: Reserved (was GetWeight: retrieve weights used for bilinear fetch, X = TL weight, Y = TR weight, Z = BL weight, W = BR weight) */
	    SQ_TEX_INST_SET_GRADIENTS_H                   = 0x0b,	/* SQ_TEX_INST_SET_GRADIENTS_H: XYZ set horizontal gradients */
	    SQ_TEX_INST_SET_GRADIENTS_V                   = 0x0c,	/* SQ_TEX_INST_SET_GRADIENTS_V: XYZ set vertical gradients */
	    SQ_TEX_INST_PASS                              = 0x0d,	/* SQ_TEX_INST_PASS: returns the address read in memory */
	    X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS           = 0x0e,	/* Z set index for array of cubemaps */
	    SQ_TEX_INST_SAMPLE                            = 0x10,	/* SQ_TEX_INST_SAMPLE */
	    SQ_TEX_INST_SAMPLE_L                          = 0x11,	/* SQ_TEX_INST_SAMPLE_L */
	    SQ_TEX_INST_SAMPLE_LB                         = 0x12,	/* SQ_TEX_INST_SAMPLE_LB */
	    SQ_TEX_INST_SAMPLE_LZ                         = 0x13,	/* SQ_TEX_INST_SAMPLE_LZ */
	    SQ_TEX_INST_SAMPLE_G                          = 0x14,	/* SQ_TEX_INST_SAMPLE_G */
	    SQ_TEX_INST_SAMPLE_G_L                        = 0x15,	/* SQ_TEX_INST_SAMPLE_G_L */
	    SQ_TEX_INST_SAMPLE_G_LB                       = 0x16,	/* SQ_TEX_INST_SAMPLE_G_LB */
	    SQ_TEX_INST_SAMPLE_G_LZ                       = 0x17,	/* SQ_TEX_INST_SAMPLE_G_LZ */
	    SQ_TEX_INST_SAMPLE_C                          = 0x18,	/* SQ_TEX_INST_SAMPLE_C */
	    SQ_TEX_INST_SAMPLE_C_L                        = 0x19,	/* SQ_TEX_INST_SAMPLE_C_L */
	    SQ_TEX_INST_SAMPLE_C_LB                       = 0x1a,	/* SQ_TEX_INST_SAMPLE_C_LB */
	    SQ_TEX_INST_SAMPLE_C_LZ                       = 0x1b,	/* SQ_TEX_INST_SAMPLE_C_LZ */
	    SQ_TEX_INST_SAMPLE_C_G                        = 0x1c,	/* SQ_TEX_INST_SAMPLE_C_G */
	    SQ_TEX_INST_SAMPLE_C_G_L                      = 0x1d,	/* SQ_TEX_INST_SAMPLE_C_G_L */
	    SQ_TEX_INST_SAMPLE_C_G_LB                     = 0x1e,	/* SQ_TEX_INST_SAMPLE_C_G_LB */
	    SQ_TEX_INST_SAMPLE_C_G_LZ                     = 0x1f,	/* SQ_TEX_INST_SAMPLE_C_G_LZ */
	BC_FRAC_MODE_bit                                  = 1 << 5,	/* If set, force black texture data and white border to retrieve fraction of pixel that hits the border. */
/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */	/* If set, texture instruction must fetch data for all pixels (result may be used as source coordinate of a dependent read). If cleared, texture instruction can ignore invalid pixels. */
	RESOURCE_ID_mask                                  = 0xff << 8,	/* Surface ID to read from (specifies the buffer address, size, and format). 160 available for GS and PS; 176 shared across FS and VS. */
	RESOURCE_ID_shift                                 = 8,
/* 	SRC_GPR_mask                                      = 0x7f << 16, */	/* Source GPR address to get the texture lookup address from. */
/* 	SRC_GPR_shift                                     = 16, */
/* 	SRC_REL_bit                                       = 1 << 23, */	/* Indicate whether source address is absolute or relative to an index. */
	SQ_TEX_WORD0__ALT_CONST_bit                       = 1 << 24,	/* if set, uses constants from alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
    SQ_VTX_WORD1_GPR                                      = 0x00008dfc,	/* Vertex fetch clause instruction word 1. This subencoding is used by fetch instructions that specify a destination GPR directly. */
	SQ_VTX_WORD1_GPR__DST_GPR_mask                    = 0x7f << 0,	/* Destination GPR address to write result to. */
	SQ_VTX_WORD1_GPR__DST_GPR_shift                   = 0,
	SQ_VTX_WORD1_GPR__DST_REL_bit                     = 1 << 7,	/* Indicate whether destination address is absolute or relative to an index. */
    SQ_ALU_WORD0                                          = 0x00008dfc,	/* ALU instruction word 0. */
	SRC0_SEL_mask                                     = 0x1ff << 0,	/* Source for operands src0, src1. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
	SRC0_SEL_shift                                    = 0,
	/* NOTE(review): the SQ_ALU_SRC_* and SQ_CHAN_* values in this register are left commented out; presumably the same names are already defined earlier in this enum (an enumerator name cannot be defined twice) -- TODO confirm. */
/* 	    SQ_ALU_SRC_0                                  = 0xf8, */	/* SQ_ALU_SRC_0: special constant 0.0. */
/* 	    SQ_ALU_SRC_1                                  = 0xf9, */	/* SQ_ALU_SRC_1: special constant 1.0 float. */
/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */	/* SQ_ALU_SRC_1_INT: special constant 1 integer. */
/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */	/* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */	/* SQ_ALU_SRC_0_5: special constant 0.5 float. */
/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */	/* SQ_ALU_SRC_LITERAL: literal constant. */
/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */	/* SQ_ALU_SRC_PV: previous vector result. */
/* 	    SQ_ALU_SRC_PS                                 = 0xff, */	/* SQ_ALU_SRC_PS: previous scalar result. */
	SRC0_REL_bit                                      = 1 << 9,	/* If set, this operand uses relative addressing based on the INDEX_MODE. */
	SRC0_CHAN_mask                                    = 0x03 << 10,	/* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
	SRC0_CHAN_shift                                   = 10,
/* 	    SQ_CHAN_X                                     = 0x00, */	/* SQ_CHAN_X: Use X component. */
/* 	    SQ_CHAN_Y                                     = 0x01, */	/* SQ_CHAN_Y: Use Y component. */
/* 	    SQ_CHAN_Z                                     = 0x02, */	/* SQ_CHAN_Z: Use Z component. */
/* 	    SQ_CHAN_W                                     = 0x03, */	/* SQ_CHAN_W: Use W component. */
	SRC0_NEG_bit                                      = 1 << 12,	/* If set, negate the input for this operand. Should only be set for floating point inputs. */
	SRC1_SEL_mask                                     = 0x1ff << 13,	/* Source for operands src0, src1. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
	SRC1_SEL_shift                                    = 13,
/* 	    SQ_ALU_SRC_0                                  = 0xf8, */	/* SQ_ALU_SRC_0: special constant 0.0. */
/* 	    SQ_ALU_SRC_1                                  = 0xf9, */	/* SQ_ALU_SRC_1: special constant 1.0 float. */
/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */	/* SQ_ALU_SRC_1_INT: special constant 1 integer. */
/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */	/* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */	/* SQ_ALU_SRC_0_5: special constant 0.5 float. */
/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */	/* SQ_ALU_SRC_LITERAL: literal constant. */
/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */	/* SQ_ALU_SRC_PV: previous vector result. */
/* 	    SQ_ALU_SRC_PS                                 = 0xff, */	/* SQ_ALU_SRC_PS: previous scalar result. */
	SRC1_REL_bit                                      = 1 << 22,	/* If set, this operand uses relative addressing based on the INDEX_MODE. */
	SRC1_CHAN_mask                                    = 0x03 << 23,	/* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
	SRC1_CHAN_shift                                   = 23,
/* 	    SQ_CHAN_X                                     = 0x00, */	/* SQ_CHAN_X: Use X component. */
/* 	    SQ_CHAN_Y                                     = 0x01, */	/* SQ_CHAN_Y: Use Y component. */
/* 	    SQ_CHAN_Z                                     = 0x02, */	/* SQ_CHAN_Z: Use Z component. */
/* 	    SQ_CHAN_W                                     = 0x03, */	/* SQ_CHAN_W: Use W component. */
	SRC1_NEG_bit                                      = 1 << 25,	/* If set, negate the input for this operand. Should only be set for floating point inputs. */
	INDEX_MODE_mask                                   = 0x07 << 26,	/* Specify what relative addressing mode to use for operands that have the REL bit set. POSSIBLE VALUES: 00 - SQ_INDEX_AR_X: constants: add AR.X. registers: add GPR index. 01 - SQ_INDEX_AR_Y: constants: add AR.Y. registers: add GPR index. 02 - SQ_INDEX_AR_Z: constants: add AR.Z. registers: add GPR index. 03 - SQ_INDEX_AR_W: constants: add AR.W. registers: add GPR index. 04 - SQ_INDEX_LOOP: add current loop index value. */
	INDEX_MODE_shift                                  = 26,
	/* Only encodings 0x00-0x04 of this 3-bit field are listed; 0x05-0x07 have no named constant. */
	    SQ_INDEX_AR_X                                 = 0x00,	/* SQ_INDEX_AR_X: constants: add AR.X. registers: add GPR index. */
	    SQ_INDEX_AR_Y                                 = 0x01,	/* SQ_INDEX_AR_Y: constants: add AR.Y. registers: add GPR index. */
	    SQ_INDEX_AR_Z                                 = 0x02,	/* SQ_INDEX_AR_Z: constants: add AR.Z. registers: add GPR index. */
	    SQ_INDEX_AR_W                                 = 0x03,	/* SQ_INDEX_AR_W: constants: add AR.W. registers: add GPR index. */
	    SQ_INDEX_LOOP                                 = 0x04,	/* SQ_INDEX_LOOP: add current loop index value. */
	PRED_SEL_mask                                     = 0x03 << 29,	/* Predicate to apply to this instruction. POSSIBLE VALUES: 00 - SQ_PRED_SEL_OFF: execute all pixels. 01 - Reserved 02 - SQ_PRED_SEL_ZERO: execute when pred = 0. 03 - SQ_PRED_SEL_ONE: execute when pred = 1. */
	PRED_SEL_shift                                    = 29,
	/* Encoding 0x01 is reserved, which is why the values below skip from 0x00 to 0x02. */
	    SQ_PRED_SEL_OFF                               = 0x00,	/* SQ_PRED_SEL_OFF: execute all pixels. */
	    SQ_PRED_SEL_ZERO                              = 0x02,	/* SQ_PRED_SEL_ZERO: execute when pred = 0. */
	    SQ_PRED_SEL_ONE                               = 0x03,	/* SQ_PRED_SEL_ONE: execute when pred = 1. */
	/* NOTE(review): with a 32-bit int, 1 << 31 shifts a signed 1 into the sign bit, which ISO C leaves undefined; compilers commonly fold it to INT_MIN, so this enumerator may be negative -- convert to an unsigned type before combining it with other bits. */
	LAST_bit                                          = 1 << 31,	/* If set, this is the last 64-bit word for this instruction. */
    SX_EXPORT_BUFFER_SIZES                                = 0x0000900c,	/* Register that defines export buffer ring sizes */
	COLOR_BUFFER_SIZE_mask                            = 0xff << 0,	/* Number of 4 line buffers -1 in color buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements. Minimum acceptable value of register field is 0xA. */
	COLOR_BUFFER_SIZE_shift                           = 0,
	POSITION_BUFFER_SIZE_mask                         = 0xff << 8,	/* Number of 4 line buffers -1 in position buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements. Minimum acceptable value of register field is 0x12. */
	POSITION_BUFFER_SIZE_shift                        = 8,
	SMX_BUFFER_SIZE_mask                              = 0xff << 16,	/* Number of 4 line buffers -1 in smx buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements */
	SMX_BUFFER_SIZE_shift                             = 16,
    SX_MEMORY_EXPORT_BASE                                 = 0x00009010,	/* Defines the base address of the memory export. Only available if chip supports GPU__GC__MEM_EXPORT_PRESENT */
	/* ADDRESS: 256 byte aligned base address; SX will add 8'h0 (eight zero bits) at the bottom to get the byte address */
    SX_MEMORY_EXPORT_SIZE                                 = 0x00009014,	/* Defines the aperture of the memory export. Only available if chip supports GPU__GC__MEM_EXPORT_PRESENT */
	/* If computed address minus base address is greater than size, SX will clamp to Size - 1 dword and disable the write. Read will happen at size - 1 dword */
    SPI_CONFIG_CNTL                                       = 0x00009100,
	GPR_WRITE_PRIORITY_mask                           = 0x1f << 0,	/* POSSIBLE VALUES: 00 - Priority order (high to low) = VS, GS, ES, PS 01 - Priority order = VS, GS, PS, ES 02 - Priority order = VS, ES, GS, PS 03 - Priority order = VS, ES, PS, GS 04 - Priority order = VS, PS, GS, ES 05 - Priority order = VS, PS, ES, GS 06 - Priority order = GS, VS, ES, PS 07 - Priority order = GS, VS, PS, ES 08 - Priority order = GS, ES, VS, PS 09 - Priority order = GS, ES, PS, VS 10 - Priority order = GS, PS, VS, ES 11 - Priority order = GS, PS, ES, VS 12 - Priority order = ES, VS, GS, PS 13 - Priority order = ES, VS, PS, GS 14 - Priority order = ES, GS, VS, PS 15 - Priority order = ES, GS, PS, VS 16 - Priority order = ES, PS, VS, GS 17 - Priority order = ES, PS, GS, VS 18 - Priority order = PS, VS, GS, ES 19 - Priority order = PS, VS, ES, GS 20 - Priority order = PS, GS, VS, ES 21 - Priority order = PS, GS, ES, VS 22 - Priority order = PS, ES, VS, GS 23 - Priority order = PS, ES, GS, VS */
	GPR_WRITE_PRIORITY_shift                          = 0,
	/* Only encodings 0x00 and 0x01 have named constants; encodings 0x02-0x17 (2-23) are described in the GPR_WRITE_PRIORITY_mask comment and must be written numerically. The two names below do not spell out the full order -- rely on the comments. */
	    X_PRIORITY_ORDER                              = 0x00,	/* Priority order (high to low) = VS, GS, ES, PS */
	    X_PRIORITY_ORDER_VS                           = 0x01,	/* Priority order = VS, GS, PS, ES */
	DISABLE_INTERP_1_bit                              = 1 << 5,
	DEBUG_THREAD_TYPE_SEL_mask                        = 0x03 << 6,	/* POSSIBLE VALUES: 00 - PS 01 - VS 02 - GS 03 - ES */
	DEBUG_THREAD_TYPE_SEL_shift                       = 6,
	DEBUG_GROUP_SEL_mask                              = 0x1f << 8,
	DEBUG_GROUP_SEL_shift                             = 8,
	DEBUG_GRBM_OVERRIDE_bit                           = 1 << 13,
    SPI_CONFIG_CNTL_1                                     = 0x0000913c,
	VTX_DONE_DELAY_mask                               = 0x0f << 0,
	VTX_DONE_DELAY_shift                              = 0,
	/* The encoding is not monotonic: 0x00-0x07 select 10-17 clks, while 0x08-0x0f select 2-9 clks. */
	    X_DELAY_10_CLKS                               = 0x00,	/* delay 10 clks (default, min value needed for R600 config) */
	    X_DELAY_11_CLKS                               = 0x01,	/* delay 11 clks */
	    X_DELAY_12_CLKS                               = 0x02,	/* delay 12 clks */
	    X_DELAY_13_CLKS                               = 0x03,	/* delay 13 clks */
	    X_DELAY_14_CLKS                               = 0x04,	/* delay 14 clks */
	    X_DELAY_15_CLKS                               = 0x05,	/* delay 15 clks */
	    X_DELAY_16_CLKS                               = 0x06,	/* delay 16 clks */
	    X_DELAY_17_CLKS                               = 0x07,	/* delay 17 clks */
	    X_DELAY_2_CLKS                                = 0x08,	/* delay 2 clks */
	    X_DELAY_3_CLKS                                = 0x09,	/* delay 3 clks */
	    X_DELAY_4_CLKS                                = 0x0a,	/* delay 4 clks */
	    X_DELAY_5_CLKS                                = 0x0b,	/* delay 5 clks */
	    X_DELAY_6_CLKS                                = 0x0c,	/* delay 6 clks */
	    X_DELAY_7_CLKS                                = 0x0d,	/* delay 7 clks */
	    X_DELAY_8_CLKS                                = 0x0e,	/* delay 8 clks */
	    X_DELAY_9_CLKS                                = 0x0f,	/* delay 9 clks */
	INTERP_ONE_PRIM_PER_ROW_bit                       = 1 << 4,
    TD_FILTER4                                            = 0x00009400,	/* FILTER4 Write Weights */
	WEIGHT_1_mask                                     = 0x7ff << 0,	/* Right (or Bottom) weight of pair: format s2.9 (range [-2, 2), with 9b of fraction). */
	WEIGHT_1_shift                                    = 0,
	WEIGHT_0_mask                                     = 0x7ff << 11,	/* Left (or Top) weight of pair: format s2.9 (range [-2, 2), with 9b of fraction). */
	WEIGHT_0_shift                                    = 11,
	WEIGHT_PAIR_bit                                   = 1 << 22,	/* Indicates which pair of weights is loaded. 0: Left (or Top) pair 1: Right (or Bottom) pair */
	PHASE_mask                                        = 0x0f << 23,	/* Indicates which of 9 phases is loaded. */
	PHASE_shift                                       = 23,
	DIRECTION_bit                                     = 1 << 27,	/* Indicates whether to load the horizontal (Left+Right) or vertical (Top+Bottom) weight pair. 0: Horizontal 1: Vertical */
    TD_FILTER4_1                                          = 0x00009404,
	TD_FILTER4_1_num                                  = 35,
/* 	WEIGHT_1_mask                                     = 0x7ff << 0, */
/* 	WEIGHT_1_shift                                    = 0, */
/* 	WEIGHT_0_mask                                     = 0x7ff << 11, */
/* 	WEIGHT_0_shift                                    = 11, */
    TD_CNTL                                               = 0x00009490,	/* Texture Data Common Control */
	SYNC_PHASE_SH_mask                                = 0x03 << 0,
	SYNC_PHASE_SH_shift                               = 0,
	SYNC_PHASE_VC_SMX_mask                            = 0x03 << 4,
	SYNC_PHASE_VC_SMX_shift                           = 4,
    TD0_CNTL                                              = 0x00009494,	/* Texture Data 0 Control */
	TD0_CNTL_num                                      = 4,
	ID_OVERRIDE_mask                                  = 0x03 << 28,	/* Texture Data 0 ID Override */
	ID_OVERRIDE_shift                                 = 28,
    TD0_STATUS                                            = 0x000094a4,	/* Texture Data 0 Status */
	/* NOTE(review): presumably the number of consecutive instances of this register -- TODO confirm. */
	TD0_STATUS_num                                    = 4,
	/* NOTE(review): with a 32-bit int, 1 << 31 shifts a signed 1 into the sign bit, which ISO C leaves undefined; compilers commonly fold it to INT_MIN, so this enumerator may be negative -- convert to an unsigned type before testing it against a register value. */
	BUSY_bit                                          = 1 << 31,	/* (Access: R) */
    TA_CNTL_AUX                                           = 0x00009508,	/* Texture Addresser Common Control */
	DISABLE_CUBE_WRAP_bit                             = 1 << 0,	/* CubeMap Clamp Policy Override */
	SYNC_GRADIENT_bit                                 = 1 << 24,	/* Gradient synchronization mode */
	SYNC_WALKER_bit                                   = 1 << 25,	/* Walker synchronization mode */
	SYNC_ALIGNER_bit                                  = 1 << 26,	/* Aligner synchronization mode */
	/* NOTE(review): with a 32-bit int, 1 << 31 shifts a signed 1 into the sign bit, which ISO C leaves undefined; compilers commonly fold it to INT_MIN, so this enumerator may be negative -- convert to an unsigned type before combining it with other bits. */
	BILINEAR_PRECISION_bit                            = 1 << 31,	/* Bilinear precision setting */
    TA0_CNTL                                              = 0x00009510,	/* Texture Addresser 0 Control */
/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */	/* Texture Addresser 0 ID Override */
/* 	ID_OVERRIDE_shift                                 = 28, */
    TA1_CNTL                                              = 0x00009514,	/* Texture Addresser 1 Control */
/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */	/* Texture Addresser 1 ID Override */
/* 	ID_OVERRIDE_shift                                 = 28, */
    TA2_CNTL                                              = 0x00009518,	/* Texture Addresser 2 Control */
/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */	/* Texture Addresser 2 ID Override */
/* 	ID_OVERRIDE_shift                                 = 28, */
    TA3_CNTL                                              = 0x0000951c,	/* Texture Addresser 3 Control */
/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */	/* Texture Addresser 3 ID Override */
/* 	ID_OVERRIDE_shift                                 = 28, */
    TA0_STATUS                                            = 0x00009520,	/* Texture Addresser 0 Status */
	FG_PFIFO_EMPTYB_bit                               = 1 << 12,	/* (Access: R) Gradient FIFO state, pipeline fifo not empty */
	FG_LFIFO_EMPTYB_bit                               = 1 << 13,	/* (Access: R) Gradient FIFO state, latency fifo not empty */
	FG_SFIFO_EMPTYB_bit                               = 1 << 14,	/* (Access: R) Gradient FIFO state, state fifo not empty */
	FL_PFIFO_EMPTYB_bit                               = 1 << 16,	/* (Access: R) LOD FIFO state, pipeline fifo not empty */
	FL_LFIFO_EMPTYB_bit                               = 1 << 17,	/* (Access: R) LOD FIFO state, latency fifo not empty */
	FL_SFIFO_EMPTYB_bit                               = 1 << 18,	/* (Access: R) LOD FIFO state, state fifo not empty */
	FA_PFIFO_EMPTYB_bit                               = 1 << 20,	/* (Access: R) Addresser FIFO state, pipeline fifo not empty */
	FA_LFIFO_EMPTYB_bit                               = 1 << 21,	/* (Access: R) Addresser FIFO state, latency fifo not empty */
	FA_SFIFO_EMPTYB_bit                               = 1 << 22,	/* (Access: R) Addresser FIFO state, state fifo not empty */
	IN_BUSY_bit                                       = 1 << 24,	/* (Access: R) Input/LOD(Deriv) busy */
	FG_BUSY_bit                                       = 1 << 25,	/* (Access: R) Gradient FIFO busy */
	FL_BUSY_bit                                       = 1 << 27,	/* (Access: R) LOD FIFO busy */
	TA_BUSY_bit                                       = 1 << 28,	/* (Access: R) Addresser busy */
	FA_BUSY_bit                                       = 1 << 29,	/* (Access: R) Addresser FIFO busy */
	AL_BUSY_bit                                       = 1 << 30,	/* (Access: R) Aligner busy */
/* 	BUSY_bit                                          = 1 << 31, */	/* (Access: R) Global TA0 busy */
    TA1_STATUS                                            = 0x00009524,	/* Texture Addresser 1 Status */
/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */	/* (Access: R) Gradient FIFO state, pipeline fifo not empty */
/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */	/* (Access: R) Gradient FIFO state, latency fifo not empty */
/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */	/* (Access: R) Gradient FIFO state, state fifo not empty */
/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */	/* (Access: R) LOD FIFO state, pipeline fifo not empty */
/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */	/* (Access: R) LOD FIFO state, latency fifo not empty */
/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */	/* (Access: R) LOD FIFO state, state fifo not empty */
/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */	/* (Access: R) Addresser FIFO state, pipeline fifo not empty */
/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */	/* (Access: R) Addresser FIFO state, latency fifo not empty */
/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */	/* (Access: R) Addresser FIFO state, state fifo not empty */
/* 	IN_BUSY_bit                                       = 1 << 24, */	/* (Access: R) Input/LOD(Deriv) busy */
/* 	FG_BUSY_bit                                       = 1 << 25, */	/* (Access: R) Gradient FIFO busy */
/* 	FL_BUSY_bit                                       = 1 << 27, */	/* (Access: R) LOD FIFO busy */
/* 	TA_BUSY_bit                                       = 1 << 28, */	/* (Access: R) Addresser busy */
/* 	FA_BUSY_bit                                       = 1 << 29, */	/* (Access: R) Addresser FIFO busy */
/* 	AL_BUSY_bit                                       = 1 << 30, */	/* (Access: R) Aligner busy */
/* 	BUSY_bit                                          = 1 << 31, */	/* (Access: R) Global TA1 busy */
    TA2_STATUS                                            = 0x00009528,	/* Texture Addresser 2 Status */
	/* The fields below repeat the names and bit positions defined under TA0_STATUS (and BUSY_bit under TD0_STATUS); presumably they stay commented out because an enumerator name cannot be defined twice. */
/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */	/* (Access: R) Gradient FIFO state, pipeline fifo not empty */
/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */	/* (Access: R) Gradient FIFO state, latency fifo not empty */
/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */	/* (Access: R) Gradient FIFO state, state fifo not empty */
/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */	/* (Access: R) LOD FIFO state, pipeline fifo not empty */
/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */	/* (Access: R) LOD FIFO state, latency fifo not empty */
/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */	/* (Access: R) LOD FIFO state, state fifo not empty */
/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */	/* (Access: R) Addresser FIFO state, pipeline fifo not empty */
/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */	/* (Access: R) Addresser FIFO state, latency fifo not empty */
/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */	/* (Access: R) Addresser FIFO state, state fifo not empty */
/* 	IN_BUSY_bit                                       = 1 << 24, */	/* (Access: R) Input/LOD(Deriv) busy */
/* 	FG_BUSY_bit                                       = 1 << 25, */	/* (Access: R) Gradient FIFO busy */
	/* Bit 26: none (no field is defined at this position). */
/* 	FL_BUSY_bit                                       = 1 << 27, */	/* (Access: R) LOD FIFO busy */
/* 	TA_BUSY_bit                                       = 1 << 28, */	/* (Access: R) Addresser busy */
/* 	FA_BUSY_bit                                       = 1 << 29, */	/* (Access: R) Addresser FIFO busy */
/* 	AL_BUSY_bit                                       = 1 << 30, */	/* (Access: R) Aligner busy */
/* 	BUSY_bit                                          = 1 << 31, */	/* (Access: R) Global TA2 busy */
    TA3_STATUS                                            = 0x0000952c,	/* Texture Addresser 3 Status */
/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */	/* (Access: R) Gradient FIFO state, pipeline fifo not empty */
/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */	/* (Access: R) Gradient FIFO state, latency fifo not empty */
/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */	/* (Access: R) Gradient FIFO state, state fifo not empty */
/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */	/* (Access: R) LOD FIFO state, pipeline fifo not empty */
/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */	/* (Access: R) LOD FIFO state, latency fifo not empty */
/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */	/* (Access: R) LOD FIFO state, state fifo not empty */
/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */	/* (Access: R) Addresser FIFO state, pipeline fifo not empty */
/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */	/* (Access: R) Addresser FIFO state, latency fifo not empty */
/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */	/* (Access: R) Addresser FIFO state, state fifo not empty */
/* 	IN_BUSY_bit                                       = 1 << 24, */	/* (Access: R) Input/LOD(Deriv) busy */
/* 	FG_BUSY_bit                                       = 1 << 25, */	/* (Access: R) Gradient FIFO busy */
/* 	FL_BUSY_bit                                       = 1 << 27, */	/* (Access: R) LOD FIFO busy */
/* 	TA_BUSY_bit                                       = 1 << 28, */	/* (Access: R) Addresser busy */
/* 	FA_BUSY_bit                                       = 1 << 29, */	/* (Access: R) Addresser FIFO busy */
/* 	AL_BUSY_bit                                       = 1 << 30, */	/* (Access: R) Aligner busy */
/* 	BUSY_bit                                          = 1 << 31, */	/* (Access: R) Global TA3 busy */
    TC_STATUS                                             = 0x00009600,	/* Texture Cache Status */
	TC_BUSY_bit                                       = 1 << 0,	/* (Access: R) Texture Cache busy */
    TC_INVALIDATE                                         = 0x00009604,	/* Texture Cache Invalidate - When used, TC must be idle or rendering artifacts can occur */
	START_bit                                         = 1 << 0,	/* (Access: W) Invalidate L1 and L2 caches */
    TC_CNTL                                               = 0x00009608,	/* Texture Cache Control - When used, TC must be idle or rendering artifacts can occur */
	FORCE_HIT_bit                                     = 1 << 0,
	FORCE_MISS_bit                                    = 1 << 1,
	L2_SIZE_mask                                      = 0x0f << 5,	/* L2 cache size, can be used to disable L2 completely. RV630 default=128K ; RV610 default=0 POSSIBLE VALUES: 00 - 256K 01 - 224K 02 - 192K 03 - 160K 04 - 128K 05 - 96K 06 - 64K 07 - 32K 08 - 0 */
	L2_SIZE_shift                                     = 5,
	/* L2_SIZE field values. These are raw (unshifted) field values, not register bit patterns; the field sits at L2_SIZE_shift. */
	/* Value 0x08 (size 0) from the list above has no enumerator here. */
	    _256K                                         = 0x00,	/* 256K */
	    _224K                                         = 0x01,	/* 224K */
	    _192K                                         = 0x02,	/* 192K */
	    _160K                                         = 0x03,	/* 160K */
	    _128K                                         = 0x04,	/* 128K */
	    _96K                                          = 0x05,	/* 96K */
	    _64K                                          = 0x06,	/* 64K */
	    _32K                                          = 0x07,	/* 32K */
	L2_DISABLE_LATE_HIT_bit                           = 1 << 9,
	DISABLE_VERT_PERF_bit                             = 1 << 10,
	DISABLE_INVAL_BUSY_bit                            = 1 << 11,
	DISABLE_INVAL_SAME_SURFACE_bit                    = 1 << 12,
	PARTITION_MODE_mask                               = 0x03 << 13,	/* Default is no partitioning POSSIBLE VALUES: 00 - Vertex: Full Cache ; Texture: Full Cache 01 - Vertex: 1/2 Cache ; Texture: 1/2 Cache 02 - Vertex: 1/4 Cache ; Texture: 3/4 Cache */
	PARTITION_MODE_shift                              = 13,
	/* Only PARTITION_MODE value 0x00 has an enumerator; values 0x01 and 0x02 listed above have none. */
	    X_VERTEX                                      = 0x00,	/* Vertex: Full Cache ; Texture: Full Cache */
	MISS_ARB_MODE_bit                                 = 1 << 15,
	HIT_ARB_MODE_bit                                  = 1 << 16,
	DISABLE_WRITE_DELAY_bit                           = 1 << 17,
	HIT_FIFO_DEPTH_bit                                = 1 << 18,
    VC_CNTL_STATUS                                        = 0x00009704,	/* Vertex Cache Status */
	RP_BUSY_bit                                       = 1 << 0,	/* Vertex Cache Request Processor is Busy */
	RG_BUSY_bit                                       = 1 << 1,	/* Vertex Cache Request Generator is Busy */
	VC_BUSY_bit                                       = 1 << 2,	/* Vertex Cache is Busy */
	CLAMP_DETECT_bit                                  = 1 << 3,
    SMX_DC_CTL0                                           = 0x0000a020,	/* Control settings for all Data Caches. These settings should only be changed when the SMX is idle. */
	WR_GATHER_STREAM0_bit                             = 1 << 0,	/* For Stream0 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_STREAM1_bit                             = 1 << 1,	/* For Stream1 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_STREAM2_bit                             = 1 << 2,	/* For Stream2 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_STREAM3_bit                             = 1 << 3,	/* For Stream3 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_SCRATCH_bit                             = 1 << 4,	/* For Scratch traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_REDUC_BUF_bit                           = 1 << 5,	/* For Reduction Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_RING_BUF_bit                            = 1 << 6,	/* For Ring Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	WR_GATHER_F_BUF_bit                               = 1 << 7,	/* For F Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
	DISABLE_CACHES_bit                                = 1 << 8,	/* Disables all Data Caches and turns on the bypass path. WARNING: Only write requests can be handled while caches are disabled. Read requests will still go to memory but read returns will be dropped by the SMX. */
	AUTO_FLUSH_INVAL_EN_bit                           = 1 << 10,	/* Valid only if AUTO_FLUSH_EN is set. Will cause auto- invalidate as well as auto-flush */
	AUTO_FLUSH_EN_bit                                 = 1 << 11,	/* Turn on Auto Flush of caches. All caches will automatically flush after AUTO_FLUSH_CNT idle cycles. */
	AUTO_FLUSH_CNT_mask                               = 0xffff << 12,	/* Nr of idle cycles after which all caches will automatically flush. */
	AUTO_FLUSH_CNT_shift                              = 12,
	MC_RD_STALL_FACTOR_mask                           = 0x03 << 28,	/* How easily SMX will assert MC`s read info stall bit. 2`d3 = only if cache_ctl_op_fifo_stalled, 2`d2 = as in 2 and if any VFA is full, 2`d1 = as in 1 and if IB full, 2`d0 = SMX will never assert read info stall bit. */
	MC_RD_STALL_FACTOR_shift                          = 28,
	/* NOTE(review): 0x03 << 30 is evaluated as a signed int and does not fit in 31 value bits, so the shift overflows. */
	/* The intended bit pattern is 0xc0000000; presumably this relies on the compiler wrapping the result - confirm for the compilers in use. */
	MC_WR_STALL_FACTOR_mask                           = 0x03 << 30,	/* How easily SMX will assert MC`s write info stall bit. 2`d3 = only if MU`s L2 victim cache or wr req fifo stalled, 2`d2 = as in 2 and if any VFA is full, 2`d1 = as in 1 and if IB full, 2`d0 = SMX will never assert write info stall bit. */
	MC_WR_STALL_FACTOR_shift                          = 30,
    SMX_DC_CTL1                                           = 0x0000a024,	/* Control settings for all Data Caches. These settings should only be changed when the SMX is idle. */
	OP_FIFO_SKID_mask                                 = 0x7f << 0,	/* Skid for Cache Operation Fifo. Must be at least 1. */
	OP_FIFO_SKID_shift                                = 0,
	CACHE_LINE_SIZE_bit                               = 1 << 8,	/* Selects between 32-byte (CL32) or 64-byte (CL64) size cache lines. Note that CL64 has double the cache line width but half the number of cache lines as CL32. Since the SMX MC write and read interfaces are only 32 bytes wide, a 64 byte cache line transfer takes 2 consecutive cycles over the MC interface, this makes more efficient use of MC bandwidth. */
	MULTI_FLUSH_MODE_bit                              = 1 << 9,	/* Allows multiple outstanding flushes to be in flight without stalling the pipeline. Only for ES/GS Flush and Flush and/or invalidate all events. Multi-Flush mode does not exist in RV630. */
	MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask          = 0x0f << 10,	/* Skid for Multi-Flush Engine`s Flush Request Abort Index Fifo. Must be at least 1. */
	MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift         = 10,
	DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit          = 1 << 16,	/* A Read hit of a write-gathering cacheline forces it to first evict to memory then read back to ensure coherency. Setting this bit allows you to read the line without evicting it first, but coherency (of cache vs memory) is not guaranteed. */
	DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit      = 1 << 17,	/* In a write-gathering cacheline, a read tag check also checks if the comp valid bits allow a read to be serviced from cache, else it is evicted and read back. Setting this bit disables the comp valid checking forcing any read hit to a write gathering cacheline to evict to memory and read back. */
	DISABLE_FLUSH_ES_ALSO_INVALS_bit                  = 1 << 18,	/* A Flush ES event also invalidates all ES lines in the caches. Disabling this will reduce cache`s ability to process incoming requests while flushing, reducing performance. */
	DISABLE_FLUSH_GS_ALSO_INVALS_bit                  = 1 << 19,	/* A Flush GS event also invalidates all GS lines in the caches. Disabling this will reduce cache`s ability to process incoming requests while flushing, reducing performance. */
    SMX_DC_CTL2                                           = 0x0000a028,	/* Operations on all Data Caches. These operations should only be done when the SMX is idle. The register fields can be polled to check for completion of the operation */
	INVALIDATE_CACHES_bit                             = 1 << 0,	/* Invalidates all lines in all Data Caches. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
	CACHES_INVALID_bit                                = 1 << 1,	/* READ-ONLY. All lines in all Data Caches are invalid, i.e., the caches are empty. */
	CACHES_DIRTY_bit                                  = 1 << 2,	/* READ-ONLY. There are some dirty lines in the Data Caches. */
	FLUSH_ALL_bit                                     = 1 << 4,	/* Flush all lines from all caches. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
	FLUSH_GS_THREADS_bit                              = 1 << 8,	/* Flush all lines from all caches which come from Geometry Shader threads. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
	FLUSH_ES_THREADS_bit                              = 1 << 9,	/* Flush all lines from all caches which come from Export Shader threads. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
    /*
     * Sampler 0 border colour registers (RED/GREEN/BLUE/ALPHA) for the PS, VS and GS register sets.
     * Within each set the four channel registers sit at consecutive 4-byte addresses (+0x0, +0x4, +0x8, +0xc).
     * NOTE(review): every register carries _num = 18 and _offset = 16. Presumably this means
     * 18 instances of the register, 16 bytes apart (one four-register quad per sampler);
     * the 16-byte quad size and the 0x200 spacing between the sets are consistent with that - confirm.
     */
    TD_PS_SAMPLER0_BORDER_RED                             = 0x0000a400,
	TD_PS_SAMPLER0_BORDER_RED_num                     = 18,
	TD_PS_SAMPLER0_BORDER_RED_offset                  = 16,
    TD_PS_SAMPLER0_BORDER_GREEN                           = 0x0000a404,
	TD_PS_SAMPLER0_BORDER_GREEN_num                   = 18,
	TD_PS_SAMPLER0_BORDER_GREEN_offset                = 16,
    TD_PS_SAMPLER0_BORDER_BLUE                            = 0x0000a408,
	TD_PS_SAMPLER0_BORDER_BLUE_num                    = 18,
	TD_PS_SAMPLER0_BORDER_BLUE_offset                 = 16,
    TD_PS_SAMPLER0_BORDER_ALPHA                           = 0x0000a40c,
	TD_PS_SAMPLER0_BORDER_ALPHA_num                   = 18,
	TD_PS_SAMPLER0_BORDER_ALPHA_offset                = 16,
    TD_VS_SAMPLER0_BORDER_RED                             = 0x0000a600,
	TD_VS_SAMPLER0_BORDER_RED_num                     = 18,
	TD_VS_SAMPLER0_BORDER_RED_offset                  = 16,
    TD_VS_SAMPLER0_BORDER_GREEN                           = 0x0000a604,
	TD_VS_SAMPLER0_BORDER_GREEN_num                   = 18,
	TD_VS_SAMPLER0_BORDER_GREEN_offset                = 16,
    TD_VS_SAMPLER0_BORDER_BLUE                            = 0x0000a608,
	TD_VS_SAMPLER0_BORDER_BLUE_num                    = 18,
	TD_VS_SAMPLER0_BORDER_BLUE_offset                 = 16,
    TD_VS_SAMPLER0_BORDER_ALPHA                           = 0x0000a60c,
	TD_VS_SAMPLER0_BORDER_ALPHA_num                   = 18,
	TD_VS_SAMPLER0_BORDER_ALPHA_offset                = 16,
    TD_GS_SAMPLER0_BORDER_RED                             = 0x0000a800,
	TD_GS_SAMPLER0_BORDER_RED_num                     = 18,
	TD_GS_SAMPLER0_BORDER_RED_offset                  = 16,
    TD_GS_SAMPLER0_BORDER_GREEN                           = 0x0000a804,
	TD_GS_SAMPLER0_BORDER_GREEN_num                   = 18,
	TD_GS_SAMPLER0_BORDER_GREEN_offset                = 16,
    TD_GS_SAMPLER0_BORDER_BLUE                            = 0x0000a808,
	TD_GS_SAMPLER0_BORDER_BLUE_num                    = 18,
	TD_GS_SAMPLER0_BORDER_BLUE_offset                 = 16,
    TD_GS_SAMPLER0_BORDER_ALPHA                           = 0x0000a80c,
	TD_GS_SAMPLER0_BORDER_ALPHA_num                   = 18,
	TD_GS_SAMPLER0_BORDER_ALPHA_offset                = 16,
    TD_PS_SAMPLER0_CLEARTYPE_KERNEL                       = 0x0000aa00,
	TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num               = 18,
	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask       = 0x07 << 0,
	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift      = 0,
	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask      = 0x07 << 3,
	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift     = 3,
    DB_DEPTH_SIZE                                         = 0x00028000,
	PITCH_TILE_MAX_mask                               = 0x3ff << 0,	/* Width in 8x8 pixel tiles. (Pitch - 1) */
	PITCH_TILE_MAX_shift                              = 0,
	SLICE_TILE_MAX_mask                               = 0xfffff << 10,	/* Number of 8x8 pixel tiles until the next slice plus some small number to be able to rotate the tile pattern. (Pitch - 1) */
	SLICE_TILE_MAX_shift                              = 10,
    DB_DEPTH_VIEW                                         = 0x00028004,	/* Selects slice index range for render target 0. */
	SLICE_START_mask                                  = 0x7ff << 0,	/* Specifies the starting slice number for this view. This field is added to the RenderTargetArrayIndex to compute the slice to render. */
	SLICE_START_shift                                 = 0,
	SLICE_MAX_mask                                    = 0x7ff << 13,	/* Specifies the maximum allowed Z slice index for this resource, which is one less than the total number of slices. */
	SLICE_MAX_shift                                   = 13,
    DB_DEPTH_BASE                                         = 0x0002800c,
	/* BASE_256B: Location of the first byte of the Depth surface in Device Address Space, which must be 256 byte aligned. High 32-bits of 40-bit address. */
    /*
     * DB_DEPTH_INFO: format, read size and tiling information for the depth/stencil surface.
     * FORMAT, READ_SIZE and ARRAY_MODE carry a DB_DEPTH_INFO__ prefix; CB_COLOR0_INFO declares
     * fields with the same base names, and all enumerators here share one namespace.
     */
    DB_DEPTH_INFO                                         = 0x00028010,
	DB_DEPTH_INFO__FORMAT_mask                        = 0x07 << 0,	/* Specifies the size of the depth and stencil components and whether depth is floating point. */
	DB_DEPTH_INFO__FORMAT_shift                       = 0,
	    DEPTH_INVALID                                 = 0x00,	/* DEPTH_INVALID: Depth and stencil surface are not valid. */
	    DEPTH_16                                      = 0x01,	/* DEPTH_16: UNORM 16-bit depth. */
	    DEPTH_X8_24                                   = 0x02,	/* DEPTH_X8_24: 24-bit UNORM depth and invalid stencil surface. */
	    DEPTH_8_24                                    = 0x03,	/* DEPTH_8_24: 24-bit UNORM depth and int stencil. */
	    DEPTH_X8_24_FLOAT                             = 0x04,	/* DEPTH_X8_24_FLOAT: 24-bit float depth and invalid stencil surface. */
	    DEPTH_8_24_FLOAT                              = 0x05,	/* DEPTH_8_24_FLOAT: 24-bit float depth and int stencil. */
	    DEPTH_32_FLOAT                                = 0x06,	/* DEPTH_32_FLOAT: 32-bit float depth. */
	    DEPTH_X24_8_32_FLOAT                          = 0x07,	/* DEPTH_X24_8_32_FLOAT: 32-bit float depth and int stencil. */
	DB_DEPTH_INFO__READ_SIZE_bit                      = 1 << 3,	/* Specifies the read size: larger reads are more efficient for AGP accesses, for example. */
	DB_DEPTH_INFO__ARRAY_MODE_mask                    = 0x0f << 15,	/* Specifies the tiling format for this array. DB does not support values 0, 1, 3, 7, 11, 13, or 15. */
	DB_DEPTH_INFO__ARRAY_MODE_shift                   = 15,
	/* Only ARRAY_MODE value 0x04 has an enumerator in this group. */
	    ARRAY_2D_TILED_THIN1                          = 0x04,	/* ARRAY_2D_TILED_THIN1: Uses 8x8x1 macro-tiles */
	TILE_SURFACE_ENABLE_bit                           = 1 << 25,	/* Enables reading and writing of the htile data. If off HiZ+S is off. */
	TILE_COMPACT_bit                                  = 1 << 26,	/* If true, this surface is compacted to eliminate storage that would be unused due to multi-chip supertiling. The supertiling mode is specified in PA_SC_MULTI_CHIP_CNTL. If this bit is set, then MULTI_CHIP_SUPERTILE_ENABLE must be set in PA_SC_MODE_CNTL. */
	/* NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit signed int, which overflows. */
	/* The intended bit pattern is 0x80000000; presumably this relies on the compiler wrapping the result - confirm for the compilers in use. */
	ZRANGE_PRECISION_bit                              = 1 << 31,	/* 0 = ZMin is the base, generally set when doing a Z > test, 1 = ZMax is the base, set when generally using a Z < test. The value used as base has full 14 bit precision. By setting the base to Max culling has less error in a < test. Can only be changed after a full surface clear. */
    DB_HTILE_DATA_BASE                                    = 0x00028014,
	/* BASE_256B: Location of the first byte of the HTileData surface in Device Address Space, which must be 256 byte aligned. High 32-bits of 40-bit address. This surface contains the HiZ data. */
    DB_STENCIL_CLEAR                                      = 0x00028028,
	DB_STENCIL_CLEAR__CLEAR_mask                      = 0xff << 0,	/* Stencil value when SMEM==0, which specifies that the tile is cleared to background stencil values. */
	DB_STENCIL_CLEAR__CLEAR_shift                     = 0,
	MIN_mask                                          = 0xff << 16,	/* Compressed stencils store values from STENCIL_MIN to STENCIL_MIN+15. Cannot be changed without clearing or previously expanding the stencil buffer. */
	MIN_shift                                         = 16,
    DB_DEPTH_CLEAR                                        = 0x0002802c,
	/* Depth value when ZMASK==0, which indicates that the tile has been cleared to the background depth. This register holds a 32bit float value. */
    PA_SC_SCREEN_SCISSOR_TL                               = 0x00028030,	/* Screen Scissor rectangle specification. This scissor is NOT affected by WINDOW_OFFSET. Negative numbers clamped to 0, so reads will mismatch on negative values. */
	PA_SC_SCREEN_SCISSOR_TL__TL_X_mask                = 0x7fff << 0,	/* Left hand edge of scissor rectangle. 15 bits signed. Valid range -16K to 8191. Inclusive for UPPER_LEFT. */
	PA_SC_SCREEN_SCISSOR_TL__TL_X_shift               = 0,
	PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask                = 0x7fff << 16,	/* Upper edge of scissor rectangle. 15 bits signed. Valid range -16K to 8191. Inclusive for UPPER_LEFT. */
	PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift               = 16,
    PA_SC_SCREEN_SCISSOR_BR                               = 0x00028034,	/* Screen Scissor rectangle specification. This scissor is NOT affected by WINDOW_OFFSET. Negative numbers clamped to 0, so reads will mismatch on negative values. */
	PA_SC_SCREEN_SCISSOR_BR__BR_X_mask                = 0x7fff << 0,	/* Right hand edge of scissor rectangle. 15 bits signed. Valid range -16K to 8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_SCREEN_SCISSOR_BR__BR_X_shift               = 0,
	PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask                = 0x7fff << 16,	/* Lower edge of scissor rectangle. 15 bits signed. Valid range -16K to 8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift               = 16,
    CB_COLOR0_BASE                                        = 0x00028040,
	CB_COLOR0_BASE_num                                = 8,
	/* BASE_256B: For linear and 1D tiled surfaces, this value times 256 is the byte address of the start of the resource in device address space. In other words, this field is the high 32- bits of an up to 40-bit virtual device address. For 2D tiled surfaces, the bits corresponding to the bank and pipe number in the address actually specify the bank/pipe swizzle for the surface. 2D tiled surfaces are always aligned to a multiple of the group size times the number of banks times the number of pipes (memory channels). */
    CB_COLOR0_SIZE                                        = 0x00028060,
	CB_COLOR0_SIZE_num                                = 8,
/* 	PITCH_TILE_MAX_mask                               = 0x3ff << 0, */	/* Define Pitch as the number of data elements per scanline. This field equals (Pitch/8) - 1, which equals the maximum 8x8 tile number allowed in the X dimension for the surface. Allowed values for Pitch depend on ARRAY_MODE, ElemBytes (the number of bytes per data element: 1 to 16), and Samples (the number of multi-samples per pixel: 1, 2, 4, or 8). It also depends on two configuration parameters that are the same for all resources: GroupBytes (the bytes per memory interleave group: 256 or 512), and Banks (the number of DDRAM banks: 4 or 8). Linear: Pitch*ElemBytes is a multiple of GroupBytes and Pitch is a multiple of 64 1D tiled: Pitch*8*ElemBytes*Samples is a multiple of GroupBytes and Pitch is a multiple of 8 2D/3D tiled: Pitch*8*ElemBytes*Samples/Banks is multiple of GroupBytes and Pitch is a multiple of 8*Banks/Factor where Factor is 1, 2, or 4 for THIN1/THIN2/THIN4 2B/3B tiled: also padded to bank swap boundaries, which are determined from GB_TILING_CONFIG fields In addition to these constraints */
/* 	PITCH_TILE_MAX_shift                              = 0, */
/* 	SLICE_TILE_MAX_mask                               = 0xfffff << 10, */	/* Define SliceTiles as (Pitch*Height/64). This field equals SliceTiles-1, and is the maximum allowed 8x8 or 64x1 tile number within an (x,y) slice of a 2D or 3D surface. The following constraints apply to allowable heights and z-depths for resources (see the ARRAY_MODE field): All cases: Height is in the range [1..8192]. 1D tiling: Height is a multiple of 8. 2D THIN1 tiling: Height is a multiple of 8*Pipes 2D THIN2 tiling: Height is a multiple of 16*Pipes (and pitch is a multiple of 4*Banks) 2D THIN4 tiling: Height is a multiple of 32*Pipes (and pitch is a multiple of 2*Banks) 2D THICK tiling: Height is a multiple of 8*Pipes and z-depth is a multiple of 4 Note: Pitch, height and Z-depth must be powers of 2 for mipmap chains (other than the base map). */
/* 	SLICE_TILE_MAX_shift                              = 10, */
    CB_COLOR0_VIEW                                        = 0x00028080,	/* Selects slice index range for render target 0. */
	CB_COLOR0_VIEW_num                                = 8,
	/* The field enumerators below are commented out: DB_DEPTH_VIEW already defines the same names with the same values, presumably to avoid redefining them. */
/* 	SLICE_START_mask                                  = 0x7ff << 0, */	/* For ARRAY_LINEAR_GENERAL the low 8-bits together with BASE_256B specify a 40-bit starting address (must be element-aligned). Else this specifies the starting slice number for this view: this field is added to the RenderTargetArrayIndex to compute the slice to render. */
/* 	SLICE_START_shift                                 = 0, */
/* 	SLICE_MAX_mask                                    = 0x7ff << 13, */	/* Specifies the maximum allowed Z slice index for this resource, which is one less than the total number of slices. Clamp Z slice to SLICE_START if this value is exceeded (clamp to zero for ARRAY_LINEAR_GENERAL). */
/* 	SLICE_MAX_shift                                   = 13, */
    CB_COLOR0_INFO                                        = 0x000280a0,	/* Information needed for render target 0 */
	CB_COLOR0_INFO_num                                = 8,
	ENDIAN_mask                                       = 0x03 << 0,	/* Specifies what kind of byte swapping to perform, if any, for different endian modes. The byte swap is equivalent to computing dest[A] = src[A XOR N] for byte address A and the XOR values listed below. See the COMP_SWAP field for component swapping options. */
	ENDIAN_shift                                      = 0,
	    ENDIAN_NONE                                   = 0x00,	/* ENDIAN_NONE: No endian swapping (XOR by 0) */
	    ENDIAN_8IN16                                  = 0x01,	/* ENDIAN_8IN16: 8 bit swap within 16 bit word (XOR by 1): 0xAABBCCDD -> 0xBBAADDCC */
	    ENDIAN_8IN32                                  = 0x02,	/* ENDIAN_8IN32: 8 bit swap within 32 bit word (XOR by 3): 0xAABBCCDD -> 0xDDCCBBAA */
	    ENDIAN_8IN64                                  = 0x03,	/* ENDIAN_8IN64: 8 bit swap in 64 bits (XOR by 7): 0xaabbccddeeffgghh -> 0xhhggffeeddccbbaa */
	CB_COLOR0_INFO__FORMAT_mask                       = 0x3f << 2,	/* Specifies the size of the color components and in some cases the number format. See the COMP_SWAP field below for mappings of RGBA (XYZW) shader pipe results to color component positions in the pixel format. */
	CB_COLOR0_INFO__FORMAT_shift                      = 2,
	/* FORMAT field values (raw, unshifted). Values 0x04 and 0x21 have no enumerator in this list. */
	    COLOR_INVALID                                 = 0x00,	/* COLOR_INVALID: this resource is disabled */
	    COLOR_8                                       = 0x01,	/* COLOR_8: */
	    COLOR_4_4                                     = 0x02,	/* COLOR_4_4: */
	    COLOR_3_3_2                                   = 0x03,	/* COLOR_3_3_2: (*) */
	    COLOR_16                                      = 0x05,	/* COLOR_16: */
	    COLOR_16_FLOAT                                = 0x06,	/* COLOR_16_FLOAT: */
	    COLOR_8_8                                     = 0x07,	/* COLOR_8_8: */
	    COLOR_5_6_5                                   = 0x08,	/* COLOR_5_6_5: */
	    COLOR_6_5_5                                   = 0x09,	/* COLOR_6_5_5: */
	    COLOR_1_5_5_5                                 = 0x0a,	/* COLOR_1_5_5_5: 1-bit component is uint for uint/sint number type, else it is unorm */
	    COLOR_4_4_4_4                                 = 0x0b,	/* COLOR_4_4_4_4: */
	    COLOR_5_5_5_1                                 = 0x0c,	/* COLOR_5_5_5_1: 1-bit component is uint for uint/sint number type, else it is unorm */
	    COLOR_32                                      = 0x0d,	/* COLOR_32: float/uint/sint only */
	    COLOR_32_FLOAT                                = 0x0e,	/* COLOR_32_FLOAT: */
	    COLOR_16_16                                   = 0x0f,	/* COLOR_16_16: */
	    COLOR_16_16_FLOAT                             = 0x10,	/* COLOR_16_16_FLOAT: */
	    COLOR_8_24                                    = 0x11,	/* COLOR_8_24: unorm depth, uint stencil */
	    COLOR_8_24_FLOAT                              = 0x12,	/* COLOR_8_24_FLOAT: float depth, uint stencil */
	    COLOR_24_8                                    = 0x13,	/* COLOR_24_8: unorm depth, uint stencil */
	    COLOR_24_8_FLOAT                              = 0x14,	/* COLOR_24_8_FLOAT: float depth, uint stencil */
	    COLOR_10_11_11                                = 0x15,	/* COLOR_10_11_11: */
	    COLOR_10_11_11_FLOAT                          = 0x16,	/* COLOR_10_11_11_FLOAT: */
	    COLOR_11_11_10                                = 0x17,	/* COLOR_11_11_10: */
	    COLOR_11_11_10_FLOAT                          = 0x18,	/* COLOR_11_11_10_FLOAT: */
	    COLOR_2_10_10_10                              = 0x19,	/* COLOR_2_10_10_10: */
	    COLOR_8_8_8_8                                 = 0x1a,	/* COLOR_8_8_8_8: srgb allowed */
	    COLOR_10_10_10_2                              = 0x1b,	/* COLOR_10_10_10_2: */
	    COLOR_X24_8_32_FLOAT                          = 0x1c,	/* COLOR_X24_8_32_FLOAT: float depth, uint stencil */
	    COLOR_32_32                                   = 0x1d,	/* COLOR_32_32: float/uint/sint only */
	    COLOR_32_32_FLOAT                             = 0x1e,	/* COLOR_32_32_FLOAT: */
	    COLOR_16_16_16_16                             = 0x1f,	/* COLOR_16_16_16_16: */
	    COLOR_16_16_16_16_FLOAT                       = 0x20,	/* COLOR_16_16_16_16_FLOAT: */
	    COLOR_32_32_32_32                             = 0x22,	/* COLOR_32_32_32_32: float/uint/sint only */
	    COLOR_32_32_32_32_FLOAT                       = 0x23,	/* COLOR_32_32_32_32_FLOAT: */
	CB_COLOR0_INFO__ARRAY_MODE_mask                   = 0x0f << 8,	/* Specifies the tiling format of this render target array. */
	CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
	    ARRAY_LINEAR_GENERAL                          = 0x00,	/* ARRAY_LINEAR_GENERAL: Unaligned linear array */
	    ARRAY_LINEAR_ALIGNED                          = 0x01,	/* ARRAY_LINEAR_ALIGNED: Aligned linear array */
/* 	    ARRAY_2D_TILED_THIN1                          = 0x04, */	/* ARRAY_2D_TILED_THIN1: Uses 8x8x1 macro-tiles */
	NUMBER_TYPE_mask                                  = 0x07 << 12,	/* Specifies the numeric type of the color components. This field is ignored if FORMAT specifies a number type (e.g. float or gamma). */
	NUMBER_TYPE_shift                                 = 12,
	    NUMBER_UNORM                                  = 0x00,	/* NUMBER_UNORM: unsigned repeating fraction (urf): range [0..1], scale factor (2^n)-1 */
	    NUMBER_SNORM                                  = 0x01,	/* NUMBER_SNORM: Microsoft-style signed rf: range [-1..1], scale factor (2^(n-1))-1 */
	    NUMBER_USCALED                                = 0x02,	/* NUMBER_USCALED: unsigned integer, converted to float in shader: range [0..(2^n)-1] */
	    NUMBER_SSCALED                                = 0x03,	/* NUMBER_SSCALED: signed integer, converted to float in shader: range [-2^(n-1)..2^(n-1)-1] */
	    NUMBER_UINT                                   = 0x04,	/* NUMBER_UINT: zero-extended bit field, int in shader: not blendable or filterable */
	    NUMBER_SINT                                   = 0x05,	/* NUMBER_SINT: sign-extended bit field, int in shader: not blendable or filterable */
	    NUMBER_SRGB                                   = 0x06,	/* NUMBER_SRGB: gamma corrected, range [0..1] (only supported for 8-bit components (always rounds color channels)) */
	    NUMBER_FLOAT                                  = 0x07,	/* NUMBER_FLOAT: floating point, depends on component size: 32-bit: IEEE float, SE8M23, bias 127, range (- 2^129..2^129) 24-bit: Depth float, E4M20, bias 15, range [0..1] 16-bit: Short float SE5M10, bias 15, range (-2^17..2^17) 11-bit: Packed float, E5M6 bias 15, range [0..2^17) 10-bit: Packed float, E5M5 bias 15, range [0..2^17) all other component sizes are treated as UINT */
	CB_COLOR0_INFO__READ_SIZE_bit                     = 1 << 15,	/* Specifies the preferred read size: larger reads are more efficient for PCIE accesses, for example. */
	/*
	 * COMP_SWAP is a 2-bit field at bits 17:16.
	 *
	 * Specifies how to map the red, green, blue, and alpha components from
	 * the shader to the components in the frame buffer pixel format. There
	 * are four choices for each number of components. With one component,
	 * the four modes select any one component. With 2-4 components,
	 * SWAP_STD selects the low order shader components in little-endian
	 * order; SWAP_ALT selects an alternate order (for 4 components) or
	 * inclusion of alpha (for 2 or 3 components); and the other two reverse
	 * the component orders for use on big-endian machines. The following
	 * table specifies the exact component mappings:
	 *
	 *   1 comp     std     alt     std_rev  alt_rev
	 *   --------   -----   -----   -------  -------
	 *   comp 0:    red     green   blue     alpha
	 *
	 *   2 comps    std     alt     std_rev  alt_rev
	 *   --------   -----   -----   -------  -------
	 *   comp 0:    red     red     green    alpha
	 *   comp 1:    green   alpha   red      red
	 *
	 *   3 comps    std     alt     std_rev  alt_rev
	 *   --------   -----   -----   -------  -------
	 *   comp 0:    red     red     blue     alpha
	 *   comp 1:    green   green   green    green
	 *   comp 2:    blue    alpha   red      red
	 *
	 *   4 comps    std     alt     std_rev  alt_rev
	 *   --------   -----   -----   -------  -------
	 *   comp 0:    red     blue    alpha    alpha
	 *   comp 1:    green   green   blue     red
	 *   comp 2:    blue    red     green    green
	 *   comp 3:    alpha   alpha   red      blue
	 */
	COMP_SWAP_mask                                    = 0x03 << 16,	/* Shader-to-framebuffer component mapping; see the table above. */
	COMP_SWAP_shift                                   = 16,
	    SWAP_STD                                      = 0x00,	/* SWAP_STD: standard little-endian comp order */
	    SWAP_ALT                                      = 0x01,	/* SWAP_ALT: alternate components or order */
	    SWAP_STD_REV                                  = 0x02,	/* SWAP_STD_REV: reverses SWAP_STD order */
	    SWAP_ALT_REV                                  = 0x03,	/* SWAP_ALT_REV: reverses SWAP_ALT order */
	/* TILE_MODE is a 2-bit field at bits 19:18; only values 0x00-0x02 have named constants here. */
	CB_COLOR0_INFO__TILE_MODE_mask                    = 0x03 << 18,	/* Selects how and whether to use per-tile CMASK and FMASK data with this surface. */
	CB_COLOR0_INFO__TILE_MODE_shift                   = 18,
	    TILE_DISABLE                                  = 0x00,	/* TILE_DISABLE: do not access any per-tile data */
	    TILE_CLEAR_ENABLE                             = 0x01,	/* TILE_CLEAR_ENABLE: allow background clear only */
	    TILE_FRAG_ENABLE                              = 0x02,	/* TILE_FRAG_ENABLE: allow background clear and multi-sample fragment masks */
	/* Single-bit flags at bits 20-27 (the bit 26 definition is commented out). */
	BLEND_CLAMP_bit                                   = 1 << 20,	/* Specifies whether to clamp source data to the render target range prior to blending, in addition to the post-blend clamp. This bit must be zero for uscaled, sscaled and float number types and when blend_bypass is set. */
	CLEAR_COLOR_bit                                   = 1 << 21,	/* If false, use RGB=0.0 and A=1.0 (0x3f800000) to expand fast-cleared tiles. If true, use the CB_CLEAR register values to expand fast-cleared tiles. */
	BLEND_BYPASS_bit                                  = 1 << 22,	/* If false, blending occurs normally as specified in CB_BLEND#_CONTROL. If true, blending (but not fog) is disabled. This must be set for the 24_8 and 8_24 formats and when the number type is uint or sint. It should also be set for number types that are required to ignore the blend state in a specific application interface. */
	BLEND_FLOAT32_bit                                 = 1 << 23,	/* If true, use 32-bit float precision for source colors, else truncate to 12-bit mantissa precision. This applies even if blending is disabled so that a null blend and blend disable produce the same result. This field is ignored for NUMBER_UINT and NUMBER_SINT. It must be one for floating point components larger than 16 bits or non-floating components larger than 12 bits, otherwise it must be 0. */
	SIMPLE_FLOAT_bit                                  = 1 << 24,	/* If false, floating point processing follows full IEEE rules for INF, NaN, and -0. If true, 0*anything produces 0 and no operation produces -0. */
	CB_COLOR0_INFO__ROUND_MODE_bit                    = 1 << 25,	/* This field selects between truncating (standard for floats) and rounding (standard for most other cases) to convert blender results to frame buffer components. The ROUND_BY_HALF setting can be overridden by the DITHER_ENABLE field in CB_COLOR_CONTROL. */
/* 	TILE_COMPACT_bit                                  = 1 << 26, */	/* If true, this surface is compacted to eliminate storage that would be unused due to multi-chip supertiling. The supertiling mode is specified in PA_SC_MULTI_CHIP_CNTL. If this bit is set, then MULTI_CHIP_SUPERTILE_ENABLE must be set in PA_SC_MODE_CNTL. */
	SOURCE_FORMAT_bit                                 = 1 << 27,	/* This field indicates the allowed format for color data being exported from the pixel shader into the output merge block. This field may only be set to EXPORT_NORM if BLEND_CLAMP is enabled, BLEND_FLOAT32 is disabled, and the render target has only 11-bit or smaller UNORM or SNORM components. Selecting EXPORT_NORM flushes to zero values with exponent less than 0x70 (values less than 2^-15). */
    CB_COLOR0_TILE                                        = 0x000280c0,
	CB_COLOR0_TILE_num                                = 8,
	/* BASE_256B: This value times 256 is the byte address of the start of the CMASK per-tile data, if any, in device address space. In other words, this field is the high 32-bits of an up to 40-bit virtual device address. */
    CB_COLOR0_FRAG                                        = 0x000280e0,
	CB_COLOR0_FRAG_num                                = 8,
	/* BASE_256B: For linear and 1D tiled surfaces, this value times 256 is the byte address of the start of the FMASK per-tile data, if any, in device address space. In other words, this field is the high 32-bits of an up to 40-bit virtual device address. 2D tiled surfaces are the same except that the bits corresponding to the bank and pipe number in the address actually specify the bank/pipe swizzle for the surface. 2D tiled surfaces are always aligned to a multiple of the group size times the number of banks times the number of pipes (memory channels). */
    CB_COLOR0_MASK                                        = 0x00028100,	/* Holds the CMASK block count (bits 11:0) and FMASK tile count (bits 31:12). */
	CB_COLOR0_MASK_num                                = 8,
	CMASK_BLOCK_MAX_mask                              = 0xfff << 0,	/* This field equals one less than the number of 128x128 blocks of color mask data per 2D slice. For R600, 4-bit CMASK values are stored in macro-tiles that have pixel width and height determined by computing sqrt(Pipes*16K) and rounding up (for width) or down (for height) to the nearest power of two. The pitch for the Cmask array is derived from PITCH_TILE_MAX, padding to the nearest multiple of the macro tile width. */
	CMASK_BLOCK_MAX_shift                             = 0,
	/* NOTE(review): with a 32-bit int, 0xfffff << 12 sets bit 31 and overflows signed int, which ISO C leaves undefined; the value relies on compiler behavior. */
	FMASK_TILE_MAX_mask                               = 0xfffff << 12,	/* This field equals one less than the number of 8x8 tiles of fragment mask data per 2D slice. For R600, FMASK values are stored in macro-tiles that have pixel width and height determined ... TBD. The pitch for the Fmask array is derived from PITCH_TILE_MAX, padding to the nearest multiple of the macro tile width. */
	FMASK_TILE_MAX_shift                              = 12,
    CB_CLEAR_RED                                          = 0x00028120,
	/* FP32 red component of background clear value. */
    CB_CLEAR_GREEN                                        = 0x00028124,
	/* FP32 green component of background clear value. */
    CB_CLEAR_BLUE                                         = 0x00028128,
	/* FP32 blue component of background clear value. */
    CB_CLEAR_ALPHA                                        = 0x0002812c,
	/* FP32 alpha component of background clear value. */
    SQ_ALU_CONST_BUFFER_SIZE_PS_0                         = 0x00028140,	/* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_PS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_BUFFER_SIZE_PS_0_num                 = 16,
	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask          = 0x1ff << 0,	/* Number of constant buffer elements */
	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift         = 0,
    SQ_ALU_CONST_BUFFER_SIZE_VS_0                         = 0x00028180,	/* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_VS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_BUFFER_SIZE_VS_0_num                 = 16,
	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask          = 0x1ff << 0,	/* Number of constant buffer elements */
	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift         = 0,
    SQ_ALU_CONST_BUFFER_SIZE_GS_0                         = 0x000281c0,	/* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_GS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_BUFFER_SIZE_GS_0_num                 = 16,
	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask          = 0x1ff << 0,	/* Number of constant buffer elements */
	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift         = 0,
    PA_SC_WINDOW_OFFSET                                   = 0x00028200,	/* Offset from screen coords to window coords. Vertices will be offset by these values if PA_SU_SC_MODE_CNTL.VTX_WINDOW_OFFSET_ENABLE is set. The WINDOW_SCISSOR will be offset by these values if the WINDOW_SCISSOR_TL.WINDOW_OFFSET_DISABLE is clear. If this value allows the window to extend beyond the Front Buffer (Surface) dimensions, it is expected that the SCREEN_SCISSOR is used to limit to FB surface. */
	/* NOTE(review): both offset masks below span 15 bits (0x7fff) although the descriptions say "16-bit"; +/- 16K fits in 15 bits -- confirm the field width against the register spec. */
	WINDOW_X_OFFSET_mask                              = 0x7fff << 0,	/* Offset in x-direction from screen to window coords. 16-bit 2's comp signed value. Valid Range +/- 16K. */
	WINDOW_X_OFFSET_shift                             = 0,
	WINDOW_Y_OFFSET_mask                              = 0x7fff << 16,	/* Offset in y-direction from screen to window coords. 16-bit 2's comp signed value. Valid Range +/- 16K. */
	WINDOW_Y_OFFSET_shift                             = 16,
    PA_SC_WINDOW_SCISSOR_TL                               = 0x00028204,	/* Window Scissor rectangle specification (top-left corner). Scissor is conditionally offset by WINDOW_OFFSET; the control visible here is WINDOW_OFFSET_DISABLE_bit (the original text refers to it as WINDOW_OFFSET_ENABLE). */
	PA_SC_WINDOW_SCISSOR_TL__TL_X_mask                = 0x3fff << 0,	/* Left hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_WINDOW_SCISSOR_TL__TL_X_shift               = 0,
	PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask                = 0x3fff << 16,	/* Upper edge of scissor rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift               = 16,
	/* NOTE(review): with a 32-bit int, 1 << 31 shifts into the sign bit, which ISO C leaves undefined; the resulting value depends on the compiler. */
	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31,	/* If set, window scissor is not offset by the WINDOW_OFFSET register values. */
    PA_SC_WINDOW_SCISSOR_BR                               = 0x00028208,	/* Window Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
	PA_SC_WINDOW_SCISSOR_BR__BR_X_mask                = 0x3fff << 0,	/* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_WINDOW_SCISSOR_BR__BR_X_shift               = 0,
	PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask                = 0x3fff << 16,	/* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift               = 16,
    PA_SC_CLIPRECT_RULE                                   = 0x0002820c,	/* OpenGL Clip boolean function */
	CLIP_RULE_mask                                    = 0xffff << 0,	/* OpenGL Clip boolean function. The `inside` flags for each of the four clip rectangles form a 4-bit binary number. The corresponding bit in this 16-bit number specifies whether the pixel is visible. */
	CLIP_RULE_shift                                   = 0,
    PA_SC_CLIPRECT_0_TL                                   = 0x00028210,	/* Clip Rectangle Top-Left Specification */
	PA_SC_CLIPRECT_0_TL_num                           = 4,
	PA_SC_CLIPRECT_0_TL_offset                        = 8,
	PA_SC_CLIPRECT_0_TL__TL_X_mask                    = 0x3fff << 0,	/* Left x value of clip rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT */
	PA_SC_CLIPRECT_0_TL__TL_X_shift                   = 0,
	PA_SC_CLIPRECT_0_TL__TL_Y_mask                    = 0x3fff << 16,	/* Top y value of clip rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT */
	PA_SC_CLIPRECT_0_TL__TL_Y_shift                   = 16,
    PA_SC_CLIPRECT_0_BR                                   = 0x00028214,	/* Clip Rectangle Bottom-Right Specification */
	PA_SC_CLIPRECT_0_BR_num                           = 4,
	PA_SC_CLIPRECT_0_BR_offset                        = 8,
	PA_SC_CLIPRECT_0_BR__BR_X_mask                    = 0x3fff << 0,	/* Right x value of clip rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT */
	PA_SC_CLIPRECT_0_BR__BR_X_shift                   = 0,
	PA_SC_CLIPRECT_0_BR__BR_Y_mask                    = 0x3fff << 16,	/* Bottom y value of clip rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT */
	PA_SC_CLIPRECT_0_BR__BR_Y_shift                   = 16,
    CB_TARGET_MASK                                        = 0x00028238,	/* Contains color component mask fields for writing the render targets. Red, green, blue, and alpha are components 0, 1, 2, and 3 in the pixel shader and are enabled by bits 0, 1, 2, and 3 in each field. Note that the components may be in a different order in the frame buffer, depending on the COMP_SWAP field. Targets 1-7 are defined equivalently to target 0. */
	/* Eight 4-bit fields, one per render target; target N sits at bits (4N+3):(4N). */
	TARGET0_ENABLE_mask                               = 0x0f << 0,	/* Enables writing to render target 0 components. The low order bit corresponds to the red channel. A zero bit disables writing to that channel and a one bit enables writing to that channel. If blending is enabled, all components are read from the frame buffer, regardless of this mask value. Any components that are missing due to the element format are replaced with their default value: 0.0 for color or 1.0 for alpha. */
	TARGET0_ENABLE_shift                              = 0,
	TARGET1_ENABLE_mask                               = 0x0f << 4,
	TARGET1_ENABLE_shift                              = 4,
	TARGET2_ENABLE_mask                               = 0x0f << 8,
	TARGET2_ENABLE_shift                              = 8,
	TARGET3_ENABLE_mask                               = 0x0f << 12,
	TARGET3_ENABLE_shift                              = 12,
	TARGET4_ENABLE_mask                               = 0x0f << 16,
	TARGET4_ENABLE_shift                              = 16,
	TARGET5_ENABLE_mask                               = 0x0f << 20,
	TARGET5_ENABLE_shift                              = 20,
	TARGET6_ENABLE_mask                               = 0x0f << 24,
	TARGET6_ENABLE_shift                              = 24,
	/* NOTE(review): with a 32-bit int, 0x0f << 28 sets bit 31 and overflows signed int, which ISO C leaves undefined; the value relies on compiler behavior. */
	TARGET7_ENABLE_mask                               = 0x0f << 28,
	TARGET7_ENABLE_shift                              = 28,
    CB_SHADER_MASK                                        = 0x0002823c,	/* Contains color component mask fields for the colors output by the shader. Outputs 1-7 are defined equivalently to output 0. */
	/* Eight 4-bit fields, one per shader output; output N sits at bits (4N+3):(4N). */
	OUTPUT0_ENABLE_mask                               = 0x0f << 0,	/* If zero, this field disables writes to render target 0, else it specifies which components are enabled in the shader. The low order bit corresponds to the red channel. A one bit passes the shader output component value to the color block. A zero bit replaces the component with the default value: 0.0 for RGB or 1.0 for alpha. */
	OUTPUT0_ENABLE_shift                              = 0,
	OUTPUT1_ENABLE_mask                               = 0x0f << 4,
	OUTPUT1_ENABLE_shift                              = 4,
	OUTPUT2_ENABLE_mask                               = 0x0f << 8,
	OUTPUT2_ENABLE_shift                              = 8,
	OUTPUT3_ENABLE_mask                               = 0x0f << 12,
	OUTPUT3_ENABLE_shift                              = 12,
	OUTPUT4_ENABLE_mask                               = 0x0f << 16,
	OUTPUT4_ENABLE_shift                              = 16,
	OUTPUT5_ENABLE_mask                               = 0x0f << 20,
	OUTPUT5_ENABLE_shift                              = 20,
	OUTPUT6_ENABLE_mask                               = 0x0f << 24,
	OUTPUT6_ENABLE_shift                              = 24,
	/* NOTE(review): with a 32-bit int, 0x0f << 28 sets bit 31 and overflows signed int, which ISO C leaves undefined; the value relies on compiler behavior. */
	OUTPUT7_ENABLE_mask                               = 0x0f << 28,
	OUTPUT7_ENABLE_shift                              = 28,
    PA_SC_GENERIC_SCISSOR_TL                              = 0x00028240,	/* Generic Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
	PA_SC_GENERIC_SCISSOR_TL__TL_X_mask               = 0x3fff << 0,	/* Left hand edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_GENERIC_SCISSOR_TL__TL_X_shift              = 0,
	PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask               = 0x3fff << 16,	/* Upper edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift              = 16,
/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */	/* If set, generic scissor is not offset by the WINDOW_OFFSET register values. */
    PA_SC_GENERIC_SCISSOR_BR                              = 0x00028244,	/* Generic Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
	PA_SC_GENERIC_SCISSOR_BR__BR_X_mask               = 0x3fff << 0,	/* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_GENERIC_SCISSOR_BR__BR_X_shift              = 0,
	PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask               = 0x3fff << 16,	/* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift              = 16,
    PA_SC_VPORT_SCISSOR_0_TL                              = 0x00028250,	/* WGF ViewportId Scissor rectangle specification(0-15). Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
	PA_SC_VPORT_SCISSOR_0_TL_num                      = 16,
	PA_SC_VPORT_SCISSOR_0_TL_offset                   = 8,
	PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask               = 0x3fff << 0,	/* Left hand edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift              = 0,
	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask               = 0x3fff << 16,	/* Upper edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift              = 16,
/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */	/* If set, viewportId scissor is not offset by the WINDOW_OFFSET register values. */
    PA_SC_VPORT_SCISSOR_0_BR                              = 0x00028254,	/* WGF ViewportID Scissor rectangle specification(0-15). Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
	PA_SC_VPORT_SCISSOR_0_BR_num                      = 16,
	PA_SC_VPORT_SCISSOR_0_BR_offset                   = 8,
	PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask               = 0x3fff << 0,	/* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift              = 0,
	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask               = 0x3fff << 16,	/* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift              = 16,
    PA_SC_VPORT_ZMIN_0                                    = 0x000282d0,	/* Viewport Transform Z Min Clamp - 0-15 For WGF ViewportId */
	PA_SC_VPORT_ZMIN_0_num                            = 16,
	PA_SC_VPORT_ZMIN_0_offset                         = 8,
	/* VPORT_ZMIN: Minimum Z Value from Viewport Transform. Z values will be clamped by the DB to this value. */
    PA_SC_VPORT_ZMAX_0                                    = 0x000282d4,	/* Viewport Transform Z Max Clamp - 0-15 For WGF ViewportId */
	PA_SC_VPORT_ZMAX_0_num                            = 16,
	PA_SC_VPORT_ZMAX_0_offset                         = 8,
	/* VPORT_ZMAX: Maximum Z Value from Viewport Transform. Z values will be clamped by the DB to this value. */
    SX_MISC                                               = 0x00028350,
	MULTIPASS_bit                                     = 1 << 0,
    SQ_VTX_SEMANTIC_0                                     = 0x00028380,	/* (8-state) Vertex Fetch Semantic Name. Used for semantic-based vertex fetches. 32 entries provided (8 states). The address in which the semantic occurs dictates which GPR the named element goes to in the vertex shader. Note that the hardware does not interpret this value, other than simply compare these 8 bits versus the 8-bit semantic in the vertex fetch instruction. These registers are write-only (not readable). */
	SQ_VTX_SEMANTIC_0_num                             = 32,
/* 	SEMANTIC_ID_mask                                  = 0xff << 0, */	/* 8-bit semantic id */
/* 	SEMANTIC_ID_shift                                 = 0, */
    VGT_MAX_VTX_INDX                                      = 0x00028400,	/* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the maximum clamp value. Clamping occurs after offsetting and prior to fix->flt conversion. */
	/* MAX_INDX: maximum clamp value for index clamp, extended to 32 bits */
    VGT_MIN_VTX_INDX                                      = 0x00028404,	/* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the minimum clamp value. Clamping occurs after offsetting and prior to fix->flt conversion. */
	/* MIN_INDX: minimum clamp value for index clamp, extended to 32 bits */
    VGT_INDX_OFFSET                                       = 0x00028408,	/* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the offset value. Offsetting occurs prior to clamping and fix->flt conversion. */
	/* Index offset value (32-bit adder), extended to 32 bits */
    VGT_MULTI_PRIM_IB_RESET_INDX                          = 0x0002840c,	/* This register defines the index which resets primitive sets when MULTI_PRIM_IB is enabled. */
	/* If this value matches an index in the IB, a new primitive set is started. */
    SX_ALPHA_TEST_CONTROL                                 = 0x00028410,	/* Alpha test setup: compare function (bits 2:0), enable (bit 3) and bypass (bit 8). */
	ALPHA_FUNC_mask                                   = 0x07 << 0,	/* Specifies the function used to compare the fragment alpha value (produced by the shader pipe) to ALPHA_REF, the reference alpha value. The alpha test passes (keeping the pixel) if frag_alpha OP alpha_ref is true. */
	ALPHA_FUNC_shift                                  = 0,
	/* In the REF_* descriptions below, "left" is presumably frag_alpha and "right" alpha_ref, following "frag_alpha OP alpha_ref" above -- confirm against the register spec. */
	    REF_NEVER                                     = 0x00,	/* REF_NEVER: never pass */
	    REF_LESS                                      = 0x01,	/* REF_LESS: pass if left < right */
	    REF_EQUAL                                     = 0x02,	/* REF_EQUAL: pass if left = right */
	    REF_LEQUAL                                    = 0x03,	/* REF_LEQUAL: pass if left <= right */
	    REF_GREATER                                   = 0x04,	/* REF_GREATER: pass if left > right */
	    REF_NOTEQUAL                                  = 0x05,	/* REF_NOTEQUAL: pass if left != right */
	    REF_GEQUAL                                    = 0x06,	/* REF_GEQUAL: pass if left >= right */
	    REF_ALWAYS                                    = 0x07,	/* REF_ALWAYS: always pass */
	ALPHA_TEST_ENABLE_bit                             = 1 << 3,	/* If alpha test is enabled, then a failed ALPHA_FUNC comparison causes the pixel to be killed. */
	ALPHA_TEST_BYPASS_bit                             = 1 << 8,	/* Driver can set this bit to bypass the alpha test for surface types that don't support alpha testing. */
    CB_BLEND_RED                                          = 0x00028414,
	/* FP32 red component of constant blend color. */
    CB_BLEND_GREEN                                        = 0x00028418,
	/* FP32 green component of constant blend color. */
    CB_BLEND_BLUE                                         = 0x0002841c,
	/* FP32 blue component of constant blend color. */
    CB_BLEND_ALPHA                                        = 0x00028420,
	/* FP32 alpha component of constant blend color. */
    CB_FOG_RED                                            = 0x00028424,
	/* Red component of fog color, specified in IEEE floating point. */
    CB_FOG_GREEN                                          = 0x00028428,
	/* Green component of fog color, specified in IEEE floating point. */
    CB_FOG_BLUE                                           = 0x0002842c,
	/* Blue component of fog color, specified in IEEE floating point. */
    DB_STENCILREFMASK                                     = 0x00028430,
	STENCILREF_mask                                   = 0xff << 0,	/* Specifies the reference stencil value for front facing primitives. */
	STENCILREF_shift                                  = 0,
	STENCILMASK_mask                                  = 0xff << 8,	/* This value is ANDed with both the reference and the current stencil value prior to the stencil test for front facing primitives. */
	STENCILMASK_shift                                 = 8,
	STENCILWRITEMASK_mask                             = 0xff << 16,	/* Specifies the write mask for the stencil planes for front facing primitives. */
	STENCILWRITEMASK_shift                            = 16,
    DB_STENCILREFMASK_BF                                  = 0x00028434,
	STENCILREF_BF_mask                                = 0xff << 0,	/* Specifies the reference stencil value for back facing primitives. */
	STENCILREF_BF_shift                               = 0,
	STENCILMASK_BF_mask                               = 0xff << 8,	/* This value is ANDed with both the reference and the current stencil value prior to the stencil test for back facing primitives. */
	STENCILMASK_BF_shift                              = 8,
	STENCILWRITEMASK_BF_mask                          = 0xff << 16,	/* Specifies the write mask for the stencil planes for back facing primitives. */
	STENCILWRITEMASK_BF_shift                         = 16,
    SX_ALPHA_REF                                          = 0x00028438,
	/* Reference value for alpha test, which is specified in IEEE floating point. */
    PA_CL_VPORT_XSCALE_0                                  = 0x0002843c,	/* Viewport Transform X Scale Factor - 1-15 For WGF ViewportId */
	PA_CL_VPORT_XSCALE_0_num                          = 16,
	PA_CL_VPORT_XSCALE_0_offset                       = 24,
	/* VPORT_XSCALE: Viewport Scale Factor for X coordinates. An IEEE float. */
    PA_CL_VPORT_XOFFSET_0                                 = 0x00028440,	/* Viewport Transform X Offset - 1-15 For WGF ViewportId */
	PA_CL_VPORT_XOFFSET_0_num                         = 16,
	PA_CL_VPORT_XOFFSET_0_offset                      = 24,
	/* VPORT_XOFFSET: Viewport Offset for X coordinates. An IEEE float. */
    PA_CL_VPORT_YSCALE_0                                  = 0x00028444,	/* Viewport Transform Y Scale Factor - 1-15 For WGF ViewportId */
	PA_CL_VPORT_YSCALE_0_num                          = 16,
	PA_CL_VPORT_YSCALE_0_offset                       = 24,
	/* VPORT_YSCALE: Viewport Scale Factor for Y coordinates. An IEEE float. */
    PA_CL_VPORT_YOFFSET_0                                 = 0x00028448,	/* Viewport Transform Y Offset - 1-15 For WGF ViewportId */
	PA_CL_VPORT_YOFFSET_0_num                         = 16,
	PA_CL_VPORT_YOFFSET_0_offset                      = 24,
	/* VPORT_YOFFSET: Viewport Offset for Y coordinates. An IEEE float. */
    PA_CL_VPORT_ZSCALE_0                                  = 0x0002844c,	/* Viewport Transform Z Scale Factor - 1-15 For WGF ViewportId */
	PA_CL_VPORT_ZSCALE_0_num                          = 16,
	PA_CL_VPORT_ZSCALE_0_offset                       = 24,
	/* VPORT_ZSCALE: Viewport Scale Factor for Z coordinates. An IEEE float. */
    PA_CL_VPORT_ZOFFSET_0                                 = 0x00028450,	/* Viewport Transform Z Offset - 1-15 For WGF ViewportId */
	PA_CL_VPORT_ZOFFSET_0_num                         = 16,
	PA_CL_VPORT_ZOFFSET_0_offset                      = 24,
	/* VPORT_ZOFFSET: Viewport Offset for Z coordinates. An IEEE float. */
    SPI_VS_OUT_ID_0                                       = 0x00028614,	/* VS output semantic mapping for 4 components/vectors */
	SPI_VS_OUT_ID_0_num                               = 10,
	/* Four 8-bit semantic fields packed into one dword: SEMANTIC_0 in bits 7:0 up to SEMANTIC_3 in bits 31:24. */
	SEMANTIC_0_mask                                   = 0xff << 0,
	SEMANTIC_0_shift                                  = 0,
	SEMANTIC_1_mask                                   = 0xff << 8,
	SEMANTIC_1_shift                                  = 8,
	SEMANTIC_2_mask                                   = 0xff << 16,
	SEMANTIC_2_shift                                  = 16,
	SEMANTIC_3_mask                                   = 0xff << 24,	/* NOTE(review): the generated text here was "12. Shader Export Registers", which looks like a stray section heading rather than a field description. Also, with a 32-bit int, 0xff << 24 sets bit 31 and overflows signed int, which ISO C leaves undefined. */
	SEMANTIC_3_shift                                  = 24,
    SPI_PS_INPUT_CNTL_0                                   = 0x00028644,	/* PS interpolator settings for parameter 0 */
	SPI_PS_INPUT_CNTL_0_num                           = 32,
	SEMANTIC_mask                                     = 0xff << 0,	/* PS input semantic mapping */
	SEMANTIC_shift                                    = 0,
	DEFAULT_VAL_mask                                  = 0x03 << 8,	/* Selects value to force into GPR if no semantic match found. POSSIBLE VALUES: 00 - 0.0f, 0.0f, 0.0f, 0.0f; 01 - 0.0f, 0.0f, 0.0f, 1.0f; 02 - 1.0f, 1.0f, 1.0f, 0.0f; 03 - 1.0f, 1.0f, 1.0f, 1.0f */
	DEFAULT_VAL_shift                                 = 8,
	/* Only value 0x00 has a named constant here; the other three DEFAULT_VAL settings must be written numerically. */
	    X_0_0F                                        = 0x00,	/* 0.0f, 0.0f, 0.0f, 0.0f */
	FLAT_SHADE_bit                                    = 1 << 10,	/* Flat shade select */
	SEL_CENTROID_bit                                  = 1 << 11,	/* Use IJ data sampled at pixel centroid */
	SEL_LINEAR_bit                                    = 1 << 12,	/* Use IJ data from linear gradients */
	CYL_WRAP_mask                                     = 0x0f << 13,	/* 4-bit cylindrical wrap control (1 bit per component) */
	CYL_WRAP_shift                                    = 13,
	PT_SPRITE_TEX_bit                                 = 1 << 17,	/* Override this parameter with texture coordinates if global enable set and prim is a point */
	SEL_SAMPLE_bit                                    = 1 << 18,
    SPI_VS_OUT_CONFIG                                     = 0x000286c4,	/* VS output configuration */
	VS_PER_COMPONENT_bit                              = 1 << 0,	/* When set, each entry in SPI_VS_OUT_ID_0-9 represents one component of a vector (not valid for DX10). Otherwise each entry represents an entire vector */
	VS_EXPORT_COUNT_mask                              = 0x1f << 1,	/* Number of vectors exported by the VS (value is minus 1) */
	VS_EXPORT_COUNT_shift                             = 1,
	VS_EXPORTS_FOG_bit                                = 1 << 8,	/* Set when VS exports fog */
	VS_OUT_FOG_VEC_ADDR_mask                          = 0x1f << 9,	/* Vector address where VS exported fog. Fog factor will always be in the X channel */
	VS_OUT_FOG_VEC_ADDR_shift                         = 9,
    SPI_PS_IN_CONTROL_0                                   = 0x000286cc,	/* Interpolator control settings */
	NUM_INTERP_mask                                   = 0x3f << 0,	/* Number of parameters to interp (no minus 1). Does not include fog, param_gen, or gen_indx, but should include position and frontface */
	NUM_INTERP_shift                                  = 0,
	POSITION_ENA_bit                                  = 1 << 8,	/* Load per-pixel position into the PS */
	POSITION_CENTROID_bit                             = 1 << 9,	/* Calculate per-pixel position at pixel centroid */
	POSITION_ADDR_mask                                = 0x1f << 10,	/* Relative GPR address where position is loaded (0->31) */
	POSITION_ADDR_shift                               = 10,
	PARAM_GEN_mask                                    = 0x0f << 15,	/* Generate up to 4 sets of ST coordinates. Bit 0=persp/center, 1=persp/centroid, 2=linear/center, 3=linear/centroid */
	PARAM_GEN_shift                                   = 15,
	PARAM_GEN_ADDR_mask                               = 0x7f << 19,	/* First relative GPR address where param_gen values are loaded (0->(127-num_param_gen)) */
	PARAM_GEN_ADDR_shift                              = 19,
	/* BARYC_SAMPLE_CNTL is a 2-bit field at bits 27:26; the indented constants below are its unshifted values. */
	BARYC_SAMPLE_CNTL_mask                            = 0x03 << 26,
	BARYC_SAMPLE_CNTL_shift                           = 26,
	    CENTROIDS_ONLY                                = 0x00,	/* CENTROIDS_ONLY: CENTROIDS_ONLY */
	    CENTERS_ONLY                                  = 0x01,	/* CENTERS_ONLY: CENTERS_ONLY */
	    CENTROIDS_AND_CENTERS                         = 0x02,	/* CENTROIDS_AND_CENTERS: CENTROIDS_AND_CENTERS */
	    UNDEF                                         = 0x03,	/* UNDEF: UNDEFINED */
	PERSP_GRADIENT_ENA_bit                            = 1 << 28,	/* Enable perspective gradients (if linear is set to 0, persp is always enabled) */
	LINEAR_GRADIENT_ENA_bit                           = 1 << 29,	/* Enable linear gradients */
	POSITION_SAMPLE_bit                               = 1 << 30,
	/* NOTE(review): with a 32-bit int, 1 << 31 shifts into the sign bit, which ISO C leaves undefined; the resulting value depends on the compiler. */
	BARYC_AT_SAMPLE_ENA_bit                           = 1 << 31,
    /* SPI_PS_IN_CONTROL_1 bit layout as encoded below: [0] GEN_INDEX_PIX, [7:1] GEN_INDEX_PIX_ADDR, [8] FRONT_FACE_ENA, [10:9] FRONT_FACE_CHAN, [11] FRONT_FACE_ALL_BITS, [16:12] FRONT_FACE_ADDR, [23:17] FOG_ADDR, [24] FIXED_PT_POSITION_ENA, [29:25] FIXED_PT_POSITION_ADDR. */
    SPI_PS_IN_CONTROL_1                                   = 0x000286d0,	/* Interpolator control settings */
	GEN_INDEX_PIX_bit                                 = 1 << 0,	/* Load incrementing value into each pixel to create a unique index for each */
	GEN_INDEX_PIX_ADDR_mask                           = 0x7f << 1,	/* Relative GPR address where gen_index is loaded (0->126) */
	GEN_INDEX_PIX_ADDR_shift                          = 1,
	FRONT_FACE_ENA_bit                                = 1 << 8,	/* Override interpolator results with frontface information */
	FRONT_FACE_CHAN_mask                              = 0x03 << 9,	/* Select channel to override */
	FRONT_FACE_CHAN_shift                             = 9,
	FRONT_FACE_ALL_BITS_bit                           = 1 << 11,	/* Bit 11; no description provided in this file */
	FRONT_FACE_ADDR_mask                              = 0x1f << 12,	/* Relative GPR address to load (0->31) */
	FRONT_FACE_ADDR_shift                             = 12,
	FOG_ADDR_mask                                     = 0x7f << 17,	/* Relative GPR address to load (0->126) */
	FOG_ADDR_shift                                    = 17,
	FIXED_PT_POSITION_ENA_bit                         = 1 << 24,	/* Bit 24; no description provided in this file */
	FIXED_PT_POSITION_ADDR_mask                       = 0x1f << 25,	/* 5-bit field at bits 29:25; no description provided in this file */
	FIXED_PT_POSITION_ADDR_shift                      = 25,
    /* SPI_INTERP_CONTROL_0 bit layout as encoded below: [0] FLAT_SHADE_ENA, [1] PNT_SPRITE_ENA, [4:2] PNT_SPRITE_OVRD_X, [7:5] PNT_SPRITE_OVRD_Y, [10:8] PNT_SPRITE_OVRD_Z, [13:11] PNT_SPRITE_OVRD_W, [14] PNT_SPRITE_TOP_1. */
    SPI_INTERP_CONTROL_0                                  = 0x000286d4,	/* Interpolator control settings */
	FLAT_SHADE_ENA_bit                                = 1 << 0,	/* Global flat shade enable used in conjunction with per-parameter flat shade control */
	PNT_SPRITE_ENA_bit                                = 1 << 1,	/* Enable PT_SPRITE_TEX override for point primitives */
	PNT_SPRITE_OVRD_X_mask                            = 0x07 << 2,	/* 3-bit override selector (X, per the field name); takes the SPI_PNT_SPRITE_SEL_* values declared below */
	PNT_SPRITE_OVRD_X_shift                           = 2,
	    SPI_PNT_SPRITE_SEL_0                          = 0x00,	/* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
	    SPI_PNT_SPRITE_SEL_1                          = 0x01,	/* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
	    SPI_PNT_SPRITE_SEL_S                          = 0x02,	/* SPI_PNT_SPRITE_SEL_S: Override component with S value */
	    SPI_PNT_SPRITE_SEL_T                          = 0x03,	/* SPI_PNT_SPRITE_SEL_T: Override component with T value */
	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04,	/* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
	PNT_SPRITE_OVRD_Y_mask                            = 0x07 << 5,	/* 3-bit override selector (Y, per the field name); same SPI_PNT_SPRITE_SEL_* values, shown commented out because they are already declared under PNT_SPRITE_OVRD_X */
	PNT_SPRITE_OVRD_Y_shift                           = 5,
/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */	/* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */	/* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */	/* SPI_PNT_SPRITE_SEL_S: Override component with S value */
/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */	/* SPI_PNT_SPRITE_SEL_T: Override component with T value */
/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */	/* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
	PNT_SPRITE_OVRD_Z_mask                            = 0x07 << 8,	/* 3-bit override selector (Z, per the field name); same value set as PNT_SPRITE_OVRD_X */
	PNT_SPRITE_OVRD_Z_shift                           = 8,
/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */	/* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */	/* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */	/* SPI_PNT_SPRITE_SEL_S: Override component with S value */
/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */	/* SPI_PNT_SPRITE_SEL_T: Override component with T value */
/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */	/* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
	PNT_SPRITE_OVRD_W_mask                            = 0x07 << 11,	/* 3-bit override selector (W, per the field name); same value set as PNT_SPRITE_OVRD_X */
	PNT_SPRITE_OVRD_W_shift                           = 11,
/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */	/* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */	/* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */	/* SPI_PNT_SPRITE_SEL_S: Override component with S value */
/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */	/* SPI_PNT_SPRITE_SEL_T: Override component with T value */
/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */	/* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
	PNT_SPRITE_TOP_1_bit                              = 1 << 14,	/* Bit 14; no description provided in this file */
    SPI_INPUT_Z                                           = 0x000286d8,	/* Register offset; no description provided in this file */
	PROVIDE_Z_TO_SPI_bit                              = 1 << 0,	/* Bit 0, the only field defined for this register here */
    /* SPI_FOG_CNTL bit layout as encoded below: [0] PASS_FOG_THROUGH_PS, [2:1] PIXEL_FOG_FUNC, [3] PIXEL_FOG_SRC_SEL, [4] VS_FOG_CLAMP_DISABLE. */
    SPI_FOG_CNTL                                          = 0x000286dc,	/* Fog interpolation control */
	PASS_FOG_THROUGH_PS_bit                           = 1 << 0,	/* Enable fog processing */
	PIXEL_FOG_FUNC_mask                               = 0x03 << 1,	/* 2-bit fog function selector; takes the SPI_FOG_* values below */
	PIXEL_FOG_FUNC_shift                              = 1,
	    SPI_FOG_NONE                                  = 0x00,	/* SPI_FOG_NONE: SPI_FOG_NONE */
	    SPI_FOG_EXP                                   = 0x01,	/* SPI_FOG_EXP: SPI_FOG_EXP */
	    SPI_FOG_EXP2                                  = 0x02,	/* SPI_FOG_EXP2: SPI_FOG_EXP2 */
	    SPI_FOG_LINEAR                                = 0x03,	/* SPI_FOG_LINEAR: SPI_FOG_LINEAR */
	PIXEL_FOG_SRC_SEL_bit                             = 1 << 3,	/* Bit 3; no description provided in this file */
	VS_FOG_CLAMP_DISABLE_bit                          = 1 << 4,	/* Bit 4; no description provided in this file */
    SPI_FOG_FUNC_SCALE                                    = 0x000286e0,	/* Register offset; no field constants are defined for it here */
	/* VALUE: description is empty in this file; presumably the whole register is a single value -- TODO confirm its format */
    SPI_FOG_FUNC_BIAS                                     = 0x000286e4,	/* Register offset; no field constants are defined for it here */
	/* VALUE: description is empty in this file; presumably the whole register is a single value -- TODO confirm its format */
    /* CB_BLEND0_CONTROL bit layout as encoded below: [4:0] COLOR_SRCBLEND, [7:5] COLOR_COMB_FCN, [12:8] COLOR_DESTBLEND, [13] OPACITY_WEIGHT, [20:16] ALPHA_SRCBLEND, [23:21] ALPHA_COMB_FCN, [28:24] ALPHA_DESTBLEND, [29] SEPARATE_ALPHA_BLEND. */
    CB_BLEND0_CONTROL                                     = 0x00028780,	/* Per-MRT blend control for render target 0, used if PER_MRT_BLEND is true. See CB_BLEND_CONTROL for field descriptions. */
	CB_BLEND0_CONTROL_num                             = 8,	/* NOTE(review): presumably the number of consecutive per-render-target instances of this register -- confirm */
	COLOR_SRCBLEND_mask                               = 0x1f << 0,	/* 5-bit field at bits 4:0 */
	COLOR_SRCBLEND_shift                              = 0,
	COLOR_COMB_FCN_mask                               = 0x07 << 5,	/* 3-bit field at bits 7:5 */
	COLOR_COMB_FCN_shift                              = 5,
	COLOR_DESTBLEND_mask                              = 0x1f << 8,	/* 5-bit field at bits 12:8 */
	COLOR_DESTBLEND_shift                             = 8,
	OPACITY_WEIGHT_bit                                = 1 << 13,	/* Bit 13 */
	ALPHA_SRCBLEND_mask                               = 0x1f << 16,	/* 5-bit field at bits 20:16 */
	ALPHA_SRCBLEND_shift                              = 16,
	ALPHA_COMB_FCN_mask                               = 0x07 << 21,	/* 3-bit field at bits 23:21 */
	ALPHA_COMB_FCN_shift                              = 21,
	ALPHA_DESTBLEND_mask                              = 0x1f << 24,	/* 5-bit field at bits 28:24 */
	ALPHA_DESTBLEND_shift                             = 24,
	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29,	/* Bit 29; no description provided in this file */
    VGT_DMA_BASE_HI                                       = 0x000287e4,	/* VGT DMA Base Address: upper 8 bits of the 40-bit address */
	VGT_DMA_BASE_HI__BASE_ADDR_mask                   = 0xff << 0,	/* This specifies the upper 8 bits of the 40-bit DMA address */
	VGT_DMA_BASE_HI__BASE_ADDR_shift                  = 0,
    VGT_DMA_BASE                                          = 0x000287e8,	/* VGT DMA Base Address */
	/* BASE_ADDR: VGT DMA Base Address. This address must be naturally aligned to a 16-bit word; therefore, bit 0 of this register must be 0. No mask/shift constants are defined for this field here. */
    /* VGT_DRAW_INITIATOR bit layout as encoded below: [1:0] SOURCE_SELECT, [3:2] MAJOR_MODE, [4] SPRITE_EN, [5] NOT_EOP, [6] USE_OPAQUE. */
    VGT_DRAW_INITIATOR                                    = 0x000287f0,	/* Draw Initiator */
	SOURCE_SELECT_mask                                = 0x03 << 0,	/* Input Source Select. If the Source Select field is set to `Auto-increment Index` mode and the Primitive Type is set to `Tri List w/Flags`, then the draw initiator is processed as just a regular `Tri List`. */
	SOURCE_SELECT_shift                               = 0,
	    DI_SRC_SEL_DMA                                = 0x00,	/* DI_SRC_SEL_DMA: VGT DMA Data */
	    DI_SRC_SEL_IMMEDIATE                          = 0x01,	/* DI_SRC_SEL_IMMEDIATE: Immediate Data */
	    DI_SRC_SEL_AUTO_INDEX                         = 0x02,	/* DI_SRC_SEL_AUTO_INDEX: Auto-increment Index */
	    DI_SRC_SEL_RESERVED                           = 0x03,	/* DI_SRC_SEL_RESERVED: Reserved - unused */
	MAJOR_MODE_mask                                   = 0x03 << 2,	/* Major Mode (2-bit field; only values 0 and 1 are named below) */
	MAJOR_MODE_shift                                  = 2,
	    DI_MAJOR_MODE_0                               = 0x00,	/* DI_MAJOR_MODE_0: DI_MAJOR_MODE_0 Normal (Implicit) Mode -- applies only to prim types 0-21. Some VGT state registers are ignored (their values implied) in this mode. */
	    DI_MAJOR_MODE_1                               = 0x01,	/* DI_MAJOR_MODE_1: DI_MAJOR_MODE_1 Explicit Mode -- Configuration completely specified by state registers. */
	SPRITE_EN_bit                                     = 1 << 4,	/* sprite enable */
	NOT_EOP_bit                                       = 1 << 5,	/* This bit indicates that this draw initiator should not generate an end-of-packet signal because it will be followed by one or more chained draw initiators. Care must be taken so that this draw initiator is immediately followed, at the hardware interface, by a chained draw initiator. (In other words, chained draw initiators cannot be separated over driver buffer boundaries that can be interrupted. This bit is primarily intended to be set by the CP to improve the processing parallelism of small 2D blits.) */
	USE_OPAQUE_bit                                    = 1 << 6,	/* This bit indicates that this draw call is an opaque draw call */
    VGT_IMMED_DATA                                        = 0x000287f4,	/* VGT Immediate Data */
	/* Data written to this address is written into the VGT Immediate Data FIFO. No field constants are defined for this register here. */
    VGT_EVENT_ADDRESS_REG                                 = 0x000287f8,	/* Event address */
	ADDRESS_LOW_mask                                  = 0xfffffff << 0,	/* address bit 31:4 for zpass event. The mask is 28 bits wide at register bits 27:0 -- NOTE(review): presumably callers store the address shifted right by 4; confirm */
	ADDRESS_LOW_shift                                 = 0,
    DB_DEPTH_CONTROL                                      = 0x00028800,	/* This register controls depth and stencil tests. */
	STENCIL_ENABLE_bit                                = 1 << 0,	/* Enables stencil testing. If disabled, all pixels pass the stencil test. If there is no stencil buffer this is treated as disabled. */
	Z_ENABLE_bit                                      = 1 << 1,	/* Enables depth testing. If disabled, all pixels pass the depth test. If there is no depth buffer this is treated as disabled. */
	Z_WRITE_ENABLE_bit                                = 1 << 2,	/* Enables writing to the depth buffer if the depth test passes. */
	ZFUNC_mask                                        = 0x07 << 4,	/* Specifies the function that compares the depth at each sample in the fragment to the destination depth at the corresponding sample point. */
	ZFUNC_shift                                       = 4,
	    FRAG_NEVER                                    = 0x00,	/* FRAG_NEVER: never pass */
	    FRAG_LESS                                     = 0x01,	/* FRAG_LESS: pass if fragment < dest */
	    FRAG_EQUAL                                    = 0x02,	/* FRAG_EQUAL: pass if fragment = dest */
	    FRAG_LEQUAL                                   = 0x03,	/* FRAG_LEQUAL: pass if fragment <= dest */
	    FRAG_GREATER                                  = 0x04,	/* FRAG_GREATER: pass if fragment > dest */
	    FRAG_NOTEQUAL                                 = 0x05,	/* FRAG_NOTEQUAL: pass if fragment != dest */
	    FRAG_GEQUAL                                   = 0x06,	/* FRAG_GEQUAL: pass if fragment >= dest */
	    FRAG_ALWAYS                                   = 0x07,	/* FRAG_ALWAYS: always pass */
	BACKFACE_ENABLE_bit                               = 1 << 7,	/* If false, forces all quads to be stencil tested as frontface quads. */
	STENCILFUNC_mask                                  = 0x07 << 8,	/* Specifies the function that compares STENCILREF to the destination stencil value for frontface quads. The stencil test passes if ref OP dest is true. */
	STENCILFUNC_shift                                 = 8,
/* 	    REF_NEVER                                     = 0x00, */	/* REF_NEVER: never pass */
/* 	    REF_LESS                                      = 0x01, */	/* REF_LESS: pass if left < right */
/* 	    REF_EQUAL                                     = 0x02, */	/* REF_EQUAL: pass if left = right */
/* 	    REF_LEQUAL                                    = 0x03, */	/* REF_LEQUAL: pass if left <= right */
/* 	    REF_GREATER                                   = 0x04, */	/* REF_GREATER: pass if left > right */
/* 	    REF_NOTEQUAL                                  = 0x05, */	/* REF_NOTEQUAL: pass if left != right */
/* 	    REF_GEQUAL                                    = 0x06, */	/* REF_GEQUAL: pass if left >= right */
/* 	    REF_ALWAYS                                    = 0x07, */	/* REF_ALWAYS: always pass */
	STENCILFAIL_mask                                  = 0x07 << 11,	/* Specifies the stencil operation for frontface quads if the stencil function fails. */
	STENCILFAIL_shift                                 = 11,
	    STENCIL_KEEP                                  = 0x00,	/* STENCIL_KEEP: New value = Old Value */
	    STENCIL_ZERO                                  = 0x01,	/* STENCIL_ZERO: New value = 0 */
	    STENCIL_REPLACE                               = 0x02,	/* STENCIL_REPLACE: New value = STENCILREF */
	    STENCIL_INCR_CLAMP                            = 0x03,	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
	    STENCIL_DECR_CLAMP                            = 0x04,	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
	    STENCIL_INVERT                                = 0x05,	/* STENCIL_INVERT: New value=~Old value */
	    STENCIL_INCR_WRAP                             = 0x06,	/* STENCIL_INCR_WRAP: New value++ (wrap) */
	    STENCIL_DECR_WRAP                             = 0x07,	/* STENCIL_DECR_WRAP: New value-- (wrap) */
	STENCILZPASS_mask                                 = 0x07 << 14,	/* Specifies the stencil operation for frontface quads if the stencil and depth functions both pass. */
	STENCILZPASS_shift                                = 14,
/* 	    STENCIL_KEEP                                  = 0x00, */	/* STENCIL_KEEP: New value = Old Value */
/* 	    STENCIL_ZERO                                  = 0x01, */	/* STENCIL_ZERO: New value = 0 */
/* 	    STENCIL_REPLACE                               = 0x02, */	/* STENCIL_REPLACE: New value = STENCILREF */
/* 	    STENCIL_INCR_CLAMP                            = 0x03, */	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
/* 	    STENCIL_DECR_CLAMP                            = 0x04, */	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
/* 	    STENCIL_INVERT                                = 0x05, */	/* STENCIL_INVERT: New value=~Old value */
/* 	    STENCIL_INCR_WRAP                             = 0x06, */	/* STENCIL_INCR_WRAP: New value++ (wrap) */
/* 	    STENCIL_DECR_WRAP                             = 0x07, */	/* STENCIL_DECR_WRAP: New value-- (wrap) */
	STENCILZFAIL_mask                                 = 0x07 << 17,	/* Specifies the stencil operation for frontface quads if the stencil function passes and the depth function fails. */
	STENCILZFAIL_shift                                = 17,
/* 	    STENCIL_KEEP                                  = 0x00, */	/* STENCIL_KEEP: New value = Old Value */
/* 	    STENCIL_ZERO                                  = 0x01, */	/* STENCIL_ZERO: New value = 0 */
/* 	    STENCIL_REPLACE                               = 0x02, */	/* STENCIL_REPLACE: New value = STENCILREF */
/* 	    STENCIL_INCR_CLAMP                            = 0x03, */	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
/* 	    STENCIL_DECR_CLAMP                            = 0x04, */	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
/* 	    STENCIL_INVERT                                = 0x05, */	/* STENCIL_INVERT: New value=~Old value */
/* 	    STENCIL_INCR_WRAP                             = 0x06, */	/* STENCIL_INCR_WRAP: New value++ (wrap) */
/* 	    STENCIL_DECR_WRAP                             = 0x07, */	/* STENCIL_DECR_WRAP: New value-- (wrap) */
	STENCILFUNC_BF_mask                               = 0x07 << 20,	/* Specifies the function that compares STENCILREF_BF to the destination stencil for backface quads. The stencil test passes if ref OP dest is true. */
	STENCILFUNC_BF_shift                              = 20,
/* 	    REF_NEVER                                     = 0x00, */	/* REF_NEVER: never pass */
/* 	    REF_LESS                                      = 0x01, */	/* REF_LESS: pass if left < right */
/* 	    REF_EQUAL                                     = 0x02, */	/* REF_EQUAL: pass if left = right */
/* 	    REF_LEQUAL                                    = 0x03, */	/* REF_LEQUAL: pass if left <= right */
/* 	    REF_GREATER                                   = 0x04, */	/* REF_GREATER: pass if left > right */
/* 	    REF_NOTEQUAL                                  = 0x05, */	/* REF_NOTEQUAL: pass if left != right */
/* 	    REF_GEQUAL                                    = 0x06, */	/* REF_GEQUAL: pass if left >= right */
/* 	    REF_ALWAYS                                    = 0x07, */	/* REF_ALWAYS: always pass */
	STENCILFAIL_BF_mask                               = 0x07 << 23,	/* Specifies the stencil operation for backface quads if the stencil function fails. */
	STENCILFAIL_BF_shift                              = 23,
/* 	    STENCIL_KEEP                                  = 0x00, */	/* STENCIL_KEEP: New value = Old Value */
/* 	    STENCIL_ZERO                                  = 0x01, */	/* STENCIL_ZERO: New value = 0 */
/* 	    STENCIL_REPLACE                               = 0x02, */	/* STENCIL_REPLACE: New value = STENCILREF */
/* 	    STENCIL_INCR_CLAMP                            = 0x03, */	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
/* 	    STENCIL_DECR_CLAMP                            = 0x04, */	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
/* 	    STENCIL_INVERT                                = 0x05, */	/* STENCIL_INVERT: New value=~Old value */
/* 	    STENCIL_INCR_WRAP                             = 0x06, */	/* STENCIL_INCR_WRAP: New value++ (wrap) */
/* 	    STENCIL_DECR_WRAP                             = 0x07, */	/* STENCIL_DECR_WRAP: New value-- (wrap) */
	STENCILZPASS_BF_mask                              = 0x07 << 26,	/* Specifies the stencil operation for backface quads if the stencil and depth functions both pass. */
	STENCILZPASS_BF_shift                             = 26,
/* 	    STENCIL_KEEP                                  = 0x00, */	/* STENCIL_KEEP: New value = Old Value */
/* 	    STENCIL_ZERO                                  = 0x01, */	/* STENCIL_ZERO: New value = 0 */
/* 	    STENCIL_REPLACE                               = 0x02, */	/* STENCIL_REPLACE: New value = STENCILREF */
/* 	    STENCIL_INCR_CLAMP                            = 0x03, */	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
/* 	    STENCIL_DECR_CLAMP                            = 0x04, */	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
/* 	    STENCIL_INVERT                                = 0x05, */	/* STENCIL_INVERT: New value=~Old value */
/* 	    STENCIL_INCR_WRAP                             = 0x06, */	/* STENCIL_INCR_WRAP: New value++ (wrap) */
/* 	    STENCIL_DECR_WRAP                             = 0x07, */	/* STENCIL_DECR_WRAP: New value-- (wrap) */
	STENCILZFAIL_BF_mask                              = 0x07 << 29,	/* Specifies the stencil operation for backface quads if the stencil function passes and the depth function fails. */
	STENCILZFAIL_BF_shift                             = 29,
/* 	    STENCIL_KEEP                                  = 0x00, */	/* STENCIL_KEEP: New value = Old Value */
/* 	    STENCIL_ZERO                                  = 0x01, */	/* STENCIL_ZERO: New value = 0 */
/* 	    STENCIL_REPLACE                               = 0x02, */	/* STENCIL_REPLACE: New value = STENCILREF */
/* 	    STENCIL_INCR_CLAMP                            = 0x03, */	/* STENCIL_INCR_CLAMP: New value++ (clamp) */
/* 	    STENCIL_DECR_CLAMP                            = 0x04, */	/* STENCIL_DECR_CLAMP: New value-- (clamp) */
/* 	    STENCIL_INVERT                                = 0x05, */	/* STENCIL_INVERT: New value=~Old value */
/* 	    STENCIL_INCR_WRAP                             = 0x06, */	/* STENCIL_INCR_WRAP: New value++ (wrap) */
/* 	    STENCIL_DECR_WRAP                             = 0x07, */	/* STENCIL_DECR_WRAP: New value-- (wrap) */
    CB_BLEND_CONTROL                                      = 0x00028804,	/* Blend function used for all render targets if PER_MRT_BLEND is false. */
/* 	COLOR_SRCBLEND_mask                               = 0x1f << 0, */	/* Source blend function for RGB components. BLEND_X name corresponds to GL_X blend function. */
/* 	COLOR_SRCBLEND_shift                              = 0, */
	    BLEND_ZERO                                    = 0x00,	/* BLEND_ZERO: (d3d_zero) */
	    BLEND_ONE                                     = 0x01,	/* BLEND_ONE: (d3d_one) */
	    BLEND_SRC_COLOR                               = 0x02,	/* BLEND_SRC_COLOR: (d3d_srccolor) */
	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03,	/* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
	    BLEND_SRC_ALPHA                               = 0x04,	/* BLEND_SRC_ALPHA: (d3d_srcalpha) */
	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05,	/* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
	    BLEND_DST_ALPHA                               = 0x06,	/* BLEND_DST_ALPHA: (d3d_destalpha) */
	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07,	/* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
	    BLEND_DST_COLOR                               = 0x08,	/* BLEND_DST_COLOR: (d3d_destcolor) */
	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09,	/* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a,	/* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
	    BLEND_BOTH_SRC_ALPHA                          = 0x0b,	/* BLEND_BOTH_SRC_ALPHA: dx9 mode */
	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c,	/* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
	    BLEND_CONSTANT_COLOR                          = 0x0d,	/* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e,	/* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
	    BLEND_SRC1_COLOR                              = 0x0f,	/* BLEND_SRC1_COLOR: wgf dual-source mode */
	    BLEND_INV_SRC1_COLOR                          = 0x10,	/* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
	    BLEND_SRC1_ALPHA                              = 0x11,	/* BLEND_SRC1_ALPHA: wgf dual-source mode */
	    BLEND_INV_SRC1_ALPHA                          = 0x12,	/* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
	    BLEND_CONSTANT_ALPHA                          = 0x13,	/* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14,	/* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
/* 	COLOR_COMB_FCN_mask                               = 0x07 << 5, */	/* Source/dest combination function for RGB components. Result is clamped to the representable range. */
/* 	COLOR_COMB_FCN_shift                              = 5, */
	    COMB_DST_PLUS_SRC                             = 0x00,	/* COMB_DST_PLUS_SRC: (ADD): Source*SRCBLEND + Dest*DSTBLEND */
	    COMB_SRC_MINUS_DST                            = 0x01,	/* COMB_SRC_MINUS_DST: (SUBTRACT): Source*SRCBLEND - Dest*DSTBLEND */
	    COMB_MIN_DST_SRC                              = 0x02,	/* COMB_MIN_DST_SRC: (MIN): min(Source, Dest) */
	    COMB_MAX_DST_SRC                              = 0x03,	/* COMB_MAX_DST_SRC: (MAX): max(Source, Dest) */
	    COMB_DST_MINUS_SRC                            = 0x04,	/* COMB_DST_MINUS_SRC: (REVSUBTRACT): Dest*DSTBLEND - Source*SRCBLEND */
/* 	COLOR_DESTBLEND_mask                              = 0x1f << 8, */	/* Destination blend function for RGB components. BLEND_X name corresponds to GL_X blend function. */
/* 	COLOR_DESTBLEND_shift                             = 8, */
/* 	    BLEND_ZERO                                    = 0x00, */	/* BLEND_ZERO: (d3d_zero) */
/* 	    BLEND_ONE                                     = 0x01, */	/* BLEND_ONE: (d3d_one) */
/* 	    BLEND_SRC_COLOR                               = 0x02, */	/* BLEND_SRC_COLOR: (d3d_srccolor) */
/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */	/* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
/* 	    BLEND_SRC_ALPHA                               = 0x04, */	/* BLEND_SRC_ALPHA: (d3d_srcalpha) */
/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */	/* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
/* 	    BLEND_DST_ALPHA                               = 0x06, */	/* BLEND_DST_ALPHA: (d3d_destalpha) */
/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */	/* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
/* 	    BLEND_DST_COLOR                               = 0x08, */	/* BLEND_DST_COLOR: (d3d_destcolor) */
/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */	/* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */	/* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */	/* BLEND_BOTH_SRC_ALPHA: dx9 mode */
/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */	/* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */	/* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */	/* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
/* 	    BLEND_SRC1_COLOR                              = 0x0f, */	/* BLEND_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */	/* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_SRC1_ALPHA                              = 0x11, */	/* BLEND_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */	/* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */	/* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */	/* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
/* 	OPACITY_WEIGHT_bit                                = 1 << 13, */	/* If one, enables multiplying source alpha times source RGB before blending. This field must be zero if FOG_ENABLE is one. */
/* 	ALPHA_SRCBLEND_mask                               = 0x1f << 16, */	/* Source blend function for alpha component. BLEND_X name corresponds to GL_X blend function. */
/* 	ALPHA_SRCBLEND_shift                              = 16, */
/* 	    BLEND_ZERO                                    = 0x00, */	/* BLEND_ZERO: (d3d_zero) */
/* 	    BLEND_ONE                                     = 0x01, */	/* BLEND_ONE: (d3d_one) */
/* 	    BLEND_SRC_COLOR                               = 0x02, */	/* BLEND_SRC_COLOR: (d3d_srccolor) */
/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */	/* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
/* 	    BLEND_SRC_ALPHA                               = 0x04, */	/* BLEND_SRC_ALPHA: (d3d_srcalpha) */
/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */	/* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
/* 	    BLEND_DST_ALPHA                               = 0x06, */	/* BLEND_DST_ALPHA: (d3d_destalpha) */
/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */	/* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
/* 	    BLEND_DST_COLOR                               = 0x08, */	/* BLEND_DST_COLOR: (d3d_destcolor) */
/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */	/* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */	/* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */	/* BLEND_BOTH_SRC_ALPHA: dx9 mode */
/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */	/* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */	/* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */	/* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
/* 	    BLEND_SRC1_COLOR                              = 0x0f, */	/* BLEND_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */	/* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_SRC1_ALPHA                              = 0x11, */	/* BLEND_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */	/* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */	/* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */	/* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
/* 	ALPHA_COMB_FCN_mask                               = 0x07 << 21, */	/* Source/dest combination function for alpha component. Result is clamped to the representable range. Note that Min and Max do not force src and dst blend functions to ONE. */
/* 	ALPHA_COMB_FCN_shift                              = 21, */
/* 	    COMB_DST_PLUS_SRC                             = 0x00, */	/* COMB_DST_PLUS_SRC: (ADD): Source*SRCBLEND + Dest*DSTBLEND */
/* 	    COMB_SRC_MINUS_DST                            = 0x01, */	/* COMB_SRC_MINUS_DST: (SUBTRACT): Source*SRCBLEND - Dest*DSTBLEND */
/* 	    COMB_MIN_DST_SRC                              = 0x02, */	/* COMB_MIN_DST_SRC: (MIN): min(Source, Dest) */
/* 	    COMB_MAX_DST_SRC                              = 0x03, */	/* COMB_MAX_DST_SRC: (MAX): max(Source, Dest) */
/* 	    COMB_DST_MINUS_SRC                            = 0x04, */	/* COMB_DST_MINUS_SRC: (REVSUBTRACT): Dest*DSTBLEND - Source*SRCBLEND */
/* 	ALPHA_DESTBLEND_mask                              = 0x1f << 24, */	/* Destination blend function for alpha component. BLEND_X name corresponds to GL_X blend function. */
/* 	ALPHA_DESTBLEND_shift                             = 24, */
/* 	    BLEND_ZERO                                    = 0x00, */	/* BLEND_ZERO: (d3d_zero) */
/* 	    BLEND_ONE                                     = 0x01, */	/* BLEND_ONE: (d3d_one) */
/* 	    BLEND_SRC_COLOR                               = 0x02, */	/* BLEND_SRC_COLOR: (d3d_srccolor) */
/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */	/* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
/* 	    BLEND_SRC_ALPHA                               = 0x04, */	/* BLEND_SRC_ALPHA: (d3d_srcalpha) */
/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */	/* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
/* 	    BLEND_DST_ALPHA                               = 0x06, */	/* BLEND_DST_ALPHA: (d3d_destalpha) */
/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */	/* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
/* 	    BLEND_DST_COLOR                               = 0x08, */	/* BLEND_DST_COLOR: (d3d_destcolor) */
/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */	/* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */	/* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */	/* BLEND_BOTH_SRC_ALPHA: dx9 mode */
/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */	/* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */	/* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */	/* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
/* 	    BLEND_SRC1_COLOR                              = 0x0f, */	/* BLEND_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */	/* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
/* 	    BLEND_SRC1_ALPHA                              = 0x11, */	/* BLEND_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */	/* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */	/* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */	/* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
/* 	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29, */	/* If false, use color blend modes for blending the alpha channel. If true, use the ALPHA_ fields to control blending to the alpha channel. */
    CB_COLOR_CONTROL                                      = 0x00028808,
	FOG_ENABLE_bit                                    = 1 << 0,	/* If true, extract a fog factor from each exported color and performs fog blending prior to alpha blending, using FOG_RED etc. as the fog color. This bit must be zero if a dual-source (SRC1) blend operation is selected. */
	MULTIWRITE_ENABLE_bit                             = 1 << 1,	/* If true, replicate color output 0 to each color output, so that it is rendered to each enabled render target. This feature is used in OpenGL. SHADER_MASK.OUTPUTn_ENABLE masks the color components of color output 0 for render target n. */
	DITHER_ENABLE_bit                                 = 1 << 2,	/* If true, then each component is dithered if it is no larger than 16-bits and its ROUND_MODE is set to ROUND_BY_HALF. This API state is present in OpenGL and DX9 but not DX10. */
	DEGAMMA_ENABLE_bit                                = 1 << 3,	/* If true, then each UNORM format COLOR_8_8_8_8 render target is treated as an SRGB format instead. This API state is present in DX9 but not WGF2. */
	SPECIAL_OP_mask                                   = 0x07 << 4,	/* This field selects standard color processing or one of several special operation modes, which ignore the backend state except that the fast clear and expand modes use nonzero fields in CB_TARGET_WRITE field to select render targets. NOTE: for the SPECIAL_EXPAND modes, all enabled MRTs must have a cmask buffer. */
	SPECIAL_OP_shift                                  = 4,
	    SPECIAL_NORMAL                                = 0x00,	/* SPECIAL_NORMAL: use state to render */
	    SPECIAL_DISABLE                               = 0x01,	/* SPECIAL_DISABLE: do not write color results */
	    SPECIAL_FAST_CLEAR                            = 0x02,	/* SPECIAL_FAST_CLEAR: set fully covered tiles to fast clear value, as selected by CLEAR_MODE field. */
	    SPECIAL_FORCE_CLEAR                           = 0x03,	/* SPECIAL_FORCE_CLEAR: use for full surface fast clear (removes knowledge of prior clear color). */
	    SPECIAL_EXPAND_COLOR                          = 0x04,	/* SPECIAL_EXPAND_COLOR: expand cleared tiles so that clear color is not used. Use this or force_clear when changing the clear color. */
	    SPECIAL_EXPAND_TEXTURE                        = 0x05,	/* SPECIAL_EXPAND_TEXTURE: expand as needed before binding the surface as a texture. */
	    SPECIAL_EXPAND_SAMPLES                        = 0x06,	/* SPECIAL_EXPAND_SAMPLES: expand to a separate color per sample. This is required before CPU access to the surface. */
	    SPECIAL_RESOLVE_BOX                           = 0x07,	/* SPECIAL_RESOLVE_BOX: read from target 0, sum all covered samples, divide by the number of samples, and write to target 1, which is one-sample. This may be used to produce a linear array from a tiled array. NOTE: do EXPAND_COLOR before resolving surface. */
	PER_MRT_BLEND_bit                                 = 1 << 7,	/* If false, use CB_BLEND_CONTROL for all blend functions. If true, use CB_BLEND#_CONTROL for the blend function for render target # (if blending is enabled). */
	TARGET_BLEND_ENABLE_mask                          = 0xff << 8,	/* Each bit enables blending for the corresponding render target if it is 1, else disables blending for that render target if it is 0. This field must be 0xcc (source) if BLEND_FLOAT32 is set for any enabled render target. */
	TARGET_BLEND_ENABLE_shift                         = 8,
	ROP3_mask                                         = 0xff << 16,	/* This field supports the 28 boolean ops that combine either source and dest or brush and dest, with brush provided by the shader in place of source. ROP3 codes that use both src and brush are emulated in software. Allowed ROP3 codes have either the form pqrspqrs (for source/dest ops) or pqpqrsrs (for brush/dest ops). The code 0xCC (11001100) copies the source to the destination, which disables the ROP function. */
	ROP3_shift                                        = 16,
    DB_SHADER_CONTROL                                     = 0x0002880c,
	Z_EXPORT_ENABLE_bit                               = 1 << 0,	/* A shader is bound that exports Z as a float into Red. */
	STENCIL_REF_EXPORT_ENABLE_bit                     = 1 << 1,	/* A shader is bound that exports a stencil ref value as an integer onto Green. */
	Z_ORDER_mask                                      = 0x03 << 4,	/* Indicates Shader`s preference for which type of Z testing. The _THEN_ for early Z allows the shader to indicate a preference when EARLY_Z can`t be used. If RE_Z can`t be used then LATE_Z is. */
	Z_ORDER_shift                                     = 4,
	    LATE_Z                                        = 0x00,	/* LATE_Z */
	    EARLY_Z_THEN_LATE_Z                           = 0x01,	/* EARLY_Z_THEN_LATE_Z */
	    RE_Z                                          = 0x02,	/* RE_Z */
	    EARLY_Z_THEN_RE_Z                             = 0x03,	/* EARLY_Z_THEN_RE_Z */
	KILL_ENABLE_bit                                   = 1 << 6,	/* Shader can kill pixels through texkill. */
	COVERAGE_TO_MASK_ENABLE_bit                       = 1 << 7,	/* Use Z (2nd) Export Alpha Channel to Generate Alpha to Mask. */
	MASK_EXPORT_ENABLE_bit                            = 1 << 8,	/* Use Z (2nd) Export Blue Channel as sample mask for pixel. */
	DUAL_EXPORT_ENABLE_bit                            = 1 << 9,	/* Allows the shader export block to pack two quads into each export to the backend. This only occurs if there is no depth export, the active render targets permit (see CB_COLOR0_INFO SOURCE_FORMAT field) and CB_COLOR_CONTROL FOG_ENABLE and MULTIWRITE_ENABLE are both zero. */
	EXEC_ON_HIER_FAIL_bit                             = 1 << 10,	/* Will execute the shader even if Hierarchical Z or Stencil would kill the quad. Enable if the pixel shader has a desired side effect not covered by the above flags for Z or Stencil failed pixels. EarlyZ and ReZ kills will still stop the shader from running. */
	EXEC_ON_NOOP_bit                                  = 1 << 11,	/* Will execute the shader even if nothing uses the shader`s color or depth exports. Enable if the pixel shader has a desired side effect not caused by the above flags for passing pixels. */
    PA_CL_CLIP_CNTL                                       = 0x00028810,	/* Clipper Control Bits */
	UCP_ENA_0_bit                                     = 1 << 0,	/* Enable User-Clip Plane 0 */
	UCP_ENA_1_bit                                     = 1 << 1,	/* Enable User-Clip Plane 1 */
	UCP_ENA_2_bit                                     = 1 << 2,	/* Enable User-Clip Plane 2 */
	UCP_ENA_3_bit                                     = 1 << 3,	/* Enable User-Clip Plane 3 */
	UCP_ENA_4_bit                                     = 1 << 4,	/* Enable User-Clip Plane 4 */
	UCP_ENA_5_bit                                     = 1 << 5,	/* Enable User-Clip Plane 5 */
	PS_UCP_Y_SCALE_NEG_bit                            = 1 << 13,
	PS_UCP_MODE_mask                                  = 0x03 << 14,	/* 0 = Cull using distance from center of point 1 = Cull using radius-based distance from center of point 2 = Cull using radius-based distance from center of point, Expand and Clip on intersection 3 = Always expand and clip as trifan */
	PS_UCP_MODE_shift                                 = 14,
	CLIP_DISABLE_bit                                  = 1 << 16,	/* Disables clip code generation and clipping process for TCL */
	UCP_CULL_ONLY_ENA_bit                             = 1 << 17,	/* Cull Primitives against UCPS, but don`t clip */
	BOUNDARY_EDGE_FLAG_ENA_bit                        = 1 << 18,	/* Currently unused: Pending Delete. Left as placeholder for now. */
	DX_CLIP_SPACE_DEF_bit                             = 1 << 19,	/* Clip space is defined as: 0: -W < X < W, -W < Y < W, -W < Z < W (OpenGL Definition) 1: -W < X < W, -W < Y < W, 0 < Z < W (DirectX Definition) */
	DIS_CLIP_ERR_DETECT_bit                           = 1 << 20,	/* Disables culling of primitives for which the clipper detects an error. Default is 0 */
	VTX_KILL_OR_bit                                   = 1 << 21,	/* Used if Vertex Kill flags are exported from Vertex Shader. If clear, ALL vertices for current primitive must be set to kill the primitive ( AND MODE). If set, if ANY vertices for current primitive are set, the primitive will be killed ( OR MODE). */
	DX_LINEAR_ATTR_CLIP_ENA_bit                       = 1 << 24,
	VTE_VPORT_PROVOKE_DISABLE_bit                     = 1 << 25,
	ZCLIP_NEAR_DISABLE_bit                            = 1 << 26,
	ZCLIP_FAR_DISABLE_bit                             = 1 << 27,
    PA_SU_SC_MODE_CNTL                                    = 0x00028814,	/* SU/SC Controls for Facedness Culling, Polymode, Polygon Offset, and various Enables */
	CULL_FRONT_bit                                    = 1 << 0,	/* Enable for front-face culling. */
	CULL_BACK_bit                                     = 1 << 1,	/* Enable for back-face culling. */
	FACE_bit                                          = 1 << 2,	/* X-Ored with cross product sign to determine positive facing */
	POLY_MODE_mask                                    = 0x03 << 3,	/* Polygon mode enable. */
	POLY_MODE_shift                                   = 3,
	    X_DISABLE_POLY_MODE                           = 0x00,	/* Disable poly mode (render triangles). */
	    X_DUAL_MODE                                   = 0x01,	/* Dual mode (send 2 sets of 3 polys with specified poly type). */
	POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,	/* Specifies how to render front-facing polygons. */
	POLYMODE_FRONT_PTYPE_shift                        = 5,
	    X_DRAW_POINTS                                 = 0x00,	/* Draw points. */
	    X_DRAW_LINES                                  = 0x01,	/* Draw lines. */
	    X_DRAW_TRIANGLES                              = 0x02,	/* Draw triangles. */
	POLYMODE_BACK_PTYPE_mask                          = 0x07 << 8,	/* Specifies how to render back-facing polygons. */
	POLYMODE_BACK_PTYPE_shift                         = 8,
/* 	    X_DRAW_POINTS                                 = 0x00, */	/* Draw points. */
/* 	    X_DRAW_LINES                                  = 0x01, */	/* Draw lines. */
/* 	    X_DRAW_TRIANGLES                              = 0x02, */	/* Draw triangles. */
	POLY_OFFSET_FRONT_ENABLE_bit                      = 1 << 11,	/* Enables front facing polygon`s offset. */
	POLY_OFFSET_BACK_ENABLE_bit                       = 1 << 12,	/* Enables back facing polygon`s offset. */
	POLY_OFFSET_PARA_ENABLE_bit                       = 1 << 13,	/* Enables polygon offset for non-triangle primitives. */
	VTX_WINDOW_OFFSET_ENABLE_bit                      = 1 << 16,	/* Enables addition of PA_SC_WINDOW_OFFSET values to vertex data. */
	PROVOKING_VTX_LAST_bit                            = 1 << 19,	/* Defines which vertex of a primitive is used for attribute components when flat shading is enabled */
	PERSP_CORR_DIS_bit                                = 1 << 20,	/* Disables perspective correction for all attributes */
	MULTI_PRIM_IB_ENA_bit                             = 1 << 21,	/* Enables multiple primitive sets to be placed in a single index buffer, separated by RESET_INDX indices */
    PA_CL_VTE_CNTL                                        = 0x00028818,	/* Viewport Transform Engine Control */
	VPORT_X_SCALE_ENA_bit                             = 1 << 0,	/* Viewport Transform Scale Enable for X component */
	VPORT_X_OFFSET_ENA_bit                            = 1 << 1,	/* Viewport Transform Offset Enable for X component */
	VPORT_Y_SCALE_ENA_bit                             = 1 << 2,	/* Viewport Transform Scale Enable for Y component */
	VPORT_Y_OFFSET_ENA_bit                            = 1 << 3,	/* Viewport Transform Offset Enable for Y component */
	VPORT_Z_SCALE_ENA_bit                             = 1 << 4,	/* Viewport Transform Scale Enable for Z component */
	VPORT_Z_OFFSET_ENA_bit                            = 1 << 5,	/* Viewport Transform Offset Enable for Z component */
	VTX_XY_FMT_bit                                    = 1 << 8,	/* Indicates that the incoming X, Y have already been multiplied by 1/W0. If OFF, the Setup Engine will multiply the X, Y coordinates by 1/W0. */
	VTX_Z_FMT_bit                                     = 1 << 9,	/* Indicates that the incoming Z has already been multiplied by 1/W0. If OFF, the Setup Engine will multiply the Z coordinate by 1/W0. */
	VTX_W0_FMT_bit                                    = 1 << 10,	/* Indicates that the incoming W0 is not 1/W0. If ON, the Setup Engine will perform the reciprocal to get 1/W0. */
	PERFCOUNTER_REF_bit                               = 1 << 11,	/* Indicates perf counters should increment for this context. */
    PA_CL_VS_OUT_CNTL                                     = 0x0002881c,	/* Vertex Shader Output Control */
	CLIP_DIST_ENA_0_bit                               = 1 << 0,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_1_bit                               = 1 << 1,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_2_bit                               = 1 << 2,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_3_bit                               = 1 << 3,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_4_bit                               = 1 << 4,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_5_bit                               = 1 << 5,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_6_bit                               = 1 << 6,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CLIP_DIST_ENA_7_bit                               = 1 << 7,	/* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
	CULL_DIST_ENA_0_bit                               = 1 << 8,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_1_bit                               = 1 << 9,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_2_bit                               = 1 << 10,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_3_bit                               = 1 << 11,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_4_bit                               = 1 << 12,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_5_bit                               = 1 << 13,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_6_bit                               = 1 << 14,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	CULL_DIST_ENA_7_bit                               = 1 << 15,	/* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
	USE_VTX_POINT_SIZE_bit                            = 1 << 16,	/* Use the PointSize output from the VS (in the x channel of VS_OUT_MISC_VEC). */
	USE_VTX_EDGE_FLAG_bit                             = 1 << 17,	/* Use the EdgeFlag output from the VS (in the y channel of VS_OUT_MISC_VEC). */
	USE_VTX_RENDER_TARGET_INDX_bit                    = 1 << 18,	/* Use the RenderTargetArrayIndx output from the VS (in the z channel of VS_OUT_MISC_VEC). Only valid for WGF Geometry Shader */
	USE_VTX_VIEWPORT_INDX_bit                         = 1 << 19,	/* Use the ViewportArrayIndx output from the VS (in the w channel of VS_OUT_MISC_VEC). Only valid for WGF Geometry Shader */
	USE_VTX_KILL_FLAG_bit                             = 1 << 20,	/* Use the KillFlag output from the VS (in the z channel of VS_OUT_MISC_VEC). Mutually exclusive from RTarrayindx */
	VS_OUT_MISC_VEC_ENA_bit                           = 1 << 21,	/* Output the VS output misc vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
	VS_OUT_CCDIST0_VEC_ENA_bit                        = 1 << 22,	/* Output the VS output ccdist0 vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
	VS_OUT_CCDIST1_VEC_ENA_bit                        = 1 << 23,	/* Output the VS output ccdist1 vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
    PA_CL_NANINF_CNTL                                     = 0x00028820,
	VTE_XY_INF_DISCARD_bit                            = 1 << 0,
	VTE_Z_INF_DISCARD_bit                             = 1 << 1,
	VTE_W_INF_DISCARD_bit                             = 1 << 2,
	VTE_0XNANINF_IS_0_bit                             = 1 << 3,
	VTE_XY_NAN_RETAIN_bit                             = 1 << 4,
	VTE_Z_NAN_RETAIN_bit                              = 1 << 5,
	VTE_W_NAN_RETAIN_bit                              = 1 << 6,
	VTE_W_RECIP_NAN_IS_0_bit                          = 1 << 7,
	VS_XY_NAN_TO_INF_bit                              = 1 << 8,
	VS_XY_INF_RETAIN_bit                              = 1 << 9,
	VS_Z_NAN_TO_INF_bit                               = 1 << 10,
	VS_Z_INF_RETAIN_bit                               = 1 << 11,
	VS_W_NAN_TO_INF_bit                               = 1 << 12,
	VS_W_INF_RETAIN_bit                               = 1 << 13,
	VS_CLIP_DIST_INF_DISCARD_bit                      = 1 << 14,
	VTE_NO_OUTPUT_NEG_0_bit                           = 1 << 20,
    SQ_PGM_START_PS                                       = 0x00028840,	/* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the pixel shader (PS) */
	/* PGM_START: Format is [39:8] */
    SQ_PGM_RESOURCES_PS                                   = 0x00028850,	/* (8-state). Resource requirements to run the PS program. Can only read most recent version, not all 8 states. */
	NUM_GPRS_mask                                     = 0xff << 0,	/* number of GPRs required to run this program [0..127] */
	NUM_GPRS_shift                                    = 0,
	STACK_SIZE_mask                                   = 0xff << 8,	/* number of stack entries needed [0..255] */
	STACK_SIZE_shift                                  = 8,
	SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit               = 1 << 21,	/* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
	FETCH_CACHE_LINES_mask                            = 0x07 << 24,	/* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
	FETCH_CACHE_LINES_shift                           = 24,
	UNCACHED_FIRST_INST_bit                           = 1 << 28,	/* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. On R600 only: this bit MUST be set due to a bug that is fixed in derivative parts. */
	CLAMP_CONSTS_bit                                  = 1 << 31,	/* Clamp ALU constants to [-1.0, 1.0]. Used for shader versions below PS2.0. Applies only to Constant-file constants (not literals) and only to const-file entries 0..7. Other entries are never clamped. NOTE(review): 1 << 31 shifts into the sign bit when int is 32 bits wide, which C11 leaves undefined; confirm the compiler yields the intended bit-31 pattern before relying on it. */
    SQ_PGM_EXPORTS_PS                                     = 0x00028854,	/* (8-state). Defines the exports from the Pixel Shader Program. */
	EXPORT_MODE_mask                                  = 0x1f << 0,	/* Pixel Shader export mode. bbbbz where bbbb is how many color we export (0-8) and z is export z or not. It is illegal to program this to all zeros. */
	EXPORT_MODE_shift                                 = 0,
    SQ_PGM_START_VS                                       = 0x00028858,	/* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the vertex shader (VS) */
	/* PGM_START: Format is [39:8] */
    SQ_PGM_RESOURCES_VS                                   = 0x00028868,	/* (8-state). Resource requirements to run the VS program. Can only read most recent version, not all 8 states. */
/* 	NUM_GPRS_mask                                     = 0xff << 0, */	/* number of GPRs required to run this program [0..127] */
/* 	NUM_GPRS_shift                                    = 0, */
/* 	STACK_SIZE_mask                                   = 0xff << 8, */	/* number of stack entries needed [0..255] */
/* 	STACK_SIZE_shift                                  = 8, */
	SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit               = 1 << 21,	/* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */	/* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* 	FETCH_CACHE_LINES_shift                           = 24, */
/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */	/* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
    SQ_PGM_START_GS                                       = 0x0002886c,	/* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the geometry shader (GS) */
	/* PGM_START: Format is [39:8] */
    SQ_PGM_RESOURCES_GS                                   = 0x0002887c,	/* (8-state). Resource requirements to run the GS program. Can only read most recent version, not all 8 states. */
/* 	NUM_GPRS_mask                                     = 0xff << 0, */	/* number of GPRs required to run this program [0..127] */
/* 	NUM_GPRS_shift                                    = 0, */
/* 	STACK_SIZE_mask                                   = 0xff << 8, */	/* number of stack entries needed [0..255] */
/* 	STACK_SIZE_shift                                  = 8, */
	SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit               = 1 << 21,	/* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */	/* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* 	FETCH_CACHE_LINES_shift                           = 24, */
/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */	/* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
    SQ_PGM_START_ES                                       = 0x00028880,	/* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the export shader (ES) */
	/* PGM_START: Format is [39:8] */
    SQ_PGM_RESOURCES_ES                                   = 0x00028890,	/* (8-state). Resource requirements to run the ES program. Can only read most recent version, not all 8 states. */
/* 	NUM_GPRS_mask                                     = 0xff << 0, */	/* number of GPRs required to run this program [0..127] */
/* 	NUM_GPRS_shift                                    = 0, */
/* 	STACK_SIZE_mask                                   = 0xff << 8, */	/* number of stack entries needed [0..255] */
/* 	STACK_SIZE_shift                                  = 8, */
	SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit               = 1 << 21,	/* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */	/* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* 	FETCH_CACHE_LINES_shift                           = 24, */
/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */	/* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
    SQ_PGM_START_FS                                       = 0x00028894,	/* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the fetch shader (FS) */
	/* PGM_START: Format is [39:8] */
    SQ_PGM_RESOURCES_FS                                   = 0x000288a4,	/* (8-state). Resource requirements to run the FS program. The FS shares with either the VS (gs-off) or ES (gs-on) and performs a single allocation equal to the VS+FS or ES+FS resource requirements. The SPI allocates stack space as (VS/ES + FS_stack_size) in the same manner as GPRs. Max_call_depth and fetch_cache_lines will be inherited from the parent shader (VS or ES). Can only read most recent version, not all 8 states. */
/* 	NUM_GPRS_mask                                     = 0xff << 0, */	/* number of GPRs required to run this program [0..127] */
/* 	NUM_GPRS_shift                                    = 0, */
/* 	STACK_SIZE_mask                                   = 0xff << 8, */	/* number of stack entries needed [0..255] */
/* 	STACK_SIZE_shift                                  = 8, */
	SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit               = 1 << 21,	/* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
    SQ_ESGS_RING_ITEMSIZE                                 = 0x000288a8,	/* (8-state) Space allocated to a single pixel/vertex in the ES->GS ring buffer (in DWORDs). Itemsize is the true count, not count-1 and represents [0..32767] dwords. */
	ITEMSIZE_mask                                     = 0x7fff << 0,	/* Format is [16:2] */
	ITEMSIZE_shift                                    = 0,
    SQ_GSVS_RING_ITEMSIZE                                 = 0x000288ac,	/* (8-state) Space allocated to a single pixel/vertex in the GS->VS ring buffer (in DWORDs). This defines the max number of dwords a single invocation of the GS can output to the ring buffer. */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_ESTMP_RING_ITEMSIZE                                = 0x000288b0,	/* (8-state) Space allocated to a single pixel/vertex in the ES Temp buffer (in DWORDs). */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_GSTMP_RING_ITEMSIZE                                = 0x000288b4,	/* (8-state) Space allocated to a single pixel/vertex in the GS Temp buffer (in DWORDs). */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_VSTMP_RING_ITEMSIZE                                = 0x000288b8,	/* (8-state) Space allocated to a single pixel/vertex in the VS Temp buffer (in DWORDs) */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_PSTMP_RING_ITEMSIZE                                = 0x000288bc,	/* (8-state) Space allocated to a single pixel/vertex in the PS Temp buffer (in DWORDs) */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_FBUF_RING_ITEMSIZE                                 = 0x000288c0,	/* (8-state) Space allocated to a single pixel/vertex in the FBUFFER */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_REDUC_RING_ITEMSIZE                                = 0x000288c4,	/* (8-state) Space allocated to a single pixel/vertex in the Reduction Buffer */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_GS_VERT_ITEMSIZE                                   = 0x000288c8,	/* (8-state) Space allocated to a single GS output vertex in GS Temp Buffer. This defines the size of a single vertex output by the GS. Multiple vertices can be output so long as the total output size does not exceed SQ_GSVS_RING_ITEMSIZE. */
/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */	/* Format is [16:2] */
/* 	ITEMSIZE_shift                                    = 0, */
    SQ_PGM_CF_OFFSET_PS                                   = 0x000288cc,	/* (8-state) Memory offset from the program start (SQ_PGM_START_PS) of the (8-byte aligned) entry point for the pixel shader (PS) program. This is the first CF instruction that each thread will execute. */
	PGM_CF_OFFSET_mask                                = 0xfffff << 0,	/* Format is [22:3] */
	PGM_CF_OFFSET_shift                               = 0,
    SQ_PGM_CF_OFFSET_VS                                   = 0x000288d0,	/* (8-state) Memory offset from the program start (SQ_PGM_START_VS) of the (8-byte aligned) entry point for the vertex shader (VS) program. This is the first CF instruction that each thread will execute. */
/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */	/* Format is [22:3] */
/* 	PGM_CF_OFFSET_shift                               = 0, */
    SQ_PGM_CF_OFFSET_GS                                   = 0x000288d4,	/* (8-state) Memory offset from the program start (SQ_PGM_START_GS) of the (8-byte aligned) entry point for the geometry shader (GS) program. This is the first CF instruction that each thread will execute. */
/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */	/* Format is [22:3] */
/* 	PGM_CF_OFFSET_shift                               = 0, */
    SQ_PGM_CF_OFFSET_ES                                   = 0x000288d8,	/* (8-state) Memory offset from the program start (SQ_PGM_START_ES) of the (8-byte aligned) entry point for the export shader (ES) program. This is the first CF instruction that each thread will execute. */
/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */	/* Format is [22:3] */
/* 	PGM_CF_OFFSET_shift                               = 0, */
    SQ_PGM_CF_OFFSET_FS                                   = 0x000288dc,	/* (8-state) Memory offset from the program start (SQ_PGM_START_FS) of the (8-byte aligned) entry point for the fetch shader (FS) program. This is the first CF instruction that each thread will execute. */
/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */	/* Format is [22:3] */
/* 	PGM_CF_OFFSET_shift                               = 0, */
    SQ_VTX_SEMANTIC_CLEAR                                 = 0x000288e0,	/* (8-state) This register is used to clear the contents of the vertex semantic table. Entries can be cleared independently -- each has one bit in this register to clear or leave alone. This register is write-only (not readable). */
	/* clear or preserve table entry */
    SQ_ALU_CONST_CACHE_PS_0                               = 0x00028940,	/* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_CACHE_PS_0_num                       = 16,
	/* DATA: TBD */
    SQ_ALU_CONST_CACHE_VS_0                               = 0x00028980,	/* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. Used by both VS and ES shaders. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_CACHE_VS_0_num                       = 16,
	/* DATA: TBD */
    SQ_ALU_CONST_CACHE_GS_0                               = 0x000289c0,	/* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
	SQ_ALU_CONST_CACHE_GS_0_num                       = 16,
	/* DATA: TBD */
    PA_SU_POINT_SIZE                                      = 0x00028a00,	/* Dimensions for Points */
	PA_SU_POINT_SIZE__HEIGHT_mask                     = 0xffff << 0,	/* 1/2 Height (Vertical Radius) of point; fixed (12.4), 12 bits integer, 4 bits fractional pixels. */
	PA_SU_POINT_SIZE__HEIGHT_shift                    = 0,
	PA_SU_POINT_SIZE__WIDTH_mask                      = 0xffff << 16,	/* 1/2 Width (Horizontal Radius) of point; fixed (12.4), 12 bits integer, 4 bits fractional pixels. */
	PA_SU_POINT_SIZE__WIDTH_shift                     = 16,
    PA_SU_POINT_MINMAX                                    = 0x00028a04,	/* Specifies maximum and minimum point & sprite sizes for per vertex size specification. */
	MIN_SIZE_mask                                     = 0xffff << 0,	/* Minimum point & sprite radius size to allow. fixed point (12.4), 12 bits integer, 4 bits fractional pixels */
	MIN_SIZE_shift                                    = 0,
	MAX_SIZE_mask                                     = 0xffff << 16,	/* Maximum point & sprite radius size to allow. fixed point (12.4), 12 bits integer, 4 bits fractional pixels */
	MAX_SIZE_shift                                    = 16,
    PA_SU_LINE_CNTL                                       = 0x00028a08,	/* Line control */
	PA_SU_LINE_CNTL__WIDTH_mask                       = 0xffff << 0,	/* 1/2 width of line, in subpixels; (16.0) fixed format. */
	PA_SU_LINE_CNTL__WIDTH_shift                      = 0,
    PA_SC_LINE_STIPPLE                                    = 0x00028a0c,	/* Line Stipple Control */
	LINE_PATTERN_mask                                 = 0xffff << 0,	/* 16-bit pattern */
	LINE_PATTERN_shift                                = 0,
	REPEAT_COUNT_mask                                 = 0xff << 16,	/* Pattern bit repeat count (minus 1). Field has a valid range of 0-255 which maps to OGL api values of 1-256. */
	REPEAT_COUNT_shift                                = 16,
	PATTERN_BIT_ORDER_bit                             = 1 << 28,	/* Bit Ordering of Pattern Bits: 0 = Little Bit Order, 1 = Big Bit Order */
	AUTO_RESET_CNTL_mask                              = 0x03 << 29,	/* Auto reset control of current pattern count/pointer. 0 = Never reset current pattern count/pointer. 1 = Reset current pattern count/pointer at each primitive (line list). 2 = Reset current pattern count/pointer at each packet (line strip). */
	AUTO_RESET_CNTL_shift                             = 29,
    VGT_OUTPUT_PATH_CNTL                                  = 0x00028a10,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register selects which backend path will be used by the VGT block. */
	PATH_SELECT_mask                                  = 0x03 << 0,	/* This field indicates the VGT back-end path to be used. */
	PATH_SELECT_shift                                 = 0,
	    VGT_OUTPATH_VTX_REUSE                         = 0x00,	/* VGT_OUTPATH_VTX_REUSE: VGT_OUTPATH_VTX_REUSE */
	    VGT_OUTPATH_TESS_EN                           = 0x01,	/* VGT_OUTPATH_TESS_EN: VGT_OUTPATH_TESS_EN */
	    VGT_OUTPATH_PASSTHRU                          = 0x02,	/* VGT_OUTPATH_PASSTHRU: VGT_OUTPATH_PASSTHRU */
	    VGT_OUTPATH_GS_BLOCK                          = 0x03,	/* VGT_OUTPATH_GS_BLOCK: VGT_OUTPATH_GS_BLOCK */
    VGT_HOS_CNTL                                          = 0x00028a14,	/* This register controls the behavior of the Tessellation Engine block at the backend of the VGT. This register is relevant only if the VGT_OUTPUT_PATH_CNTL register specifies the Tessellation Engine block for the VGT backend path. Note that the tessellation engine is enabled by selecting the tessellation engine path in the VGT_OUTPUT_PATH_CNTL register as opposed to the single enable bit that was used in previous architectures. */
	TESS_MODE_mask                                    = 0x03 << 0,	/* Tessellation Mode 0 : Discrete 1 : Continuous 2 : Adaptive */
	TESS_MODE_shift                                   = 0,
    VGT_HOS_MAX_TESS_LEVEL                                = 0x00028a18,	/* For continuous and discrete tessellation modes, this register contains the tessellation level. For adaptive tessellation, this register contains the maximum tessellation level. The adaptive tessellation levels will be clamped less-than or equal to this level by the tessellation engine. In all cases, the format of this register is 32-bit IEEE floating point. This register is relevant only when the VGT_OUTPUT_PATH_CNTL register specifies `Tessellation Engine` in the Path Select field. */
	/* MAX_TESS: For adaptive tessellation mode, this is the maximum tessellation clamp value. For continuous and discrete tessellation modes, this is the tessellation level. For discrete modes, values in the range (1.0, 14.0) are legal. For non-discrete modes, values in the range (1.0, 15.0) are legal. MAX_TESS must be greater than or equal to MIN_TESS. */
    VGT_HOS_MIN_TESS_LEVEL                                = 0x00028a1c,	/* For continuous and discrete tessellation modes, this register is not applicable. For adaptive tessellation, this register contains the minimum tessellation level. The adaptive tessellation levels will be clamped greater-than or equal to this level by the tessellation engine. The format of this register is 32-bit IEEE floating point. This register is relevant only when the VGT_OUTPUT_PATH_CNTL register specifies `Tessellation Engine` in the Path Select field and the VGT_HOS_CNTL register specifies adaptive tessellation mode. */
	/* MIN_TESS: For adaptive tessellation mode, this is the minimum tessellation clamp value. For continuous and discrete tessellation modes, this register is not applicable. For discrete modes values in the range (1.0, 14.0) are legal. For non-discrete modes, values in the range (1.0, 15.0) are legal. MIN_TESS must be less than or equal to MAX_TESS. */
    VGT_HOS_REUSE_DEPTH                                   = 0x00028a20,
	REUSE_DEPTH_mask                                  = 0xff << 0,
	REUSE_DEPTH_shift                                 = 0,
    VGT_GROUP_PRIM_TYPE                                   = 0x00028a24,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the prim type output by the grouper stage of the VGT */
	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask               = 0x1f << 0,	/* Prim type output by grouper stage of the VGT. */
	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift              = 0,
	    VGT_GRP_3D_POINT                              = 0x00,	/* VGT_GRP_3D_POINT: VGT_GRP_3D_POINT */
	    VGT_GRP_3D_LINE                               = 0x01,	/* VGT_GRP_3D_LINE: VGT_GRP_3D_LINE */
	    VGT_GRP_3D_TRI                                = 0x02,	/* VGT_GRP_3D_TRI: VGT_GRP_3D_TRI */
	    VGT_GRP_3D_RECT                               = 0x03,	/* VGT_GRP_3D_RECT: VGT_GRP_3D_RECT */
	    VGT_GRP_3D_QUAD                               = 0x04,	/* VGT_GRP_3D_QUAD: VGT_GRP_3D_QUAD */
	    VGT_GRP_2D_COPY_RECT_V0                       = 0x05,	/* VGT_GRP_2D_COPY_RECT_V0: VGT_GRP_2D_COPY_RECT_V0 */
	    VGT_GRP_2D_COPY_RECT_V1                       = 0x06,	/* VGT_GRP_2D_COPY_RECT_V1: VGT_GRP_2D_COPY_RECT_V1 */
	    VGT_GRP_2D_COPY_RECT_V2                       = 0x07,	/* VGT_GRP_2D_COPY_RECT_V2: VGT_GRP_2D_COPY_RECT_V2 */
	    VGT_GRP_2D_COPY_RECT_V3                       = 0x08,	/* VGT_GRP_2D_COPY_RECT_V3: VGT_GRP_2D_COPY_RECT_V3 */
	    VGT_GRP_2D_FILL_RECT                          = 0x09,	/* VGT_GRP_2D_FILL_RECT: VGT_GRP_2D_FILL_RECT */
	    VGT_GRP_2D_LINE                               = 0x0a,	/* VGT_GRP_2D_LINE: VGT_GRP_2D_LINE */
	    VGT_GRP_2D_TRI                                = 0x0b,	/* VGT_GRP_2D_TRI: VGT_GRP_2D_TRI */
	    VGT_GRP_PRIM_INDEX_LINE                       = 0x0c,	/* VGT_GRP_PRIM_INDEX_LINE: VGT_GRP_PRIM_INDEX_LINE */
	    VGT_GRP_PRIM_INDEX_TRI                        = 0x0d,	/* VGT_GRP_PRIM_INDEX_TRI: VGT_GRP_PRIM_INDEX_TRI */
	    VGT_GRP_PRIM_INDEX_QUAD                       = 0x0e,	/* VGT_GRP_PRIM_INDEX_QUAD: VGT_GRP_PRIM_INDEX_QUAD */
	    VGT_GRP_3D_LINE_ADJ                           = 0x0f,	/* VGT_GRP_3D_LINE_ADJ: VGT_GRP_3D_LINE_ADJ */
	    VGT_GRP_3D_TRI_ADJ                            = 0x10,	/* VGT_GRP_3D_TRI_ADJ: VGT_GRP_3D_TRI_ADJ */
	RETAIN_ORDER_bit                                  = 1 << 14,	/* Resetting this bit to zero causes the Grouper within the VGT to convert strips, fans, loops, and polygons into regular lists in the vgt_grouper block. It also causes the primitive indices to be re-ordered to have the provoking vertex in the correct position. This bit should be set to zero if the VGT_OUTPUT_PATH_CNTL register specifies VGT_OUTPATH_VTX_REUSE or VGT_OUTPATH_TESS_EN and the VGT_DRAW_INITIATOR prim type is between 0 and 15, inclusive, (tri list, tri strip, tri fan, etc...). This bit is implied to be zero for VGT_DRAW_INITIATOR prim types 0 thru 15 if the Major Mode of the VGT_DRAW_INITIATOR is 0. If this bit is set for prim types 0 thru 15, then the primitive index order from the grouper will be retained and the indices will be incorrect for loops, fans, and polygons. Note that if the VGT_DRAW_INITIATOR.MAJOR_MODE is set to MAJOR_MODE_1 and VGT_OUTPUT_PATH_CNTL is set to VGT_OUTPATH_PASSTHRU and the VGT_GROUP_PRIM_TYPE.PRIM_TYPE is set to VGT_GRP_3D_TRI or VGT_GRP_2D_TRI and VGT_GROUP_PRIM_TYPE.PRIM_ORDER is set to VGT_GRP_STRIP, then the passthru block will perform DX/OpenGL index re-ordering for tri-strips. */
	RETAIN_QUADS_bit                                  = 1 << 15,	/* This bit can only be legally set if the VGT_OUTPUT_PATH_CNTL register specifies the Tessellation Engine and the Major Mode of the VGT_DRAW_INITIATOR is 1. The RETAIN_QUADS bit indicates that quads should be passed intact to the tessellation engine. If this bit is not set, then the quads will be decomposed into triangles. */
	PRIM_ORDER_mask                                   = 0x07 << 16,	/* Prim order output by grouper stage of the VGT. */
	PRIM_ORDER_shift                                  = 16,
	    VGT_GRP_LIST                                  = 0x00,	/* VGT_GRP_LIST: VGT_GRP_LIST */
	    VGT_GRP_STRIP                                 = 0x01,	/* VGT_GRP_STRIP: VGT_GRP_STRIP */
	    VGT_GRP_FAN                                   = 0x02,	/* VGT_GRP_FAN: VGT_GRP_FAN */
	    VGT_GRP_LOOP                                  = 0x03,	/* VGT_GRP_LOOP: VGT_GRP_LOOP */
	    VGT_GRP_POLYGON                               = 0x04,	/* VGT_GRP_POLYGON: VGT_GRP_POLYGON */
    VGT_GROUP_FIRST_DECR                                  = 0x00028a28,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the amount by which the draw initiator index count is decremented for the first group taken from the input stream. */
	FIRST_DECR_mask                                   = 0x0f << 0,	/* Decrement amount for the first group */
	FIRST_DECR_shift                                  = 0,
    VGT_GROUP_DECR                                        = 0x00028a2c,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the amount by which the draw initiator index count is decremented for all groups taken from the input stream except for the first group. */
	DECR_mask                                         = 0x0f << 0,	/* Decrement amount for groups except the first */
	DECR_shift                                        = 0,
    VGT_GROUP_VECT_0_CNTL                                 = 0x00028a30,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register indicates, with bits flags, which components are relevant for vector 0 of a group. At least one component of vector 0 must be indicated. This register also contains the stride of vector 0 (in 16-bit words) in the input stream and the amount to shift the input stream (in 16-bit words) after extracting the vector. */
	COMP_X_EN_bit                                     = 1 << 0,	/* Indicates that component X will be output from the grouper for vector 0 */
	COMP_Y_EN_bit                                     = 1 << 1,	/* Indicates that component Y will be output from the grouper for vector 0 */
	COMP_Z_EN_bit                                     = 1 << 2,	/* Indicates that component Z will be output from the grouper for vector 0 */
	COMP_W_EN_bit                                     = 1 << 3,	/* Indicates that component W will be output from the grouper for vector 0 */
	VGT_GROUP_VECT_0_CNTL__STRIDE_mask                = 0xff << 8,	/* The stride of vector 0 data in the input stream (in 16-bit words). Zero is NOT a legal value for an active vector. See the programming guidelines for the situation in which a vector uses no data from the shifter. */
	VGT_GROUP_VECT_0_CNTL__STRIDE_shift               = 8,
	SHIFT_mask                                        = 0xff << 16,	/* The amount to shift the input stream after extracting vector 0 (in 16-bit words). This field must be less than or equal to the STRIDE field for proper shifter operation. */
	SHIFT_shift                                       = 16,
    VGT_GROUP_VECT_1_CNTL                                 = 0x00028a34,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register is identical to VGT_GROUP_VECT_0_CNTL except that it applies to vector 1 of the group instead of vector 0. Also, vector 0 is required to have at least one component set; however, vector 1 may have none set. */
/* 	COMP_X_EN_bit                                     = 1 << 0, */
/* 	COMP_Y_EN_bit                                     = 1 << 1, */
/* 	COMP_Z_EN_bit                                     = 1 << 2, */
/* 	COMP_W_EN_bit                                     = 1 << 3, */
	VGT_GROUP_VECT_1_CNTL__STRIDE_mask                = 0xff << 8,
	VGT_GROUP_VECT_1_CNTL__STRIDE_shift               = 8,
/* 	SHIFT_mask                                        = 0xff << 16, */
/* 	SHIFT_shift                                       = 16, */
    VGT_GROUP_VECT_0_FMT_CNTL                             = 0x00028a38,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register indicates how the value of each component of vector 0 will be determined. If the VGT_GROUP_VECT_0_CNTL register indicates that a particular component is not selected for output from the grouper, then that component's format control fields are ignored. */
	X_CONV_mask                                       = 0x0f << 0,	/* X Component Determination. */
	X_CONV_shift                                      = 0,
	    VGT_GRP_INDEX_16                              = 0x00,	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
	    VGT_GRP_INDEX_32                              = 0x01,	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
	    VGT_GRP_UINT_16                               = 0x02,	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
	    VGT_GRP_UINT_32                               = 0x03,	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
	    VGT_GRP_SINT_16                               = 0x04,	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
	    VGT_GRP_SINT_32                               = 0x05,	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
	    VGT_GRP_FLOAT_32                              = 0x06,	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
	    VGT_GRP_AUTO_PRIM                             = 0x07,	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08,	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
	X_OFFSET_mask                                     = 0x0f << 4,	/* X Component Offset. This field is the offset, in 16-bit words, of the X component in the input cycle. */
	X_OFFSET_shift                                    = 4,
	Y_CONV_mask                                       = 0x0f << 8,	/* Y Component Determination. See the X component determination field for description. */
	Y_CONV_shift                                      = 8,
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
	Y_OFFSET_mask                                     = 0x0f << 12,	/* Y Component Offset. This field is the offset, in 16-bit words, of the Y component in the input cycle. */
	Y_OFFSET_shift                                    = 12,
	Z_CONV_mask                                       = 0x0f << 16,	/* Z Component Determination. See the X component determination field for description. */
	Z_CONV_shift                                      = 16,
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
	Z_OFFSET_mask                                     = 0x0f << 20,	/* Z Component Offset. This field is the offset, in 16-bit words, of the Z component in the input cycle. */
	Z_OFFSET_shift                                    = 20,
	W_CONV_mask                                       = 0x0f << 24,	/* W Component Determination. See the X component determination field for description. */
	W_CONV_shift                                      = 24,
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
	W_OFFSET_mask                                     = 0x0f << 28,	/* W Component Offset. This field is the offset, in 16-bit words, of the W component in the input cycle. */
	W_OFFSET_shift                                    = 28,
    VGT_GROUP_VECT_1_FMT_CNTL                             = 0x00028a3c,	/* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register is identical to VGT_GROUP_VECT_0_FMT_CNTL except that it controls the formatting of output vector 1 instead of output vector 0. */
/* 	X_CONV_mask                                       = 0x0f << 0, */
/* 	X_CONV_shift                                      = 0, */
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* 	X_OFFSET_mask                                     = 0x0f << 4, */
/* 	X_OFFSET_shift                                    = 4, */
/* 	Y_CONV_mask                                       = 0x0f << 8, */
/* 	Y_CONV_shift                                      = 8, */
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* 	Y_OFFSET_mask                                     = 0x0f << 12, */
/* 	Y_OFFSET_shift                                    = 12, */
/* 	Z_CONV_mask                                       = 0x0f << 16, */
/* 	Z_CONV_shift                                      = 16, */
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* 	Z_OFFSET_mask                                     = 0x0f << 20, */
/* 	Z_OFFSET_shift                                    = 20, */
/* 	W_CONV_mask                                       = 0x0f << 24, */
/* 	W_CONV_shift                                      = 24, */
/* 	    VGT_GRP_INDEX_16                              = 0x00, */	/* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
/* 	    VGT_GRP_INDEX_32                              = 0x01, */	/* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
/* 	    VGT_GRP_UINT_16                               = 0x02, */	/* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
/* 	    VGT_GRP_UINT_32                               = 0x03, */	/* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
/* 	    VGT_GRP_SINT_16                               = 0x04, */	/* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
/* 	    VGT_GRP_SINT_32                               = 0x05, */	/* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
/* 	    VGT_GRP_FLOAT_32                              = 0x06, */	/* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */	/* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */	/* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* 	W_OFFSET_mask                                     = 0x0f << 28, */
/* 	W_OFFSET_shift                                    = 28, */
    VGT_GS_MODE                                           = 0x00028a40,	/* VGT GS Enable Mode */
	MODE_mask                                         = 0x03 << 0,	/* Indicates which GS scenario is enabled */
	MODE_shift                                        = 0,
	    GS_OFF                                        = 0x00,	/* GS_OFF: GS_OFF */
	    GS_SCENARIO_A                                 = 0x01,	/* GS_SCENARIO_A: GS_SCENARIO_A */
	    GS_SCENARIO_B                                 = 0x02,	/* GS_SCENARIO_B: GS_SCENARIO_B */
	    GS_SCENARIO_G                                 = 0x03,	/* GS_SCENARIO_G: GS_SCENARIO_G */
	ES_PASSTHRU_bit                                   = 1 << 2,	/* Set to one if the VS shader is passthru when GS scenario G is used */
	CUT_MODE_mask                                     = 0x03 << 3,	/* 00: 1024 max gs emit vertices, 01: 512 max gs emit vertices, 10: 256 max gs emit vertices, 11: 128 max gs emit vertices */
	CUT_MODE_shift                                    = 3,
	    GS_CUT_1024                                   = 0x00,	/* GS_CUT_1024: GS_CUT_1024 */
	    GS_CUT_512                                    = 0x01,	/* GS_CUT_512: GS_CUT_512 */
	    GS_CUT_256                                    = 0x02,	/* GS_CUT_256: GS_CUT_256 */
	    GS_CUT_128                                    = 0x03,	/* GS_CUT_128: GS_CUT_128 */
    PA_SC_MPASS_PS_CNTL                                   = 0x00028a48,	/* Multi-Pass Pixel Shader Control Register */
	MPASS_PIX_VEC_PER_PASS_mask                       = 0xfffff << 0,	/* Specifies the number of pixel vectors to process for each pass. Should be based on the amount of memory available for pixel shader export to memory and the size of each pixel's output data. Note there are 64 pixels per pixel vector in R600. There will likely be 32 pixels per pixel vector and 16 in derivative parts. */
	MPASS_PIX_VEC_PER_PASS_shift                      = 0,
	/* NOTE(review): assuming a 32-bit int, 1 << 31 shifts into the sign bit of a signed operand, which ISO C leaves undefined; confirm the compilers in use evaluate it as the intended bit-31 mask. */
	MPASS_PS_ENA_bit                                  = 1 << 31,	/* If set, enables multipass pixel shader operation. */
    PA_SC_MODE_CNTL                                       = 0x00028a4c,	/* SC Mode Control Register for Various Enables */
	MSAA_ENABLE_bit                                   = 1 << 0,	/* Enable MultiSample AA. If set, the MSAA_NUM_SAMPLES+1 used for MSAA will have unique subpixel locations as described below and MSAA_NUM_SAMPLES must not equal 0. If clear, all MSAA_NUM_SAMPLES+1 will be sampled using the pixel center. All sample locations are specified as an offset from pixel center. 2 SAMPLE: Sample 0: -4, 4; Sample 1: 4, -4. 4 SAMPLE: Sample 0: -2, -2; Sample 1: 2, 2; Sample 2: -6, 6; Sample 3: 6, -6. 8 SAMPLE: Sample 0: -2, -5; Sample 1: 4, -4; Sample 2: 1, 6; Sample 3: -6, -2; Sample 4: 6, 1; Sample 5: 0, 0; Sample 6: -5, 4; Sample 7: 7, -8. */
	CLIPRECT_ENABLE_bit                               = 1 << 1,	/* Enables 4 cliprects (same as setting CLIPRECT_RULE to 0xffff) */
	LINE_STIPPLE_ENABLE_bit                           = 1 << 2,	/* Enable line stipple processing */
	MULTI_CHIP_PRIM_DISCARD_ENAB_bit                  = 1 << 3,	/* Enables primitives to be discarded based on ... (NOTE(review): description appears truncated; complete it from the register reference) */
	WALK_ORDER_ENABLE_bit                             = 1 << 4,	/* Enables fixed pattern for quad walk order. Must be disabled for overlapping blit rendering. */
	HALVE_DETAIL_SAMPLE_PERF_bit                      = 1 << 5,	/* Enables the ability to halve the performance of the detail samplers in all MSAA modes. */
	WALK_SIZE_bit                                     = 1 << 6,	/* Defines the size of the SC walk stamp. 0 : walk by supertiles (32 bits); 1 : walk by tiles (8 bits). */
	WALK_ALIGNMENT_bit                                = 1 << 7,	/* Defines the alignment value of the SC walker. 0 : align by supertiles (32 bits); 1 : align by tiles (8 bits). */
	WALK_ALIGN8_PRIM_FITS_ST_bit                      = 1 << 8,	/* When alignment value is set to supertiles (32 bits), enables the walker to align by tiles (8 bits) if primitive fits within one supertile. */
	TILE_COVER_NO_SCISSOR_bit                         = 1 << 9,	/* Disables the use of scissors when determining tile covered. */
	KILL_PIX_POST_HI_Z_bit                            = 1 << 10,	/* If set, all pixels are killed in the SC after the HI-Z test. Typically set for VizQuery geometry */
	KILL_PIX_POST_DETAIL_MASK_bit                     = 1 << 11,	/* If set, all pixels are killed in the SC after the detail mask. Can be used for performance info */
	MULTI_CHIP_SUPERTILE_ENABLE_bit                   = 1 << 12,	/* Enables Multi-Chip supertile mode with the configuration defined in PA_SC_MULTI_CHIP_CNTL. */
	TILE_COVER_DISABLE_bit                            = 1 << 13,	/* Disables tile covered (Hi-Z optimization) that is sent to the DBs. */
	FORCE_EOV_CNTDWN_ENABLE_bit                       = 1 << 14,	/* Enables forcing out pixel vectors prematurely based on the cycle count programmed in PA_SC_ENHANCE::FORCE_EOV_MAX_CLK_CNT[11:0] */
	FORCE_EOV_TILE_ENABLE_bit                         = 1 << 15,	/* Enables forcing out pixel vectors prematurely based on the tile count programmed in PA_SC_ENHANCE::FORCE_EOV_MAX_TILE_CNT[11:0] */
	FORCE_EOV_REZ_ENABLE_bit                          = 1 << 16,	/* Enables forcing out pixel vectors prematurely based on the ReZ hang condition (i.e. cache locked) detected in the DB */
	PS_ITER_SAMPLE_bit                                = 1 << 17,	/* Enables per-sample (i.e. unique shader-computed value per sample) pixel shader execution. */
    VGT_ENHANCE                                           = 0x00028a50,	/* Used for Late Additions of Control Bits */
	MI_TIMESTAMP_RES_mask                             = 0x03 << 0,	/* POSSIBLE VALUES: 00 - 0 -> 992 Clocks latency range in steps of 32; 01 - 0 -> 496 Clocks latency range in steps of 16; 02 - 0 -> 248 Clocks latency range in steps of 8; 03 - 0 -> 124 Clocks latency range in steps of 4 */
	MI_TIMESTAMP_RES_shift                            = 0,
	    X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32   = 0x00,	/* 0 -> 992 Clocks latency range in steps of 32 */
	    X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16   = 0x01,	/* 0 -> 496 Clocks latency range in steps of 16 */
	    X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8    = 0x02,	/* 0 -> 248 Clocks latency range in steps of 8 */
	    X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4    = 0x03,	/* 0 -> 124 Clocks latency range in steps of 4 */
	/* NOTE(review): assuming a 32-bit int, 0x3fffffff << 2 (0xfffffffc) is not representable in the signed type of the operand, which ISO C leaves undefined; confirm the compilers in use produce the intended bits 31:2 mask. */
	MISC_mask                                         = 0x3fffffff << 2,	/* Misc bit */
	MISC_shift                                        = 2,
    VGT_GS_OUT_PRIM_TYPE                                  = 0x00028a6c,	/* VGT GS output primitive type */
	OUTPRIM_TYPE_mask                                 = 0x3f << 0,	/* GS output primitive type */
	OUTPRIM_TYPE_shift                                = 0,
	    POINTLIST                                     = 0x00,	/* POINTLIST: POINTLIST */
	    LINESTRIP                                     = 0x01,	/* LINESTRIP: LINESTRIP */
	    TRISTRIP                                      = 0x02,	/* TRISTRIP: TRISTRIP */
    VGT_DMA_SIZE                                          = 0x00028a74,	/* VGT DMA Size */
	/* NUM_INDICES: VGT DMA Number of indices */
    VGT_DMA_INDEX_TYPE                                    = 0x00028a7c,	/* VGT DMA Index Type and Mode */
/* 	INDEX_TYPE_mask                                   = 0x03 << 0, */	/* VGT DMA Index Type */
/* 	INDEX_TYPE_shift                                  = 0, */
	    VGT_INDEX_16                                  = 0x00,	/* VGT_INDEX_16: 16-bit index */
	    VGT_INDEX_32                                  = 0x01,	/* VGT_INDEX_32: 32-bit index */
	SWAP_MODE_mask                                    = 0x03 << 2,	/* DMA Swap mode */
	SWAP_MODE_shift                                   = 2,
	    VGT_DMA_SWAP_NONE                             = 0x00,	/* VGT_DMA_SWAP_NONE: No swap */
	    VGT_DMA_SWAP_16_BIT                           = 0x01,	/* VGT_DMA_SWAP_16_BIT: 16-bit swap 0xAABBCCDD -> 0xBBAADDCC */
	    VGT_DMA_SWAP_32_BIT                           = 0x02,	/* VGT_DMA_SWAP_32_BIT: 32-bit swap 0xAABBCCDD -> 0xDDCCBBAA */
	    VGT_DMA_SWAP_WORD                             = 0x03,	/* VGT_DMA_SWAP_WORD: word swap 0xAABBCCDD -> 0xCCDDAABB */
    VGT_PRIMITIVEID_EN                                    = 0x00028a84,	/* VGT Primitive ID enable */
	PRIMITIVEID_EN_bit                                = 1 << 0,	/* PrimitiveID generation is enabled */
    VGT_DMA_NUM_INSTANCES                                 = 0x00028a88,	/* VGT DMA Number of Instances */
	/* VGT DMA Number of Instances, minimum value is 1 */
    VGT_EVENT_INITIATOR                                   = 0x00028a90,	/* Event Initiator */
	EVENT_TYPE_mask                                   = 0x3f << 0,	/* Event Type (also called Event ID) -- Currently, the hardware interface between the VGT and the PA supports only a 6-bit event type. */
	EVENT_TYPE_shift                                  = 0,
	    CACHE_FLUSH_TS                                = 0x04,	/* CACHE_FLUSH_TS: Destination Cache Flush with Timestamp -- Inserted by the driver to request the CBs, DBs, and SMX to signal the CP when all prior rendering is flushed to memory. */
	    CONTEXT_DONE                                  = 0x05,	/* CONTEXT_DONE: GFXDEC Context Done -- Inserted by the CP on the first GFXDEC state update after a draw. */
	    CACHE_FLUSH                                   = 0x06,	/* CACHE_FLUSH: Destination Caches Flushed -- Inserted by the driver to request the CBs, DBs, and SMX to flush their caches to memory (No Timestamp is Generated). */
	    VIZQUERY_START                                = 0x07,	/* VIZQUERY_START: No longer supported */
	    VIZQUERY_END                                  = 0x08,	/* VIZQUERY_END: No longer supported */
	    SC_WAIT_WC                                    = 0x09,	/* SC_WAIT_WC: SC Wait for WC from CP -- Inserted by the CP to inform the SC to wait for the write confirm signal (wire) from the CP before submitting future pixel vectors. This is used to synchronize a 2D source surface (brush, a.k.a. texture) with the user of that surface. */
	    MPASS_PS_CP_REFETCH                           = 0x0a,	/* MPASS_PS_CP_REFETCH: Multi-Pass Pixel Shader CP Refetch -- Inserted by the driver to inform the SC it needs to report to CP to refetch buffer for multi-pass pixel shader or continue. */
	    MPASS_PS_RST_START                            = 0x0b,	/* MPASS_PS_RST_START: Multi-Pass Pixel Shader Reset Start -- Inserted by the driver just before an INDIRECT_BUFFER_MP packet to instruct the SC to reset the multi-pass start pixel vector. */
	    MPASS_PS_INCR_START                           = 0x0c,	/* MPASS_PS_INCR_START: Multi-Pass Pixel Shader Increment Start -- Inserted by the driver to instruct the SC to increment the multi-pass start vector by vectors_per_pass. */
	    RST_PIX_CNT                                   = 0x0d,	/* RST_PIX_CNT: Reset SQ's auto Pixel Counter AND reset SC's multi-pass pixel vector count -- Inserted by the driver. */
	    RST_VTX_CNT                                   = 0x0e,	/* RST_VTX_CNT: Reset SQ's auto Vertex Counter -- Inserted by the driver. */
	    VS_PARTIAL_FLUSH                              = 0x0f,	/* VS_PARTIAL_FLUSH: Used to flush all work between the CP and the ES, GS, VS shaders including the VGT. */
	    PS_PARTIAL_FLUSH                              = 0x10,	/* PS_PARTIAL_FLUSH: Used to flush all work between the CP and the ES, GS, VS, PS shaders including scan conversion, primitive assembly, and VGT. */
	    CACHE_FLUSH_AND_INV_TS_EVENT                  = 0x14,	/* CACHE_FLUSH_AND_INV_TS_EVENT: Same as CACHE_FLUSH_TS with an invalidate -- Inserted by the driver. */
	    ZPASS_DONE                                    = 0x15,	/* ZPASS_DONE: Write ZPASS counts to memory -- Inserted by the driver to instruct the DBs to write out the ZPASS counters to memory. Used to support DX10 occlusion queries. */
	    CACHE_FLUSH_AND_INV_EVENT                     = 0x16,	/* CACHE_FLUSH_AND_INV_EVENT: Same as CACHE_FLUSH with an invalidate -- Inserted by the driver. */
	    PERFCOUNTER_START                             = 0x17,	/* PERFCOUNTER_START: Start enabled event based Performance counters -- Inserted by the driver. */
	    PERFCOUNTER_STOP                              = 0x18,	/* PERFCOUNTER_STOP: Stop enabled event based Performance counters that are event-enabled -- Inserted by the driver. */
	    PIPELINESTAT_START                            = 0x19,	/* PIPELINESTAT_START: Start pipeline/strmout stat -- Inserted by the driver. */
	    PIPELINESTAT_STOP                             = 0x1a,	/* PIPELINESTAT_STOP: Stop pipeline/strmout stat -- Inserted by the driver. */
	    PERFCOUNTER_SAMPLE                            = 0x1b,	/* PERFCOUNTER_SAMPLE: Sample the performance counters of all blocks -- Inserted by the driver to read the performance counters. */
	    FLUSH_ES_OUTPUT                               = 0x1c,	/* FLUSH_ES_OUTPUT: Flush Export Shader Output -- Inserted by the VGT to instruct the SMX to flush all the ES output to memory. */
	    FLUSH_GS_OUTPUT                               = 0x1d,	/* FLUSH_GS_OUTPUT: Flush Geometry Shader Output -- Inserted by the VGT to instruct the SMX to flush all the GS output to memory. */
	    SAMPLE_PIPELINESTAT                           = 0x1e,	/* SAMPLE_PIPELINESTAT: Sample Pipeline Statistics counters -- Inserted by the driver to request the GPU to sample counters associated with pipelinestats. The CP will subsequently write them to memory. */
	    SO_VGTSTREAMOUT_FLUSH                         = 0x1f,	/* SO_VGTSTREAMOUT_FLUSH: VGT Streamout Flush -- This event will cause VGT to update the read only offsets registers and then send a VGT_CP_strmout_flushed to instruct the CP to read the offsets. */
	    SAMPLE_STREAMOUTSTATS                         = 0x20,	/* SAMPLE_STREAMOUTSTATS: Sample Streamout Statistics counters -- Inserted by the driver to request the GPU to sample counters associated with streamout. The CP will subsequently write them to memory. */
	    RESET_VTX_CNT                                 = 0x21,	/* RESET_VTX_CNT: Reset Vertex Count -- Inserted by the driver to reset the auto index count for vertex count. There are two counters, one for GS and one for non-GS, and these should be reset separately. */
	    BLOCK_CONTEXT_DONE                            = 0x22,	/* BLOCK_CONTEXT_DONE: Block Managed State (SQCONSDEC) Context Done -- Inserted by the CP on the first SQCONSDEC constant update after a draw. */
	    CR_CONTEXT_DONE                               = 0x23,	/* CR_CONTEXT_DONE: CR Context Done -- Inserted by the driver with an EVENT_WRITE packet, before the first CR state update after a draw (CR_CMD register write) */
	    VGT_FLUSH                                     = 0x24,	/* VGT_FLUSH: VGT Flush -- Inserted by the driver to cause the VGT to be flushed. Used when GS ring buffer sizes are changed */
	    CR_DONE_TS                                    = 0x25,	/* CR_DONE_TS: CR Done Timestamp -- Inserted by the driver to request a time stamp when the CR has completed previous work, flush of destination cache is assumed. */
	    SQ_NON_EVENT                                  = 0x26,	/* SQ_NON_EVENT: SQ Non-Event -- This event is reserved for SQ */
	    SC_SEND_DB_VPZ                                = 0x27,	/* SC_SEND_DB_VPZ: SC Send Depth Block VPort Z -- Inserted by the driver to cause the SC to send the vport array Zmin and Zmax values to the DBs. */
	    BOTTOM_OF_PIPE_TS                             = 0x28,	/* BOTTOM_OF_PIPE_TS: Bottom of the Pipe Timestamp -- Inserted by the driver to request a bottom of pipe timestamp be sent to memory, no flushing required. */
	    DB_CACHE_FLUSH_AND_INV                        = 0x2a,	/* DB_CACHE_FLUSH_AND_INV: DB Flush and Invalidate -- Inserted by the driver when the depth surface is paged out of memory. */
	ADDRESS_HI_mask                                   = 0xff << 19,	/* address bits 39:32 for zpass event */
	ADDRESS_HI_shift                                  = 19,
	EXTENDED_EVENT_bit                                = 1 << 27,	/* 0 for single DW event, 1 for two DW event */
    VGT_MULTI_PRIM_IB_RESET_EN                            = 0x00028a94,	/* This register enables resetting of a primitive based on the reset index */
	RESET_EN_bit                                      = 1 << 0,	/* If set, then the reset index is used for resetting a primitive */
    VGT_INSTANCE_STEP_RATE_0                              = 0x00028aa0,	/* This register defines the first instance step rate */
	/* STEP_RATE: Instance step rate */
    VGT_INSTANCE_STEP_RATE_1                              = 0x00028aa4,	/* This register defines the second instance step rate */
	/* STEP_RATE: Instance step rate */
    VGT_STRMOUT_EN                                        = 0x00028ab0,	/* This register enables streaming out */
	STREAMOUT_bit                                     = 1 << 0,	/* If set, streaming output is enabled */
    VGT_REUSE_OFF                                         = 0x00028ab4,	/* VGT reuse is off. This will expand strip primitives to list primitives */
	REUSE_OFF_bit                                     = 1 << 0,	/* reuse is off (set to 1) */
    VGT_VTX_CNT_EN                                        = 0x00028ab8,	/* Auto-index generation is on. */
	VTX_CNT_EN_bit                                    = 1 << 0,	/* Set to one if auto index generation is enabled */
    VGT_STRMOUT_BUFFER_SIZE_0                             = 0x00028ad0,	/* Stream-out size. */
	/* SIZE: DWORD Buffer size for given stream out buffer. */
    VGT_STRMOUT_VTX_STRIDE_0                              = 0x00028ad4,	/* Stream out stride. */
	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask             = 0x3ff << 0,	/* DWORD stride between vertices in given stream-out buffer. Per the stream output declaration details of the DX10 spec, the max stride is 2048 bytes or 512 words, defined to be the spacing between the beginning of each vertex. */
	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift            = 0,
    VGT_STRMOUT_BUFFER_BASE_0                             = 0x00028ad8,	/* Stream-out base. */
	/* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 correspond to bits 39:8 of the memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
    VGT_STRMOUT_BUFFER_OFFSET_0                           = 0x00028adc,	/* Stream out offset. */
	/* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
    VGT_STRMOUT_BUFFER_SIZE_1                             = 0x00028ae0,	/* Stream-out size. */
	/* SIZE: DWORD Buffer size for given stream out buffer. */
    VGT_STRMOUT_VTX_STRIDE_1                              = 0x00028ae4,	/* Stream out stride. */
	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask             = 0x3ff << 0,	/* DWORD stride between vertices in given stream-out buffer. Per the stream output declaration details of the DX10 spec, the max stride is 2048 bytes or 512 words, defined to be the spacing between the beginning of each vertex. */
	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift            = 0,
    VGT_STRMOUT_BUFFER_BASE_1                             = 0x00028ae8,	/* Stream-out base. */
	/* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 correspond to bits 39:8 of the memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
    VGT_STRMOUT_BUFFER_OFFSET_1                           = 0x00028aec,	/* Stream out offset. */
	/* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
    VGT_STRMOUT_BUFFER_SIZE_2                             = 0x00028af0,	/* Stream-out size. */
	/* SIZE: DWORD Buffer size for given stream out buffer. */
    VGT_STRMOUT_VTX_STRIDE_2                              = 0x00028af4,	/* Stream out stride. */
	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask             = 0x3ff << 0,	/* DWORD stride between vertices in given stream-out buffer. Per the stream output declaration details of the DX10 spec, the max stride is 2048 bytes or 512 words, defined to be the spacing between the beginning of each vertex. */
	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift            = 0,
    VGT_STRMOUT_BUFFER_BASE_2                             = 0x00028af8,	/* Stream-out base. */
	/* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 correspond to bits 39:8 of the memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
    VGT_STRMOUT_BUFFER_OFFSET_2                           = 0x00028afc,	/* Stream out offset. */
	/* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
    VGT_STRMOUT_BUFFER_SIZE_3                             = 0x00028b00,	/* Stream-out size. */
	/* SIZE: DWORD Buffer size for given stream out buffer. */
    VGT_STRMOUT_VTX_STRIDE_3                              = 0x00028b04,	/* Stream out stride. */
	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask             = 0x3ff << 0,	/* DWORD stride between vertices in given stream-out buffer. Per the stream output declaration details of the DX10 spec, the max stride is 2048 bytes or 512 words, defined to be the spacing between the beginning of each vertex. */
	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift            = 0,
    VGT_STRMOUT_BUFFER_BASE_3                             = 0x00028b08,	/* Stream-out base. */
	/* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 correspond to bits 39:8 of the memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
    VGT_STRMOUT_BUFFER_OFFSET_3                           = 0x00028b0c,	/* Stream out offset. */
	/* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
    VGT_STRMOUT_BASE_OFFSET_0                             = 0x00028b10,	/* Stream out base_0 + offset_0. This register is snooped by SQ. */
	/* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
    VGT_STRMOUT_BASE_OFFSET_1                             = 0x00028b14,	/* Stream out base_1 + offset_1. This register is snooped by SQ. */
	/* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
    VGT_STRMOUT_BASE_OFFSET_2                             = 0x00028b18,	/* Stream out base_2 + offset_2. This register is snooped by SQ. */
	/* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
    VGT_STRMOUT_BASE_OFFSET_3                             = 0x00028b1c,	/* Stream out base_3 + offset_3. This register is snooped by SQ. */
	/* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
    VGT_STRMOUT_BUFFER_EN                                 = 0x00028b20,	/* Stream out enable bits. CP will use these for SO coherency register validity. */
	BUFFER_0_EN_bit                                   = 1 << 0,	/* Enable buffer 0 stream out. */
	BUFFER_1_EN_bit                                   = 1 << 1,	/* Enable buffer 1 stream out. */
	BUFFER_2_EN_bit                                   = 1 << 2,	/* Enable buffer 2 stream out. */
	BUFFER_3_EN_bit                                   = 1 << 3,	/* Enable buffer 3 stream out. */
    VGT_STRMOUT_DRAW_OPAQUE_OFFSET                        = 0x00028b28,	/* Draw opaque offset. */
	/* pOffsets from the IASetVertexBuffers binding of a stream out buffer that is to be used as src data. The retrieved BufferFilledSize minus this pOffset, if positive, will determine the amount of data from which primitives can be created. */
    VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE            = 0x00028b2c,	/* Draw opaque size. */
	/* This will be loaded by the CP for a DrawOpaque call by fetching a memory address containing last bufferfilledsize associated with the previous stream out buffer bound to the IA. */
    VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                 = 0x00028b30,	/* Draw opaque vertex stride. */
	/* vertex stride used for draw opaque call */
    VGT_STRMOUT_BASE_OFFSET_HI_0                          = 0x00028b44,	/* Upper 6-bits of 40-bits Stream out base_0 + offset_0. This register is snooped by SQ. */
	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask    = 0x3f << 0,	/* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift   = 0,
    VGT_STRMOUT_BASE_OFFSET_HI_1                          = 0x00028b48,	/* Upper 6-bits of 40-bits Stream out base_1 + offset_1. This register is snooped by SQ. */
	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask    = 0x3f << 0,	/* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift   = 0,
    VGT_STRMOUT_BASE_OFFSET_HI_2                          = 0x00028b4c,	/* Upper 6-bits of 40-bits Stream out base_2 + offset_2. This register is snooped by SQ. */
	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask    = 0x3f << 0,	/* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift   = 0,
    VGT_STRMOUT_BASE_OFFSET_HI_3                          = 0x00028b50,	/* Upper 6-bits of 40-bits Stream out base_3 + offset_3. This register is snooped by SQ. */
	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask    = 0x3f << 0,	/* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift   = 0,
    PA_SC_LINE_CNTL                                       = 0x00028c00,	/* Line Drawing Control */
	BRES_CNTL_mask                                    = 0xff << 0,	/* This field indicates what the hardware should do on the minor axis of the line, when the line is exactly half way between two pixels (bresenham error = 0). This field is a LUT (BRES_CNTL[7:0]) with 1 bit per entry: if the bit BRES_CNTL[index] = '1', the minor axis is stepped. The 3-bit index is calculated from the attributes of the line: ((abs(Xend - Xstart) >= abs(Yend - Ystart)) << 2) | ((Xstart <= Xend) << 1) | (Ystart <= Yend) */
	BRES_CNTL_shift                                   = 0,
	USE_BRES_CNTL_bit                                 = 1 << 8,	/* If set, use the bresenham control field. Should be set for 2D lines, clear for 3D lines. */
	EXPAND_LINE_WIDTH_bit                             = 1 << 9,	/* If set, the line width will be expanded by 1/cos(a), where a is the minimum angle from horizontal or vertical. This bit most likely should be set whenever MSAA_ENABLE is set or Line Antialiasing is being done in pixel shader. */
	LAST_PIXEL_bit                                    = 1 << 10,	/* If set the last pixel of a line will not be killed by the diamond exit rule. */
    PA_SC_AA_CONFIG                                       = 0x00028c04,	/* Multisample Antialiasing Control */
	MSAA_NUM_SAMPLES_mask                             = 0x03 << 0,	/* Specifies the number of samples to use for MSAA. Representative of size of surface allocated for Color and Depth. 0 = 1-sample, 1 = 2-sample, 2 = 4-sample, 3 = 8-sample. */
	MSAA_NUM_SAMPLES_shift                            = 0,
	AA_MASK_CENTROID_DTMN_bit                         = 1 << 4,	/* Specifies whether to apply the MSAA Mask before or after the centroid determination. 0 = before; 1 = after. */
	MAX_SAMPLE_DIST_mask                              = 0x0f << 13,	/* Specifies the maximum distance (in subpixels) between the pixel center and the outermost subpixel sample. This value is used to optimize coarse walk and quad identity. Should be set to 0 when not anti-aliasing. Max value for R600 should be 8 (16ths). */
	MAX_SAMPLE_DIST_shift                             = 13,
    PA_SU_VTX_CNTL                                        = 0x00028c08,	/* Miscellaneous SU Control */
	PIX_CENTER_bit                                    = 1 << 0,	/* Specifies where the pixel center of the incoming vertex is. The drawing engine itself has pixel centers @ 0.5, so if this bit is '0', 0.5 will be added to the X,Y coordinates to move the incoming vertex onto our internal grid. */
	PA_SU_VTX_CNTL__ROUND_MODE_mask                   = 0x03 << 1,	/* Controls conversion of X,Y coordinates from IEEE to fixed-point */
	PA_SU_VTX_CNTL__ROUND_MODE_shift                  = 1,
	    X_TRUNCATE                                    = 0x00,	/* 0 = Truncate (OGL) */
	    X_ROUND                                       = 0x01,	/* 1 = Round */
	    X_ROUND_TO_EVEN                               = 0x02,	/* 2 = Round to Even (D3D) */
	    X_ROUND_TO_ODD                                = 0x03,	/* 3 = Round to Odd */
	QUANT_MODE_mask                                   = 0x07 << 3,	/* Controls conversion of X,Y coordinates from IEEE to fixed-point */
	QUANT_MODE_shift                                  = 3,
	    X_1_16TH                                      = 0x00,	/* 0 = 1/16th */
	    X_1_8TH                                       = 0x01,	/* 1 = 1/8th */
	    X_1_4TH                                       = 0x02,	/* 2 = 1/4th */
	    X_1_2                                         = 0x03,	/* 3 = 1/2 */
	    X_1                                           = 0x04,	/* 4 = 1 */
	    X_1_256TH                                     = 0x05,	/* 5 = 1/256th */
    PA_CL_GB_VERT_CLIP_ADJ                                = 0x00028c0c,	/* Vertical Guard Band Clip Adjust Register */
	/* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
    PA_CL_GB_VERT_DISC_ADJ                                = 0x00028c10,	/* Vertical Guard Band Discard Adjust Register */
	/* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
    PA_CL_GB_HORZ_CLIP_ADJ                                = 0x00028c14,	/* Horizontal Guard Band Clip Adjust Register */
	/* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
    PA_CL_GB_HORZ_DISC_ADJ                                = 0x00028c18,	/* Horizontal Guard Band Discard Adjust Register */
	/* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
    PA_SC_AA_SAMPLE_LOCS_MCTX                             = 0x00028c1c,	/* Multi-Sample Programmable Sample Locations for 2-Sample, 4-Sample, 8-Sample First Word - Used by SC, SPI & CB`s */
/* 	S0_X_mask                                         = 0x0f << 0, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_X_shift                                        = 0, */
/* 	S0_Y_mask                                         = 0x0f << 4, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S0_Y_shift                                        = 4, */
/* 	S1_X_mask                                         = 0x0f << 8, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_X_shift                                        = 8, */
/* 	S1_Y_mask                                         = 0x0f << 12, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S1_Y_shift                                        = 12, */
/* 	S2_X_mask                                         = 0x0f << 16, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S2_X_shift                                        = 16, */
/* 	S2_Y_mask                                         = 0x0f << 20, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S2_Y_shift                                        = 20, */
/* 	S3_X_mask                                         = 0x0f << 24, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S3_X_shift                                        = 24, */
/* 	S3_Y_mask                                         = 0x0f << 28, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S3_Y_shift                                        = 28, */
    PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX                      = 0x00028c20,	/* Multi-Sample Programmable Sample Locations for 8-Sample Second Word - Used by SC, SPI & CB`s */
/* 	S4_X_mask                                         = 0x0f << 0, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S4_X_shift                                        = 0, */
/* 	S4_Y_mask                                         = 0x0f << 4, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S4_Y_shift                                        = 4, */
/* 	S5_X_mask                                         = 0x0f << 8, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S5_X_shift                                        = 8, */
/* 	S5_Y_mask                                         = 0x0f << 12, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S5_Y_shift                                        = 12, */
/* 	S6_X_mask                                         = 0x0f << 16, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S6_X_shift                                        = 16, */
/* 	S6_Y_mask                                         = 0x0f << 20, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S6_Y_shift                                        = 20, */
/* 	S7_X_mask                                         = 0x0f << 24, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S7_X_shift                                        = 24, */
/* 	S7_Y_mask                                         = 0x0f << 28, */	/* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* 	S7_Y_shift                                        = 28, */
    CB_CLRCMP_CONTROL                                     = 0x00028c30,	/* This register controls color keying, which masks individual pixel writes based on comparing the source (pre-ROP) color and/or the dest (frame buffer) color to comparison values, after masking both by CLRCMP_MSK. Source color keying is a legacy operation that is not supported if any enabled render target has >32-bit pixels or sets the BLEND_FLOAT32 bit. */
	CLRCMP_FCN_SRC_mask                               = 0x07 << 0,	/* Color Compare Source Function, Specifies the function to perform on the source color compare. */
	CLRCMP_FCN_SRC_shift                              = 0,
	    CLRCMP_DRAW_ALWAYS                            = 0x00,	/* CLRCMP_DRAW_ALWAYS: always draw */
	    CLRCMP_DRAW_NEVER                             = 0x01,	/* CLRCMP_DRAW_NEVER: never draw */
	    CLRCMP_DRAW_ON_NEQ                            = 0x04,	/* CLRCMP_DRAW_ON_NEQ: draw if xxx!=CLRCMP_XXX */
	    CLRCMP_DRAW_ON_EQ                             = 0x05,	/* CLRCMP_DRAW_ON_EQ: draw if xxx==CLRCMP_XXX */
	CLRCMP_FCN_DST_mask                               = 0x07 << 8,	/* Color Compare Destination Function, Specifies the function to perform on the destination color compare. */
	CLRCMP_FCN_DST_shift                              = 8,
/* 	    CLRCMP_DRAW_ALWAYS                            = 0x00, */	/* CLRCMP_DRAW_ALWAYS: always draw */
/* 	    CLRCMP_DRAW_NEVER                             = 0x01, */	/* CLRCMP_DRAW_NEVER: never draw */
/* 	    CLRCMP_DRAW_ON_NEQ                            = 0x04, */	/* CLRCMP_DRAW_ON_NEQ: draw if xxx!=CLRCMP_XXX */
/* 	    CLRCMP_DRAW_ON_EQ                             = 0x05, */	/* CLRCMP_DRAW_ON_EQ: draw if xxx==CLRCMP_XXX */
	CLRCMP_FCN_SEL_mask                               = 0x03 << 24,	/* Color Compare Function Select, Selects which color compare results to use in the final compare results. */
	CLRCMP_FCN_SEL_shift                              = 24,
	    CLRCMP_SEL_DST                                = 0x00,	/* CLRCMP_SEL_DST: use CLRCMP_FCN_DST */
	    CLRCMP_SEL_SRC                                = 0x01,	/* CLRCMP_SEL_SRC: use CLRCMP_FCN_SRC */
	    CLRCMP_SEL_AND                                = 0x02,	/* CLRCMP_SEL_AND: draw if allowed by both CLRCMP_FCN_SRC and CLRCMP_FCN_DST */
    CB_CLRCMP_SRC                                         = 0x00028c34,
	/* Comparison color for source, in frame buffer format. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
    CB_CLRCMP_DST                                         = 0x00028c38,
	/* Comparison color for destination, in frame buffer format. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
    CB_CLRCMP_MSK                                         = 0x00028c3c,
	/* Compare mask, which is ANDed with source and destination before the comparison. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
    PA_SC_AA_MASK                                         = 0x00028c48,	/* Multisample AA Mask */
	/* This mask is used for Multisample AA. It contains four 8-bit masks. The four masks are applied to each 2x2 block of screen-aligned pixels as follows: ULC 7:0, URC 15:8, LLC 23:16, LRC 31:24; LSB is Sample0, MSB is Sample7. */
    VGT_VERTEX_REUSE_BLOCK_CNTL                           = 0x00028c58,	/* This register controls the behavior of the Vertex Reuse block at the backend of the VGT. This register is relevant only if the VGT_OUTPUT_PATH_CNTL register (or the prim type in Major Mode 0) specifies the Vertex Reuse Block for the VGT backend path. */
	VTX_REUSE_DEPTH_mask                              = 0xff << 0,	/* In general, for processing triangles, the vertex reuse depth should be programmed to ((num_enabled_pipes * 4) - 2) */
	VTX_REUSE_DEPTH_shift                             = 0,
    VGT_OUT_DEALLOC_CNTL                                  = 0x00028c5c,	/* This register controls, within a process vector, when the previous process vector is de-allocated. */
	DEALLOC_DIST_mask                                 = 0x7f << 0,	/* Distance (in indices) which the vertex vector slot assignment leads the deallocation. This field should typically be set to (num_enabled_pipes * 4). */
	DEALLOC_DIST_shift                                = 0,
    DB_RENDER_CONTROL                                     = 0x00028d0c,	/* Depth/stencil render control bits: clear enables, Z/stencil copy to color render target 0, HTILE resummarize, compression disables, copy-sample selection and ZPass count control (summarized from the field descriptions below). */
	DEPTH_CLEAR_ENABLE_bit                            = 1 << 0,	/* Clears Z to the Clear Value. */
	STENCIL_CLEAR_ENABLE_bit                          = 1 << 1,	/* Clears Stencil to the Clear Value */
	DEPTH_COPY_bit                                    = 1 << 2,	/* Enables Z expansion to color render target 0. CB must be programmed to the desired destination format. */
	STENCIL_COPY_bit                                  = 1 << 3,	/* Enables Stencil expansion to color render target 0. CB must be programmed to the desired destination format. */
	RESUMMARIZE_ENABLE_bit                            = 1 << 4,	/* If set, all tiles touched will update the HTILE surface info. */
	STENCIL_COMPRESS_DISABLE_bit                      = 1 << 5,	/* NOTE(review): no description available here; by name, presumably disables stencil compression - confirm against the register reference. */
	DEPTH_COMPRESS_DISABLE_bit                        = 1 << 6,	/* NOTE(review): no description available here; by name, presumably disables depth compression - confirm against the register reference. */
	COPY_CENTROID_bit                                 = 1 << 7,	/* If set, copy the 1st lit sample in the pixel after the COPY_SAMPLE'th sample (wraps back to lower samples). */
	COPY_SAMPLE_mask                                  = 0x07 << 8,	/* If COPY_CENTROID, copy 1st lit after this sample number. Else copy this sample whether lit or not. */
	COPY_SAMPLE_shift                                 = 8,
	ZPASS_INCREMENT_DISABLE_bit                       = 1 << 11,	/* Disable incrementing the ZPass count for this context. */
    DB_RENDER_OVERRIDE                                    = 0x00028d10,	/* Override controls for hierarchical Z / hierarchical stencil culling and related depth optimizations; several fields take effect regardless of DB_SHADER_CONTROL and other render state (summarized from the field descriptions below). */
	FORCE_HIZ_ENABLE_mask                             = 0x03 << 0,	/* Forces hierarchical depth culling to be enabled ignoring what is in DB_SHADER_CONTROL and all other render states. */
	FORCE_HIZ_ENABLE_shift                            = 0,
	    FORCE_OFF                                     = 0x00,	/* FORCE_OFF */
	    FORCE_ENABLE                                  = 0x01,	/* FORCE_ENABLE */
	    FORCE_DISABLE                                 = 0x02,	/* FORCE_DISABLE */
	    FORCE_RESERVED                                = 0x03,	/* FORCE_RESERVED */
	FORCE_HIS_ENABLE0_mask                            = 0x03 << 2,	/* Forces hierarchical stencil culling to be enabled for compare state 0, ignoring what is in DB_SHADER_CONTROL and all other render states. */
	FORCE_HIS_ENABLE0_shift                           = 2,
	/* The FORCE_* values are listed again for each field that uses them, but only as comments: the names are already defined under FORCE_HIZ_ENABLE above (presumably commented out to avoid redefining the same enumerators). */
/* 	    FORCE_OFF                                     = 0x00, */	/* FORCE_OFF */
/* 	    FORCE_ENABLE                                  = 0x01, */	/* FORCE_ENABLE */
/* 	    FORCE_DISABLE                                 = 0x02, */	/* FORCE_DISABLE */
/* 	    FORCE_RESERVED                                = 0x03, */	/* FORCE_RESERVED */
	FORCE_HIS_ENABLE1_mask                            = 0x03 << 4,	/* Forces hierarchical stencil culling to be enabled for compare state 1, ignoring what is in DB_SHADER_CONTROL and all other render states. */
	FORCE_HIS_ENABLE1_shift                           = 4,
/* 	    FORCE_OFF                                     = 0x00, */	/* FORCE_OFF */
/* 	    FORCE_ENABLE                                  = 0x01, */	/* FORCE_ENABLE */
/* 	    FORCE_DISABLE                                 = 0x02, */	/* FORCE_DISABLE */
/* 	    FORCE_RESERVED                                = 0x03, */	/* FORCE_RESERVED */
	FORCE_SHADER_Z_ORDER_bit                          = 1 << 6,	/* Forces the setting specified in DB_SHADER_CONTROL.Z_ORDER to be used for early/late/re Z+S test. If not set the shader preference is used unless precluded by other render states. */
	FAST_Z_DISABLE_bit                                = 1 << 7,	/* Do not accelerate Z clears or write operations. Prevents killing quads before detail rasterization if depth operations are needed. */
	FAST_STENCIL_DISABLE_bit                          = 1 << 8,	/* Do not accelerate stencil clears or write operations. Prevents killing quads before detail rasterization if stencil operations are needed. */
	NOOP_CULL_DISABLE_bit                             = 1 << 9,	/* Prevents hierarchically killing quads that will pass Z and Stencil, but do not write Z, Stencil or Color. This would be used to make sure ZPass counts are perfect. */
	FORCE_COLOR_KILL_bit                              = 1 << 10,	/* DB does any possible depth optimizations assuming the shader results are not needed and kills all samples before the color operation. */
	FORCE_Z_READ_bit                                  = 1 << 11,	/* Read all Z data for a tile even if it is not needed. Used for resummarization blts. */
	FORCE_STENCIL_READ_bit                            = 1 << 12,	/* Read all stencil data for a tile even if it is not needed. Used for resummarization blts. */
	FORCE_FULL_Z_RANGE_mask                           = 0x03 << 13,	/* Forces hierarchical depth to treat each primitive as if its range is 0.0 -> 1.0f or not. If disabled, it is implicitly derived from DB_SHADER_CONTROL.Z_EXPORT_ENABLE and other enabling registers. Can be used to reset the Z range to 0-1 as well. */
	FORCE_FULL_Z_RANGE_shift                          = 13,
/* 	    FORCE_OFF                                     = 0x00, */	/* FORCE_OFF */
/* 	    FORCE_ENABLE                                  = 0x01, */	/* FORCE_ENABLE */
/* 	    FORCE_DISABLE                                 = 0x02, */	/* FORCE_DISABLE */
/* 	    FORCE_RESERVED                                = 0x03, */	/* FORCE_RESERVED */
	FORCE_QC_SMASK_CONFLICT_bit                       = 1 << 15,	/* Forces Quad Coherency to mark a quad with a matching dtileid, x, and y as a conflict and stall it even if the sample mask doesn't overlap. */
	DISABLE_VIEWPORT_CLAMP_bit                        = 1 << 16,	/* Disables the viewport clamp, which allows Z data to go through untouched. */
	IGNORE_SC_ZRANGE_bit                              = 1 << 17,	/* Ignore the SC's vertex bounds on the minZ/maxZ for a tile during HiZ. */
    DB_HTILE_SURFACE                                      = 0x00028d24,
	HTILE_WIDTH_bit                                   = 1 << 0,	/* How many pixels wide each entry in the htile buffer represents. 0 = 4, 1 = 8 */
	HTILE_HEIGHT_bit                                  = 1 << 1,	/* How many pixels high each entry in the htile buffer represents. 0 = 4, 1 = 8 */
	LINEAR_bit                                        = 1 << 2,	/* Surface is stored linearly in swaths of 8 htiles high until the surface is complete. */
	FULL_CACHE_bit                                    = 1 << 3,	/* This htile buffer uses the entire htile cache. */
	HTILE_USES_PRELOAD_WIN_bit                        = 1 << 4,	/* If set, the htile surface dimensions will be that of the preload window; otherwise, it will be that of the depth buffer */
	PRELOAD_bit                                       = 1 << 5,	/* Preload all data that fits as soon as room is available once the VGT_DRAW_INITIATOR is seen on a context. */
	PREFETCH_WIDTH_mask                               = 0x3f << 6,	/* The Prefetch window width. Prefetcher tries to keep this window around the last rasterized htile in cache at all times. */
	PREFETCH_WIDTH_shift                              = 6,
	PREFETCH_HEIGHT_mask                              = 0x3f << 12,	/* The Prefetch window height. Prefetcher tries to keep this window around the last rasterized htile in cache at all times. */
	PREFETCH_HEIGHT_shift                             = 12,
    DB_SRESULTS_COMPARE_STATE1                            = 0x00028d2c,
	COMPAREFUNC1_mask                                 = 0x07 << 0,	/* Used to determine the meaning of the MayPass and MayFail smask bits during hierarchical stencil testing. NEVER or ALWAYS invalidates the SResults in the HTile Buffer */
	COMPAREFUNC1_shift                                = 0,
/* 	    REF_NEVER                                     = 0x00, */	/* REF_NEVER: never pass */
/* 	    REF_LESS                                      = 0x01, */	/* REF_LESS: pass if left < right */
/* 	    REF_EQUAL                                     = 0x02, */	/* REF_EQUAL: pass if left = right */
/* 	    REF_LEQUAL                                    = 0x03, */	/* REF_LEQUAL: pass if left <= right */
/* 	    REF_GREATER                                   = 0x04, */	/* REF_GREATER: pass if left > right */
/* 	    REF_NOTEQUAL                                  = 0x05, */	/* REF_NOTEQUAL: pass if left != right */
/* 	    REF_GEQUAL                                    = 0x06, */	/* REF_GEQUAL: pass if left >= right */
/* 	    REF_ALWAYS                                    = 0x07, */	/* REF_ALWAYS: always pass */
	COMPAREVALUE1_mask                                = 0xff << 4,	/* Stencil value compared against the stencil reference value during hierarchical stencil testing. */
	COMPAREVALUE1_shift                               = 4,
	COMPAREMASK1_mask                                 = 0xff << 12,	/* This value is ANDed with the SResults compare value. A mask of 0 invalidates the SResults in the HTile Buffer */
	COMPAREMASK1_shift                                = 12,
	ENABLE1_bit                                       = 1 << 24,	/* If set, use SResults in HiS test. Set when compare state is known and clear when doing a resummarize. */
    DB_PRELOAD_CONTROL                                    = 0x00028d30,
	START_X_mask                                      = 0xff << 0,	/* Starting X position of the preload window, in 32 pixel increments */
	START_X_shift                                     = 0,
	START_Y_mask                                      = 0xff << 8,	/* Starting Y position of the preload window, in 32 pixel increments */
	START_Y_shift                                     = 8,
	MAX_X_mask                                        = 0xff << 16,	/* Ending X position of the preload window, in 32 pixel increments */
	MAX_X_shift                                       = 16,
	MAX_Y_mask                                        = 0xff << 24,	/* Ending Y position of the preload window, in 32 pixel increments */
	MAX_Y_shift                                       = 24,
    DB_PREFETCH_LIMIT                                     = 0x00028d34,
	DEPTH_HEIGHT_TILE_MAX_mask                        = 0x3ff << 0,	/* Height of the depth buffer in 8x8 pixels (height - 1) */
	DEPTH_HEIGHT_TILE_MAX_shift                       = 0,
    PA_SU_POLY_OFFSET_DB_FMT_CNTL                         = 0x00028df8,	/* Polygon Offset Depth Buffer Format Control */
	POLY_OFFSET_NEG_NUM_DB_BITS_mask                  = 0xff << 0,	/* Specifies the number of bits in the depth buffer format. Specified as a negative value typically. For fixed point formats, should be number of bits (i.e. -16, -24), for float formats should be number of mantissa bits (i.e. -23). This is a signed 8b value, range -128 to 127. */
	POLY_OFFSET_NEG_NUM_DB_BITS_shift                 = 0,
	POLY_OFFSET_DB_IS_FLOAT_FMT_bit                   = 1 << 8,	/* Specifies whether the depth buffer format is fixed or float. The NEG_NUM_DB_BITS is used differently (i.e. different POLY_OFFSET equation) for fixed vs. float buffer formats. */
    PA_SU_POLY_OFFSET_CLAMP                               = 0x00028dfc,	/* Clamp Value for Polygon Offset */
	/* Specifies the maximum (if clamp is positive) or minimum (if clamp is negative) value clamp for the polygon offset result. */
    PA_SU_POLY_OFFSET_FRONT_SCALE                         = 0x00028e00,	/* Front-Facing Polygon Offset Scale */
	/* Specifies polygon offset scale for front-facing polygons; 32-bit IEEE float format. */
    PA_SU_POLY_OFFSET_FRONT_OFFSET                        = 0x00028e04,	/* Front-Facing Polygon Offset Offset */
	/* Specifies polygon offset offset for front-facing polygons; 32b IEEE fixed format. */
    PA_SU_POLY_OFFSET_BACK_SCALE                          = 0x00028e08,	/* Back-Facing Polygon Offset Scale */
	/* Specifies polygon offset scale for back-facing polygons; 32-bit IEEE float format. */
    PA_SU_POLY_OFFSET_BACK_OFFSET                         = 0x00028e0c,	/* Back-Facing Polygon Offset Offset */
	/* Specifies polygon offset offset for back-facing polygons; 32b IEEE fixed format. */
    PA_CL_POINT_X_RAD                                     = 0x00028e10,	/* Point Sprite X Radius Expansion */
	/* DATA_REGISTER:  */
    PA_CL_POINT_Y_RAD                                     = 0x00028e14,	/* Point Sprite Y Radius Expansion */
	/* DATA_REGISTER:  */
    PA_CL_POINT_SIZE                                      = 0x00028e18,	/* Point Sprite Constant Size */
	/* DATA_REGISTER:  */
    PA_CL_POINT_CULL_RAD                                  = 0x00028e1c,	/* Point Sprite Culling Radius Expansion SQRT(XRadExp^2 + YRadExp^2) */
	/* DATA_REGISTER:  */
    PA_CL_UCP_0_X                                         = 0x00028e20,	/* User Clip Plane Data */
	PA_CL_UCP_0_X_num                                 = 6,
	PA_CL_UCP_0_X_offset                              = 16,
	/* DATA_REGISTER:  */
    PA_CL_UCP_0_Y                                         = 0x00028e24,	/* User Clip Plane Data */
	PA_CL_UCP_0_Y_num                                 = 6,
	PA_CL_UCP_0_Y_offset                              = 16,
	/* DATA_REGISTER:  */
    PA_CL_UCP_0_Z                                         = 0x00028e28,	/* User Clip Plane Data */
	PA_CL_UCP_0_Z_num                                 = 6,
	PA_CL_UCP_0_Z_offset                              = 16,
	/* DATA_REGISTER:  */
    SQ_ALU_CONSTANT0_0                                    = 0x00030000,	/* (64-state) ALU Constant store data for use in DX9 mode (DX10 mode uses the constant-cache instead and this constant-file is not available). All four components of a constant must be written for that constant to be updated - the physical write to the constant store only occurs after the fourth component has been written. The first set of 256 constants (0-255) are reserved for the pixel shader (PS). The second set of 256 constants (256-511) are reserved for the vertex shader (VS). None are available to the GS or ES. */
	/* X: Format is IEEE float */
    SQ_ALU_CONSTANT1_0                                    = 0x00030004,
	/* Y: Format is IEEE float */
    SQ_ALU_CONSTANT2_0                                    = 0x00030008,
	/* Z: Format is IEEE float */
    SQ_ALU_CONSTANT3_0                                    = 0x0003000c,
	/* W: Format is IEEE float */
    SQ_VTX_CONSTANT_WORD0_0                               = 0x00038000,
	/* BASE_ADDRESS:  */
    SQ_TEX_RESOURCE_WORD0_0                               = 0x00038000,
	DIM_mask                                          = 0x07 << 0,
	DIM_shift                                         = 0,
	    SQ_TEX_DIM_1D                                 = 0x00,	/* SQ_TEX_DIM_1D */
	    SQ_TEX_DIM_2D                                 = 0x01,	/* SQ_TEX_DIM_2D */
	    SQ_TEX_DIM_3D                                 = 0x02,	/* SQ_TEX_DIM_3D */
	    SQ_TEX_DIM_CUBEMAP                            = 0x03,	/* SQ_TEX_DIM_CUBEMAP */
	    SQ_TEX_DIM_1D_ARRAY                           = 0x04,	/* SQ_TEX_DIM_1D_ARRAY */
	    SQ_TEX_DIM_2D_ARRAY                           = 0x05,	/* SQ_TEX_DIM_2D_ARRAY */
	    SQ_TEX_DIM_2D_MSAA                            = 0x06,	/* SQ_TEX_DIM_2D_MSAA */
	    SQ_TEX_DIM_2D_ARRAY_MSAA                      = 0x07,	/* SQ_TEX_DIM_2D_ARRAY_MSAA */
	SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask           = 0x0f << 3,
	SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift          = 3,
	TILE_TYPE_bit                                     = 1 << 7,
	PITCH_mask                                        = 0x7ff << 8,
	PITCH_shift                                       = 8,
	TEX_WIDTH_mask                                    = 0x1fff << 19,
	TEX_WIDTH_shift                                   = 19,
    SQ_VTX_CONSTANT_WORD1_0                               = 0x00038004,
	/* SIZE:  */
    SQ_TEX_RESOURCE_WORD1_0                               = 0x00038004,
	TEX_HEIGHT_mask                                   = 0x1fff << 0,
	TEX_HEIGHT_shift                                  = 0,
	TEX_DEPTH_mask                                    = 0x1fff << 13,
	TEX_DEPTH_shift                                   = 13,
	SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask         = 0x3f << 26,
	SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift        = 26,
    SQ_VTX_CONSTANT_WORD2_0                               = 0x00038008,
	BASE_ADDRESS_HI_mask                              = 0xff << 0,
	BASE_ADDRESS_HI_shift                             = 0,
	SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask              = 0x7ff << 8,
	SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift             = 8,
	SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit              = 1 << 19,
	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift        = 20,
	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask      = 0x03 << 26,
	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift     = 26,
/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */	/* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [-1, 1] if signed. */
/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */	/* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */	/* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
	SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit      = 1 << 28,
	SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit         = 1 << 29,
	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask         = 0x03 << 30,
	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift        = 30,
/* 	    SQ_ENDIAN_NONE                                = 0x00, */	/* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
/* 	    SQ_ENDIAN_8IN16                               = 0x01, */	/* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
/* 	    SQ_ENDIAN_8IN32                               = 0x02, */	/* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
    SQ_TEX_RESOURCE_WORD2_0                               = 0x00038008,
	/* BASE_ADDRESS:  */
    SQ_VTX_CONSTANT_WORD3_0                               = 0x0003800c,
	MEM_REQUEST_SIZE_mask                             = 0x03 << 0,
	MEM_REQUEST_SIZE_shift                            = 0,
    SQ_TEX_RESOURCE_WORD3_0                               = 0x0003800c,
	/* MIP_ADDRESS:  */
    SQ_TEX_RESOURCE_WORD4_0                               = 0x00038010,
	FORMAT_COMP_X_mask                                = 0x03 << 0,
	FORMAT_COMP_X_shift                               = 0,
	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00,	/* SQ_FORMAT_COMP_UNSIGNED */
	    SQ_FORMAT_COMP_SIGNED                         = 0x01,	/* SQ_FORMAT_COMP_SIGNED */
	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02,	/* SQ_FORMAT_COMP_UNSIGNED_BIASED */
	FORMAT_COMP_Y_mask                                = 0x03 << 2,
	FORMAT_COMP_Y_shift                               = 2,
/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */	/* SQ_FORMAT_COMP_UNSIGNED */
/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */	/* SQ_FORMAT_COMP_SIGNED */
/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */	/* SQ_FORMAT_COMP_UNSIGNED_BIASED */
	FORMAT_COMP_Z_mask                                = 0x03 << 4,
	FORMAT_COMP_Z_shift                               = 4,
/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */	/* SQ_FORMAT_COMP_UNSIGNED */
/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */	/* SQ_FORMAT_COMP_SIGNED */
/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */	/* SQ_FORMAT_COMP_UNSIGNED_BIASED */
	FORMAT_COMP_W_mask                                = 0x03 << 6,
	FORMAT_COMP_W_shift                               = 6,
/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */	/* SQ_FORMAT_COMP_UNSIGNED */
/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */	/* SQ_FORMAT_COMP_SIGNED */
/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */	/* SQ_FORMAT_COMP_UNSIGNED_BIASED */
	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask      = 0x03 << 8,
	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift     = 8,
/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */	/* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [-1, 1] if signed. */
/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */	/* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */	/* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
	SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit         = 1 << 10,
	SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit        = 1 << 11,
	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask         = 0x03 << 12,
	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift        = 12,
/* 	    SQ_ENDIAN_NONE                                = 0x00, */	/* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
/* 	    SQ_ENDIAN_8IN16                               = 0x01, */	/* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
/* 	    SQ_ENDIAN_8IN32                               = 0x02, */	/* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
	REQUEST_SIZE_mask                                 = 0x03 << 14,
	REQUEST_SIZE_shift                                = 14,
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask           = 0x07 << 16,
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift          = 16,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask           = 0x07 << 19,
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift          = 19,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask           = 0x07 << 22,
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift          = 22,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask           = 0x07 << 25,
	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift          = 25,
/* 	    SQ_SEL_X                                      = 0x00, */	/* SQ_SEL_X: use X component */
/* 	    SQ_SEL_Y                                      = 0x01, */	/* SQ_SEL_Y: use Y component */
/* 	    SQ_SEL_Z                                      = 0x02, */	/* SQ_SEL_Z: use Z component */
/* 	    SQ_SEL_W                                      = 0x03, */	/* SQ_SEL_W: use W component */
/* 	    SQ_SEL_0                                      = 0x04, */	/* SQ_SEL_0: use constant 0.0 */
/* 	    SQ_SEL_1                                      = 0x05, */	/* SQ_SEL_1: use constant 1.0 */
	BASE_LEVEL_mask                                   = 0x0f << 28,
	BASE_LEVEL_shift                                  = 28,
    SQ_TEX_RESOURCE_WORD5_0                               = 0x00038014,
	LAST_LEVEL_mask                                   = 0x0f << 0,
	LAST_LEVEL_shift                                  = 0,
	BASE_ARRAY_mask                                   = 0x1fff << 4,
	BASE_ARRAY_shift                                  = 4,
	LAST_ARRAY_mask                                   = 0x1fff << 17,
	LAST_ARRAY_shift                                  = 17,
    SQ_TEX_RESOURCE_WORD6_0                               = 0x00038018,	/* Texture resource word 6. Shares address 0x00038018 with SQ_VTX_CONSTANT_WORD6_0; both define a TYPE field at bits 31:30. */
	MPEG_CLAMP_mask                                   = 0x03 << 0,
	MPEG_CLAMP_shift                                  = 0,
	    SQ_TEX_MPEG_CLAMP_OFF                         = 0x00,	/* SQ_TEX_MPEG_CLAMP_OFF: no clamping (FMT_16 is plain 16b fixed/normalized number). */
	    SQ_TEX_MPEG_9                                 = 0x01,	/* SQ_TEX_MPEG_9: consider FMT_16 as s9 in LSBs, clamp range to [-256, 255). */
	    SQ_TEX_MPEG_10                                = 0x02,	/* SQ_TEX_MPEG_10: mask bottom 6b of FMT_16. */
	PERF_MODULATION_mask                              = 0x07 << 5,
	PERF_MODULATION_shift                             = 5,
	INTERLACED_bit                                    = 1 << 8,
	SQ_TEX_RESOURCE_WORD6_0__TYPE_mask                = 0x03 << 30,
	SQ_TEX_RESOURCE_WORD6_0__TYPE_shift               = 30,
	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00,	/* SQ_TEX_VTX_INVALID_TEXTURE */
	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01,	/* SQ_TEX_VTX_INVALID_BUFFER */
	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02,	/* SQ_TEX_VTX_VALID_TEXTURE */
	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03,	/* SQ_TEX_VTX_VALID_BUFFER (NOTE(review): the text "8. Shader Texture Sampler Constants" that followed here appears to be a section heading carried over from the register reference, not part of this value's description.) */
    /* SQ_VTX_CONSTANT_WORD6_0: constant 0x00038018, the same value as SQ_TEX_RESOURCE_WORD6_0. Its TYPE field reuses the SQ_TEX_VTX_* values, which are listed here commented out because they are already defined as enumerators. */
    SQ_VTX_CONSTANT_WORD6_0                               = 0x00038018,
	/* 2-bit field, bits 31:30. NOTE(review): 0x03 << 30 does not fit in a 32-bit signed int; this relies on the compiler wrapping the shift into the sign bit. */
	SQ_VTX_CONSTANT_WORD6_0__TYPE_mask                = 0x03 << 30,
	SQ_VTX_CONSTANT_WORD6_0__TYPE_shift               = 30,
/* 	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00, */	/* SQ_TEX_VTX_INVALID_TEXTURE */
/* 	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01, */	/* SQ_TEX_VTX_INVALID_BUFFER */
/* 	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02, */	/* SQ_TEX_VTX_VALID_TEXTURE */
/* 	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03, */	/* SQ_TEX_VTX_VALID_BUFFER. NOTE(review): the original comment ended with "7. Shader Texture Resource Constants", which looks like a section heading carried over from the source document rather than part of this value's description. */
    /* SQ_TEX_SAMPLER_WORD0_0: constant 0x0003c000 (presumably the register offset -- confirm), followed by its bit-field masks/shifts and field values. Value lists that repeat an earlier field's enumerators are kept commented out. */
    SQ_TEX_SAMPLER_WORD0_0                                = 0x0003c000,
	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask              = 0x07 << 0,	/* 3-bit field, bits 2:0 */
	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift             = 0,
	    SQ_TEX_WRAP                                   = 0x00,	/* SQ_TEX_WRAP */
	    SQ_TEX_MIRROR                                 = 0x01,	/* SQ_TEX_MIRROR */
	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02,	/* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03,	/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [-1,1] */
	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04,	/* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05,	/* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
	    SQ_TEX_CLAMP_BORDER                           = 0x06,	/* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07,	/* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
	CLAMP_Y_mask                                      = 0x07 << 3,	/* 3-bit field, bits 5:3 */
	CLAMP_Y_shift                                     = 3,
/* 	    SQ_TEX_WRAP                                   = 0x00, */	/* SQ_TEX_WRAP */
/* 	    SQ_TEX_MIRROR                                 = 0x01, */	/* SQ_TEX_MIRROR */
/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */	/* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */	/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [-1,1] */
/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */	/* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */	/* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */	/* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */	/* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
	CLAMP_Z_mask                                      = 0x07 << 6,	/* 3-bit field, bits 8:6 */
	CLAMP_Z_shift                                     = 6,
/* 	    SQ_TEX_WRAP                                   = 0x00, */	/* SQ_TEX_WRAP */
/* 	    SQ_TEX_MIRROR                                 = 0x01, */	/* SQ_TEX_MIRROR */
/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */	/* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */	/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [-1,1] */
/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */	/* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */	/* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */	/* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */	/* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
	XY_MAG_FILTER_mask                                = 0x07 << 9,	/* 3-bit field, bits 11:9 */
	XY_MAG_FILTER_shift                               = 9,
	    SQ_TEX_XY_FILTER_POINT                        = 0x00,	/* SQ_TEX_XY_FILTER_POINT */
	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01,	/* SQ_TEX_XY_FILTER_BILINEAR */
	    SQ_TEX_XY_FILTER_BICUBIC                      = 0x02,	/* SQ_TEX_XY_FILTER_BICUBIC */
	XY_MIN_FILTER_mask                                = 0x07 << 12,	/* 3-bit field, bits 14:12 */
	XY_MIN_FILTER_shift                               = 12,
/* 	    SQ_TEX_XY_FILTER_POINT                        = 0x00, */	/* SQ_TEX_XY_FILTER_POINT */
/* 	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01, */	/* SQ_TEX_XY_FILTER_BILINEAR */
/* 	    SQ_TEX_XY_FILTER_BICUBIC                      = 0x02, */	/* SQ_TEX_XY_FILTER_BICUBIC */
	Z_FILTER_mask                                     = 0x03 << 15,	/* 2-bit field, bits 16:15 */
	Z_FILTER_shift                                    = 15,
	    SQ_TEX_Z_FILTER_NONE                          = 0x00,	/* SQ_TEX_Z_FILTER_NONE */
	    SQ_TEX_Z_FILTER_POINT                         = 0x01,	/* SQ_TEX_Z_FILTER_POINT */
	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02,	/* SQ_TEX_Z_FILTER_LINEAR */
	MIP_FILTER_mask                                   = 0x03 << 17,	/* 2-bit field, bits 18:17; the value list below repeats the SQ_TEX_Z_FILTER_* names */
	MIP_FILTER_shift                                  = 17,
/* 	    SQ_TEX_Z_FILTER_NONE                          = 0x00, */	/* SQ_TEX_Z_FILTER_NONE */
/* 	    SQ_TEX_Z_FILTER_POINT                         = 0x01, */	/* SQ_TEX_Z_FILTER_POINT */
/* 	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02, */	/* SQ_TEX_Z_FILTER_LINEAR */
	BORDER_COLOR_TYPE_mask                            = 0x03 << 22,	/* 2-bit field, bits 23:22 */
	BORDER_COLOR_TYPE_shift                           = 22,
	    SQ_TEX_BORDER_COLOR_TRANS_BLACK               = 0x00,	/* SQ_TEX_BORDER_COLOR_TRANS_BLACK: (0.0, 0.0, 0.0, 0.0) */
	    SQ_TEX_BORDER_COLOR_OPAQUE_BLACK              = 0x01,	/* SQ_TEX_BORDER_COLOR_OPAQUE_BLACK: (0.0, 0.0, 0.0, 1.0) */
	    SQ_TEX_BORDER_COLOR_OPAQUE_WHITE              = 0x02,	/* SQ_TEX_BORDER_COLOR_OPAQUE_WHITE: (1.0, 1.0, 1.0, 1.0) */
	    SQ_TEX_BORDER_COLOR_REGISTER                  = 0x03,	/* SQ_TEX_BORDER_COLOR_REGISTER: use BORDER_COLOR_[XYZW] */
	POINT_SAMPLING_CLAMP_bit                          = 1 << 24,	/* single-bit flag, bit 24 */
	TEX_ARRAY_OVERRIDE_bit                            = 1 << 25,	/* single-bit flag, bit 25 */
	DEPTH_COMPARE_FUNCTION_mask                       = 0x07 << 26,	/* 3-bit field, bits 28:26 */
	DEPTH_COMPARE_FUNCTION_shift                      = 26,
	    SQ_TEX_DEPTH_COMPARE_NEVER                    = 0x00,	/* SQ_TEX_DEPTH_COMPARE_NEVER: always 0 */
	    SQ_TEX_DEPTH_COMPARE_LESS                     = 0x01,	/* SQ_TEX_DEPTH_COMPARE_LESS: 1 if incoming Z < fetched data */
	    SQ_TEX_DEPTH_COMPARE_EQUAL                    = 0x02,	/* SQ_TEX_DEPTH_COMPARE_EQUAL: 1 if incoming Z == fetched data */
	    SQ_TEX_DEPTH_COMPARE_LESSEQUAL                = 0x03,	/* SQ_TEX_DEPTH_COMPARE_LESSEQUAL: 1 if incoming Z <= fetched data */
	    SQ_TEX_DEPTH_COMPARE_GREATER                  = 0x04,	/* SQ_TEX_DEPTH_COMPARE_GREATER: 1 if incoming Z > fetched data */
	    SQ_TEX_DEPTH_COMPARE_NOTEQUAL                 = 0x05,	/* SQ_TEX_DEPTH_COMPARE_NOTEQUAL: 1 if incoming Z != fetched data */
	    SQ_TEX_DEPTH_COMPARE_GREATEREQUAL             = 0x06,	/* SQ_TEX_DEPTH_COMPARE_GREATEREQUAL: 1 if incoming Z >= fetched data */
	    SQ_TEX_DEPTH_COMPARE_ALWAYS                   = 0x07,	/* SQ_TEX_DEPTH_COMPARE_ALWAYS: always 1 */
	CHROMA_KEY_mask                                   = 0x03 << 29,	/* 2-bit field, bits 30:29 */
	CHROMA_KEY_shift                                  = 29,
	    SQ_TEX_CHROMA_KEY_DISABLED                    = 0x00,	/* SQ_TEX_CHROMA_KEY_DISABLED: no chroma keying */
	    SQ_TEX_CHROMA_KEY_KILL                        = 0x01,	/* SQ_TEX_CHROMA_KEY_KILL: returns negative value if any texel matches chroma key */
	    SQ_TEX_CHROMA_KEY_BLEND                       = 0x02,	/* SQ_TEX_CHROMA_KEY_BLEND: sets matching texels to 0 before blending */
	/* Single-bit flag, bit 31. NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit signed int; this relies on the compiler producing 0x80000000. */
	LOD_USES_MINOR_AXIS_bit                           = 1 << 31,
    /* SQ_TEX_SAMPLER_WORD1_0: constant 0x0003c004 (presumably the register offset -- confirm), followed by the mask/shift pairs of its three bit-fields. */
    SQ_TEX_SAMPLER_WORD1_0                                = 0x0003c004,
	MIN_LOD_mask                                      = 0x3ff << 0,	/* 10-bit field, bits 9:0 */
	MIN_LOD_shift                                     = 0,
	MAX_LOD_mask                                      = 0x3ff << 10,	/* 10-bit field, bits 19:10 */
	MAX_LOD_shift                                     = 10,
	/* 12-bit field, bits 31:20. NOTE(review): 0xfff << 20 does not fit in a 32-bit signed int; this relies on the compiler wrapping the shift into the sign bit. */
	SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask             = 0xfff << 20,
	SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift            = 20,
    /* SQ_TEX_SAMPLER_WORD2_0: constant 0x0003c008 (presumably the register offset -- confirm), followed by its bit-field masks/shifts and single-bit flags. */
    SQ_TEX_SAMPLER_WORD2_0                                = 0x0003c008,
	LOD_BIAS_SEC_mask                                 = 0xfff << 0,	/* 12-bit field, bits 11:0 */
	LOD_BIAS_SEC_shift                                = 0,
	MC_COORD_TRUNCATE_bit                             = 1 << 12,	/* single-bit flag, bit 12 */
	SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit         = 1 << 13,	/* single-bit flag, bit 13 */
	HIGH_PRECISION_FILTER_bit                         = 1 << 14,	/* single-bit flag, bit 14 */
	PERF_MIP_mask                                     = 0x07 << 15,	/* 3-bit field, bits 17:15 */
	PERF_MIP_shift                                    = 15,
	PERF_Z_mask                                       = 0x03 << 18,	/* 2-bit field, bits 19:18 */
	PERF_Z_shift                                      = 18,
	FETCH_4_bit                                       = 1 << 26,	/* single-bit flag, bit 26 */
	SAMPLE_IS_PCF_bit                                 = 1 << 27,	/* single-bit flag, bit 27 */
	/* Single-bit flag, bit 31. NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit signed int; this relies on the compiler producing 0x80000000. */
	SQ_TEX_SAMPLER_WORD2_0__TYPE_bit                  = 1 << 31,	/* NOTE(review): the original comment here read only "9. Shader ALU Constants", which looks like a section heading carried over from the source document, not a description of this bit. */
    SQ_VTX_BASE_VTX_LOC                                   = 0x0003cff0,	/* (64-state) Vertex fetch base location. Can be used as an index offset for vertex fetch. One entry per state (up to 64 states). */
	/* OFFSET: Vertex Base location for vertex fetching */
    SQ_VTX_START_INST_LOC                                 = 0x0003cff4,	/* (64-state) Vertex fetch instance offset. Can be used as an index offset for vertex fetch. One entry per state (up to 64 states, but probably less than base_vtx_loc). */
	/* OFFSET: Instance start location for vertex fetching. NOTE(review): the original comment ended with "4. R6xx Shader Instructions", which looks like a section heading carried over from the source document. */
    /* SQ_LOOP_CONST_DX10_0 and SQ_LOOP_CONST_0 below are both defined as 0x0003e200; the DX10 variant has a single COUNT field (no mask/shift enumerators), while the DX9 variant splits the word into COUNT, INIT and INC. */
    SQ_LOOP_CONST_DX10_0                                  = 0x0003e200,	/* (64-state) DX9 loop counter constants - these are used to define the behaviour of a programmed loop. There are 96 loop counter constants available - 32 each for the PS, VS, and GS. First 32 for PS, next 32 for VS, last 32 for GS. The loop counter is usable in both DX9 and DX10 modes. This version is used for SQ_CF_INST_LOOP_DX10 statements. */
	/* COUNT: Total number of loop iterations (unsigned) */
    SQ_LOOP_CONST_0                                       = 0x0003e200,	/* (64-state) DX9 loop counter constants - these are used to define the behaviour of a programmed loop. There are 96 loop counter constants available - 32 each for the PS, VS, and GS. First 32 for PS, next 32 for VS, last 32 for GS. The loop counter is usable in both DX9 and DX10 modes. This version is used for SQ_CF_INST_LOOP and SQ_CF_INST_LOOP_NO_AL statements. */
	SQ_LOOP_CONST_0__COUNT_mask                       = 0xfff << 0,	/* Total number of loop iterations (unsigned); 12-bit field, bits 11:0 */
	SQ_LOOP_CONST_0__COUNT_shift                      = 0,
	INIT_mask                                         = 0xfff << 12,	/* Initial value of loop counter AL (unsigned); 12-bit field, bits 23:12 */
	INIT_shift                                        = 12,
	/* 8-bit field, bits 31:24. NOTE(review): 0xff << 24 does not fit in a 32-bit signed int; this relies on the compiler wrapping the shift into the sign bit. */
	INC_mask                                          = 0xff << 24,	/* Amount loop counter increments after each loop iteration (signed) */
	INC_shift                                         = 24,
    SQ_BOOL_CONST_0                                       = 0x0003e380,	/* (64-state) DX9 Boolean constants - these are available as input to flow control instructions such as `IF`. There are 96 boolean constants available - 32 bits for each of the PS, VS, and GS. First for PS, next for VS, last for GS. The booleans are usable in both DX9 and DX10 modes. */
	SQ_BOOL_CONST_0_num                               = 3,	/* presumably the number of consecutive 32-bit words (one each for PS, VS, GS) -- confirm against users of this constant */
	/* BOOLEANS: 32 one-bit booleans for static branching */

} ;

#endif /* _AUTOREGS */