summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Hopf <mhopf@suse.de>2009-01-27 15:37:22 +0100
committerMatthias Hopf <mhopf@suse.de>2009-01-27 15:37:22 +0100
commitca157c24a8b46102d2ae293694e01d645b811b43 (patch)
tree14f4ff5b6ee775edf73be1183a1f4a6cbf0acc49
parent7c6f56e8bd8a31d80412c3e2f59c7f841d7ac2ba (diff)
Add comments to register file.
Helps with developing, especially when used with tags target.
-rw-r--r--r600_reg_auto_r6xx.h4479
1 files changed, 2303 insertions, 2176 deletions
diff --git a/r600_reg_auto_r6xx.h b/r600_reg_auto_r6xx.h
index 714ea48..d757030 100644
--- a/r600_reg_auto_r6xx.h
+++ b/r600_reg_auto_r6xx.h
@@ -27,192 +27,200 @@
enum {
- VGT_VTX_VECT_EJECT_REG = 0x000088b0,
- PRIM_COUNT_mask = 0x3ff << 0,
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0, /* This register defines the number of primitives that are allowed to pass during the assembly of a single vertex vector. After this number of primitives have passed, the vertex vector is submitted to the shaders for processing even if it is not full. */
+ PRIM_COUNT_mask = 0x3ff << 0, /* This is the count of primitives allowed to pass during the assembly of a single vertex vector. */
PRIM_COUNT_shift = 0,
- VGT_LAST_COPY_STATE = 0x000088c0,
- SRC_STATE_ID_mask = 0x07 << 0,
+ VGT_LAST_COPY_STATE = 0x000088c0, /* This register retains the data from the last GFX_COPY_STATE command. */
+ SRC_STATE_ID_mask = 0x07 << 0, /* Source context from last GFX_COPY_STATE command. */
SRC_STATE_ID_shift = 0,
- DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_mask = 0x07 << 16, /* Destination context from last GFX_COPY_STATE command. */
DST_STATE_ID_shift = 16,
- VGT_CACHE_INVALIDATION = 0x000088c4,
- CACHE_INVALIDATION_mask = 0x03 << 0,
+ VGT_CACHE_INVALIDATION = 0x000088c4, /* VGT cache invalidation */
+ CACHE_INVALIDATION_mask = 0x03 << 0, /* Indicates whether VC or TC is used for cache invalidation */
CACHE_INVALIDATION_shift = 0,
- VC_ONLY = 0x00,
- TC_ONLY = 0x01,
- VC_AND_TC = 0x02,
+ VC_ONLY = 0x00, /* VC_ONLY: VC_ONLY */
+ TC_ONLY = 0x01, /* TC_ONLY: TC_ONLY */
+ VC_AND_TC = 0x02, /* VC_AND_TC: VC_AND_TC */
VS_NO_EXTRA_BUFFER_bit = 1 << 5,
- VGT_GS_PER_ES = 0x000088c8,
- VGT_ES_PER_GS = 0x000088cc,
- VGT_GS_VERTEX_REUSE = 0x000088d4,
- VERT_REUSE_mask = 0x1f << 0,
+ VGT_GS_PER_ES = 0x000088c8, /* Maximum GS prims per ES thread */
+ /* Maximum number of GS prims per ES thread */
+ VGT_ES_PER_GS = 0x000088cc, /* Maximum ES vertices per GS thread */
+ /* Maximum number of ES vertices per GS thread */
+ VGT_GS_VERTEX_REUSE = 0x000088d4, /* Reusability for the GS path; it has nothing to do with the number of good SIMDs */
+ VERT_REUSE_mask = 0x1f << 0, /* reuse number of GS block. Valid values are 0, 4-16. */
VERT_REUSE_shift = 0,
- VGT_MC_LAT_CNTL = 0x000088d8,
- MC_TIME_STAMP_RES_mask = 0x03 << 0,
+ VGT_MC_LAT_CNTL = 0x000088d8, /* Time Stamp Counter Resolution Select */
+ MC_TIME_STAMP_RES_mask = 0x03 << 0, /* Select the counter resolution for tracking memory controller latency */
MC_TIME_STAMP_RES_shift = 0,
- X_0_992_MAX_LATENCY = 0x00,
- X_0_496_MAX_LATENCY = 0x01,
- X_0_248_MAX_LATENCY = 0x02,
- X_0_124_MAX_LATENCY = 0x03,
- VGT_GS_PER_VS = 0x000088e8,
- GS_PER_VS_mask = 0x0f << 0,
+ X_0_992_MAX_LATENCY = 0x00, /* 0 -> 992 max latency, step of 32 */
+ X_0_496_MAX_LATENCY = 0x01, /* 0 -> 496 max latency, step of 16 */
+ X_0_248_MAX_LATENCY = 0x02, /* 0 -> 248 max latency, step of 8 */
+ X_0_124_MAX_LATENCY = 0x03, /* 0 -> 124 max latency, step of 4 */
+ VGT_GS_PER_VS = 0x000088e8, /* Maximum GS threads per VS thread */
+ GS_PER_VS_mask = 0x0f << 0, /* Maximum number of GS threads per VS thread */
GS_PER_VS_shift = 0,
- VGT_CNTL_STATUS = 0x000088f0,
- VGT_OUT_INDX_BUSY_bit = 1 << 0,
- VGT_OUT_BUSY_bit = 1 << 1,
- VGT_PT_BUSY_bit = 1 << 2,
- VGT_TE_BUSY_bit = 1 << 3,
- VGT_VR_BUSY_bit = 1 << 4,
- VGT_GRP_BUSY_bit = 1 << 5,
- VGT_DMA_REQ_BUSY_bit = 1 << 6,
- VGT_DMA_BUSY_bit = 1 << 7,
- VGT_GS_BUSY_bit = 1 << 8,
- VGT_BUSY_bit = 1 << 9,
- VGT_PRIMITIVE_TYPE = 0x00008958,
- VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_CNTL_STATUS = 0x000088f0, /* Status Bits */
+ VGT_OUT_INDX_BUSY_bit = 1 << 0, /* If set, the Output Index block within the VGT is busy */
+ VGT_OUT_BUSY_bit = 1 << 1, /* If set, the Output block within the VGT is busy */
+ VGT_PT_BUSY_bit = 1 << 2, /* If set, the Pass-thru block within the VGT is busy */
+ VGT_TE_BUSY_bit = 1 << 3, /* If set, the Tessellation Engine block within the VGT is busy */
+ VGT_VR_BUSY_bit = 1 << 4, /* If set, the Vertex Reuse Block within the VGT is busy */
+ VGT_GRP_BUSY_bit = 1 << 5, /* If set, the Grouper Block within the VGT is busy */
+ VGT_DMA_REQ_BUSY_bit = 1 << 6, /* If set, the VGT DMA is busy requesting */
+ VGT_DMA_BUSY_bit = 1 << 7, /* If set, the VGT DMA is busy */
+ VGT_GS_BUSY_bit = 1 << 8, /* If set, VGT GS is actively processing */
+ VGT_BUSY_bit = 1 << 9, /* If set, VGT is Busy */
+ VGT_PRIMITIVE_TYPE = 0x00008958, /* VGT Primitive Type */
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, /* Primitive Type */
VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
- DI_PT_NONE = 0x00,
- DI_PT_POINTLIST = 0x01,
- DI_PT_LINELIST = 0x02,
- DI_PT_LINESTRIP = 0x03,
- DI_PT_TRILIST = 0x04,
- DI_PT_TRIFAN = 0x05,
- DI_PT_TRISTRIP = 0x06,
- DI_PT_UNUSED_0 = 0x07,
- DI_PT_UNUSED_1 = 0x08,
- DI_PT_UNUSED_2 = 0x09,
- DI_PT_LINELIST_ADJ = 0x0a,
- DI_PT_LINESTRIP_ADJ = 0x0b,
- DI_PT_TRILIST_ADJ = 0x0c,
- DI_PT_TRISTRIP_ADJ = 0x0d,
- DI_PT_UNUSED_3 = 0x0e,
- DI_PT_UNUSED_4 = 0x0f,
- DI_PT_TRI_WITH_WFLAGS = 0x10,
- DI_PT_RECTLIST = 0x11,
- DI_PT_LINELOOP = 0x12,
- DI_PT_QUADLIST = 0x13,
- DI_PT_QUADSTRIP = 0x14,
- DI_PT_POLYGON = 0x15,
- DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
- DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
- DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
- DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
- DI_PT_2D_FILL_RECT_LIST = 0x1a,
- DI_PT_2D_LINE_STRIP = 0x1b,
- DI_PT_2D_TRI_STRIP = 0x1c,
- VGT_INDEX_TYPE = 0x0000895c,
- INDEX_TYPE_mask = 0x03 << 0,
+ DI_PT_NONE = 0x00, /* DI_PT_NONE: DI_PT_NONE None (does not create draw trigger) */
+ DI_PT_POINTLIST = 0x01, /* DI_PT_POINTLIST: DI_PT_POINTLIST Point List */
+ DI_PT_LINELIST = 0x02, /* DI_PT_LINELIST: DI_PT_LINELIST Line List */
+ DI_PT_LINESTRIP = 0x03, /* DI_PT_LINESTRIP: DI_PT_LINESTRIP Line Strip */
+ DI_PT_TRILIST = 0x04, /* DI_PT_TRILIST: DI_PT_TRILIST Tri List */
+ DI_PT_TRIFAN = 0x05, /* DI_PT_TRIFAN: DI_PT_TRIFAN Tri Fan */
+ DI_PT_TRISTRIP = 0x06, /* DI_PT_TRISTRIP: DI_PT_TRISTRIP Tri Strip */
+ DI_PT_UNUSED_0 = 0x07, /* DI_PT_UNUSED_0: DI_PT_UNUSED_0 Reserved 1 */
+ DI_PT_UNUSED_1 = 0x08, /* DI_PT_UNUSED_1: DI_PT_UNUSED_1 Reserved 2 */
+ DI_PT_UNUSED_2 = 0x09, /* DI_PT_UNUSED_2: DI_PT_UNUSED_2 Reserved 3 */
+ DI_PT_LINELIST_ADJ = 0x0a, /* DI_PT_LINELIST_ADJ: DI_PT_LINELIST_ADJ Adjacent Line List */
+ DI_PT_LINESTRIP_ADJ = 0x0b, /* DI_PT_LINESTRIP_ADJ: DI_PT_LINESTRIP_ADJ Adjacent Line Strip */
+ DI_PT_TRILIST_ADJ = 0x0c, /* DI_PT_TRILIST_ADJ: DI_PT_TRILIST_ADJ Adjacent Tri List */
+ DI_PT_TRISTRIP_ADJ = 0x0d, /* DI_PT_TRISTRIP_ADJ: DI_PT_TRISTRIP_ADJ Adjacent Tri Strip */
+ DI_PT_UNUSED_3 = 0x0e, /* DI_PT_UNUSED_3: DI_PT_UNUSED_3 Reserved 3 */
+ DI_PT_UNUSED_4 = 0x0f, /* DI_PT_UNUSED_4: DI_PT_UNUSED_4 Reserved 4 */
+ DI_PT_TRI_WITH_WFLAGS = 0x10, /* DI_PT_TRI_WITH_WFLAGS: DI_PT_TRI_WITH_WFLAGS Tri List w/Flags (legacy R128) */
+ DI_PT_RECTLIST = 0x11, /* DI_PT_RECTLIST: DI_PT_RECTLIST Rect List */
+ DI_PT_LINELOOP = 0x12, /* DI_PT_LINELOOP: DI_PT_LINELOOP Line LOOP */
+ DI_PT_QUADLIST = 0x13, /* DI_PT_QUADLIST: DI_PT_QUADLIST Quad List */
+ DI_PT_QUADSTRIP = 0x14, /* DI_PT_QUADSTRIP: DI_PT_QUADSTRIP Quad Strip */
+ DI_PT_POLYGON = 0x15, /* DI_PT_POLYGON: DI_PT_POLYGON Polygon */
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, /* DI_PT_2D_COPY_RECT_LIST_V0: DI_PT_2D_COPY_RECT_LIST_V0 2D Copy Rect List V0 */
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, /* DI_PT_2D_COPY_RECT_LIST_V1: DI_PT_2D_COPY_RECT_LIST_V1 2D Copy Rect List V1 */
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, /* DI_PT_2D_COPY_RECT_LIST_V2: DI_PT_2D_COPY_RECT_LIST_V2 2D Copy Rect List V2 */
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, /* DI_PT_2D_COPY_RECT_LIST_V3: DI_PT_2D_COPY_RECT_LIST_V3 2D Copy Rect List V3 */
+ DI_PT_2D_FILL_RECT_LIST = 0x1a, /* DI_PT_2D_FILL_RECT_LIST: DI_PT_2D_FILL_RECT_LIST 2D Fill Rect List */
+ DI_PT_2D_LINE_STRIP = 0x1b, /* DI_PT_2D_LINE_STRIP: DI_PT_2D_LINE_STRIP 2D Line Strip */
+ DI_PT_2D_TRI_STRIP = 0x1c, /* DI_PT_2D_TRI_STRIP: DI_PT_2D_TRI_STRIP 2D Triangle Strip */
+ VGT_INDEX_TYPE = 0x0000895c, /* VGT Index Type */
+ INDEX_TYPE_mask = 0x03 << 0, /* Index Type (applicable to prim types 0-28 only). If the Source Select field is set to `Auto-increment Index` mode, then this field is ignored and the index type is 32 bits per index. */
INDEX_TYPE_shift = 0,
- DI_INDEX_SIZE_16_BIT = 0x00,
- DI_INDEX_SIZE_32_BIT = 0x01,
- VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
- VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
- VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
- VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
- VGT_NUM_INDICES = 0x00008970,
- VGT_NUM_INSTANCES = 0x00008974,
- PA_CL_CNTL_STATUS = 0x00008a10,
- CL_BUSY_bit = 1 << 31,
- PA_CL_ENHANCE = 0x00008a14,
- CLIP_VTX_REORDER_ENA_bit = 1 << 0,
- NUM_CLIP_SEQ_mask = 0x03 << 1,
+ DI_INDEX_SIZE_16_BIT = 0x00, /* DI_INDEX_SIZE_16_BIT: DI_INDEX_SIZE_16_BIT 16 bits per index */
+ DI_INDEX_SIZE_32_BIT = 0x01, /* DI_INDEX_SIZE_32_BIT: DI_INDEX_SIZE_32_BIT 32 bits per index */
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, /* Stream-out adjusted size. */
+ /* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point BufDwordWritten counts are maintained. */
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, /* Stream-out adjusted size. */
+ /* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point BufDwordWritten counts are maintained. */
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, /* Stream-out adjusted size. */
+ /* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point BufDwordWritten counts are maintained. */
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, /* Stream-out adjusted size. */
+ /* SIZE: DWORD Sum of (SO_BufferOffset + BufDwordWritten) for given buffer. Read Only. To read this register the VGT needs to be flushed to the point BufDwordWritten counts are maintained. */
+ VGT_NUM_INDICES = 0x00008970, /* VGT Number of Indices */
+ /* This field indicates the number of indices to process for this draw initiator. Note this count is not necessarily the count of the primitives. It is also not the index buffer size in memory. */
+ VGT_NUM_INSTANCES = 0x00008974, /* VGT Number of Instances */
+ /* VGT Number of Instances */
+ PA_CL_CNTL_STATUS = 0x00008a10, /* Status Bits */
+ CL_BUSY_bit = 1 << 31, /* Busy Status Bit */
+ PA_CL_ENHANCE = 0x00008a14, /* Used for Late Additions of Control Bits */
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0, /* Enables vertex-order-independent clipping */
+ NUM_CLIP_SEQ_mask = 0x03 << 1, /* Number of Clip Sequences Active (+1). Should be set to 3 (4 sequences) for best performance */
NUM_CLIP_SEQ_shift = 1,
- CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, /* Forces a faster clip path if NUM_CLIP_SEQ is set to 0 (which should only be if 3 does not work) */
VE_NAN_PROC_DISABLE_bit = 1 << 4,
- PA_SU_CNTL_STATUS = 0x00008a50,
- SU_BUSY_bit = 1 << 31,
- PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
- CURRENT_PTR_mask = 0x0f << 0,
+ PA_SU_CNTL_STATUS = 0x00008a50, /* Status Bits */
+ SU_BUSY_bit = 1 << 31, /* Busy Status Bit */
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10, /* Current values for Line Stipple */
+ CURRENT_PTR_mask = 0x0f << 0, /* Indicates current state of pattern pointer (can be set w/ a register write). */
CURRENT_PTR_shift = 0,
- CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_mask = 0xff << 8, /* Current state of the repeat counter (can be set w/a register write). */
CURRENT_COUNT_shift = 8,
- PA_SC_MULTI_CHIP_CNTL = 0x00008b20,
- LOG2_NUM_CHIPS_mask = 0x07 << 0,
+ PA_SC_MULTI_CHIP_CNTL = 0x00008b20, /* Controls the Screen Divisioning for Multi-Chip Configurations */
+ LOG2_NUM_CHIPS_mask = 0x07 << 0, /* Log2 of the number of chips in the multi-chip configuration. */
LOG2_NUM_CHIPS_shift = 0,
- MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3,
+ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3, /* Size of the tile per chip within each super-tile. POSSIBLE VALUES: 00 - 16 x 16 pixel tile per chip. 01 - 32 x 32 pixel tile per chip. 02 - 64 x 64 pixel tile per chip. 03 - 128x128 pixel tile per chip. */
MULTI_CHIP_TILE_SIZE_shift = 3,
- X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00,
- X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01,
- X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02,
- X_128X128_PIXEL_TILE_PER_CHIP = 0x03,
- CHIP_TILE_X_LOC_mask = 0x07 << 5,
+ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00, /* 16 x 16 pixel tile per chip. */
+ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01, /* 32 x 32 pixel tile per chip. */
+ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02, /* 64 x 64 pixel tile per chip. */
+ X_128X128_PIXEL_TILE_PER_CHIP = 0x03, /* 128x128 pixel tile per chip. */
+ CHIP_TILE_X_LOC_mask = 0x07 << 5, /* X Location of the chip within the super-tile. */
CHIP_TILE_X_LOC_shift = 5,
- CHIP_TILE_Y_LOC_mask = 0x07 << 8,
+ CHIP_TILE_Y_LOC_mask = 0x07 << 8, /* Y Location of the chip within the super-tile. */
CHIP_TILE_Y_LOC_shift = 8,
- CHIP_SUPER_TILE_B_bit = 1 << 11,
- PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40,
- S0_X_mask = 0x0f << 0,
+ CHIP_SUPER_TILE_B_bit = 1 << 11, /* Must be 0 for even LOG2_NUM_CHIPS. For odd LOG2_NUM_CHIPS, this field specifies the second super tile. */
+ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40, /* Multi-Sample Programmable Sample Locations for 2-Sample - Used by SC & CBs */
+ S0_X_mask = 0x0f << 0, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S0_X_shift = 0,
- S0_Y_mask = 0x0f << 4,
+ S0_Y_mask = 0x0f << 4, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S0_Y_shift = 4,
- S1_X_mask = 0x0f << 8,
+ S1_X_mask = 0x0f << 8, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S1_X_shift = 8,
- S1_Y_mask = 0x0f << 12,
+ S1_Y_mask = 0x0f << 12, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S1_Y_shift = 12,
- PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44,
-/* S0_X_mask = 0x0f << 0, */
+ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44, /* Multi-Sample Programmable Sample Locations for 4-Sample - Used by SC & CBs */
+/* S0_X_mask = 0x0f << 0, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_X_shift = 0, */
-/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_mask = 0x0f << 4, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_Y_shift = 4, */
-/* S1_X_mask = 0x0f << 8, */
+/* S1_X_mask = 0x0f << 8, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_X_shift = 8, */
-/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_mask = 0x0f << 12, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_Y_shift = 12, */
- S2_X_mask = 0x0f << 16,
+ S2_X_mask = 0x0f << 16, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S2_X_shift = 16,
- S2_Y_mask = 0x0f << 20,
+ S2_Y_mask = 0x0f << 20, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S2_Y_shift = 20,
- S3_X_mask = 0x0f << 24,
+ S3_X_mask = 0x0f << 24, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S3_X_shift = 24,
- S3_Y_mask = 0x0f << 28,
+ S3_Y_mask = 0x0f << 28, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S3_Y_shift = 28,
- PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48,
-/* S0_X_mask = 0x0f << 0, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48, /* Multi-Sample Programmable Sample Locations for 8-Sample First Word - Used by SC & CBs */
+/* S0_X_mask = 0x0f << 0, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_X_shift = 0, */
-/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_mask = 0x0f << 4, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_Y_shift = 4, */
-/* S1_X_mask = 0x0f << 8, */
+/* S1_X_mask = 0x0f << 8, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_X_shift = 8, */
-/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_mask = 0x0f << 12, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_Y_shift = 12, */
-/* S2_X_mask = 0x0f << 16, */
+/* S2_X_mask = 0x0f << 16, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S2_X_shift = 16, */
-/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_mask = 0x0f << 20, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S2_Y_shift = 20, */
-/* S3_X_mask = 0x0f << 24, */
+/* S3_X_mask = 0x0f << 24, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S3_X_shift = 24, */
-/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_mask = 0x0f << 28, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S3_Y_shift = 28, */
- PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c,
- S4_X_mask = 0x0f << 0,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c, /* Multi-Sample Programmable Sample Locations for 8-Sample Second Word - Used by SC & CBs */
+ S4_X_mask = 0x0f << 0, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S4_X_shift = 0,
- S4_Y_mask = 0x0f << 4,
+ S4_Y_mask = 0x0f << 4, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S4_Y_shift = 4,
- S5_X_mask = 0x0f << 8,
+ S5_X_mask = 0x0f << 8, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S5_X_shift = 8,
- S5_Y_mask = 0x0f << 12,
+ S5_Y_mask = 0x0f << 12, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S5_Y_shift = 12,
- S6_X_mask = 0x0f << 16,
+ S6_X_mask = 0x0f << 16, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S6_X_shift = 16,
- S6_Y_mask = 0x0f << 20,
+ S6_Y_mask = 0x0f << 20, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S6_Y_shift = 20,
- S7_X_mask = 0x0f << 24,
+ S7_X_mask = 0x0f << 24, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S7_X_shift = 24,
- S7_Y_mask = 0x0f << 28,
+ S7_Y_mask = 0x0f << 28, /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
S7_Y_shift = 28,
- PA_SC_CNTL_STATUS = 0x00008be0,
- MPASS_OVERFLOW_bit = 1 << 30,
- PA_SC_ENHANCE = 0x00008bf0,
- FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0,
+ PA_SC_CNTL_STATUS = 0x00008be0, /* Status Bits */
+ MPASS_OVERFLOW_bit = 1 << 30, /* If set, the Multipass Pixel Shader SC 32-bit PV counter overflowed. This bit is reset when register is read */
+ PA_SC_ENHANCE = 0x00008bf0, /* Used for Late Additions of Control Bits */
+ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0, /* Cycle count used to determine when to force out a pixel vector prematurely */
FORCE_EOV_MAX_CLK_CNT_shift = 0,
- FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12,
+ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12, /* Tile count used to determine when to force out a pixel vector prematurely */
FORCE_EOV_MAX_TILE_CNT_shift = 12,
- SQ_CONFIG = 0x00008c00,
- VC_ENABLE_bit = 1 << 0,
+ SQ_CONFIG = 0x00008c00, /* (1-state) SQ config options. The graphics pipe must be idle to change these. */
+ VC_ENABLE_bit = 1 << 0, /* Vertex Cache (VC) is present; set to zero to disable VC. When VC is disabled, all vertex fetches go through the TC rather than VC regardless of the instruction bit which selects TC/VC. */
EXPORT_SRC_C_bit = 1 << 1,
- DX9_CONSTS_bit = 1 << 2,
- ALU_INST_PREFER_VECTOR_bit = 1 << 3,
- SQ_CONFIG__DX10_CLAMP_bit = 1 << 4,
+ DX9_CONSTS_bit = 1 << 2, /* DX9 constant file mode. (0 = dx10 constant cache mode, 1 = dx9 constant file mode). This applies to all shaders. */
+ ALU_INST_PREFER_VECTOR_bit = 1 << 3, /* ALU clause instruction assignment. When a group has 4 or fewer instructions, there may be ambiguity whether to assign the last instruction to the vector pipe (according to the instruction's dest-chan), or to the scalar pipe (trans). This bit controls that decision: 0 = send the last instruction word to the scalar (trans) pipe if possible, 1 = prefer to send it to the vector pipe. This bit is only used when the decision is ambiguous (it is not ambiguous for a vector-only or trans-only opcode, or when the last instruction writes to the same dest-chan as another instruction in the group). The shader-compiler must be aware of this bit setting and compile accordingly. Default is: 0 (prefer-scalar). */
+ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4, /* R600: DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. NOT USED IN R6XX DERIVATIVES (see sq_pgm_resources_*) */
ALU_PREFER_ONE_WATERFALL_bit = 1 << 5,
ALU_MAX_ONE_WATERFALL_bit = 1 << 6,
CLAUSE_SEQ_PRIO_mask = 0x03 << 8,
@@ -225,797 +233,816 @@ enum {
GS_PRIO_shift = 28,
ES_PRIO_mask = 0x03 << 30,
ES_PRIO_shift = 30,
- SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
- NUM_PS_GPRS_mask = 0xff << 0,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, /* (1-state) Defines how GPR space is divided among the 4 thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_GPRS or NUM_CLAUSE_TEMP_GPRS. */
+ NUM_PS_GPRS_mask = 0xff << 0, /* Number of GPRs (per SIMD) assigned to the PS programs [0..255]. */
NUM_PS_GPRS_shift = 0,
- NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_mask = 0xff << 16, /* Number of GPRs (per SIMD) assigned to the VS programs [0..255]. */
NUM_VS_GPRS_shift = 16,
- NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, /* Number of GPRs reserved for clause temporaries [0-7]. This is the number of GPRs available to a single thread, so the hardware will reserve twice this many physical registers (for even & odd clauses). */
NUM_CLAUSE_TEMP_GPRS_shift = 28,
- SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
- NUM_GS_GPRS_mask = 0xff << 0,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, /* (1-state) Defines how GPR space is divided among the 4 thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. */
+ NUM_GS_GPRS_mask = 0xff << 0, /* Number of GPRs (per SIMD) assigned to the GS programs [0..255]. */
NUM_GS_GPRS_shift = 0,
- NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_mask = 0xff << 16, /* Number of GPRs (per SIMD) assigned to the ES programs [0..255]. */
NUM_ES_GPRS_shift = 16,
- SQ_THREAD_RESOURCE_MGMT = 0x00008c0c,
- NUM_PS_THREADS_mask = 0xff << 0,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c, /* (1-state) Defines how thread space is divided among the thread types. In hardware, PS threads are [0, NUM_PS_THREADS-1], then VS, then GS and ES in the highest #s. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_THREADS. */
+ NUM_PS_THREADS_mask = 0xff << 0, /* Number of threads assigned to PS programs [1..127]. */
NUM_PS_THREADS_shift = 0,
- NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_mask = 0xff << 8, /* Number of threads assigned to VS programs [1..127]. */
NUM_VS_THREADS_shift = 8,
- NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_mask = 0xff << 16, /* Number of threads assigned to GS programs [1..127]. */
NUM_GS_THREADS_shift = 16,
- NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_mask = 0xff << 24, /* Number of threads assigned to ES programs [1..127]. */
NUM_ES_THREADS_shift = 24,
- SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10,
- NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10, /* (1-state) Defines how thread stack space is divided among the thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. PS work must also be flushed prior to changing NUM_PS_STACK_ENTRIES. */
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, /* Number of stack entries allocated to PS programs [0..4095]. */
NUM_PS_STACK_ENTRIES_shift = 0,
- NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, /* Number of stack entries allocated to VS programs [0..4095]. */
NUM_VS_STACK_ENTRIES_shift = 16,
- SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14,
- NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14, /* (1-state) Defines how thread stack space is divided among the thread types. All ES, VS, and GS work (and PS work for R600) must be flushed before writing this register. */
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, /* Number of stack entries allocated to GS programs [0..4095]. */
NUM_GS_STACK_ENTRIES_shift = 0,
- NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, /* Number of stack entries allocated to ES programs [0..4095]. */
NUM_ES_STACK_ENTRIES_shift = 16,
- SQ_ESGS_RING_BASE = 0x00008c40,
- SQ_ESGS_RING_SIZE = 0x00008c44,
- SQ_GSVS_RING_BASE = 0x00008c48,
- SQ_GSVS_RING_SIZE = 0x00008c4c,
- SQ_ESTMP_RING_BASE = 0x00008c50,
- SQ_ESTMP_RING_SIZE = 0x00008c54,
- SQ_GSTMP_RING_BASE = 0x00008c58,
- SQ_GSTMP_RING_SIZE = 0x00008c5c,
- SQ_VSTMP_RING_BASE = 0x00008c60,
- SQ_VSTMP_RING_SIZE = 0x00008c64,
- SQ_PSTMP_RING_BASE = 0x00008c68,
- SQ_PSTMP_RING_SIZE = 0x00008c6c,
- SQ_FBUF_RING_BASE = 0x00008c70,
- SQ_FBUF_RING_SIZE = 0x00008c74,
- SQ_REDUC_RING_BASE = 0x00008c78,
- SQ_REDUC_RING_SIZE = 0x00008c7c,
- SQ_ALU_WORD1_OP3 = 0x00008dfc,
- SRC2_SEL_mask = 0x1ff << 0,
+ SQ_ESGS_RING_BASE = 0x00008c40, /* (1-state) Memory base address of the ES->GS ring buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_ESGS_RING_SIZE = 0x00008c44, /* (1-state) Memory region size address of the ES->GS ring buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_GSVS_RING_BASE = 0x00008c48, /* (1-state) Memory base address of the GS->VS ring buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_GSVS_RING_SIZE = 0x00008c4c, /* (1-state) Memory region size address of the GS->VS ring buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_ESTMP_RING_BASE = 0x00008c50, /* (1-state) Memory base address of the ES Temp buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_ESTMP_RING_SIZE = 0x00008c54, /* (1-state) Memory region size address of the ES Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_GSTMP_RING_BASE = 0x00008c58, /* (1-state) Memory base address of the GS Temp buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_GSTMP_RING_SIZE = 0x00008c5c, /* (1-state) Memory region size address of the GS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_VSTMP_RING_BASE = 0x00008c60, /* (1-state) Memory base address of the VS Temp buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_VSTMP_RING_SIZE = 0x00008c64, /* (1-state) Memory region size address of the VS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_PSTMP_RING_BASE = 0x00008c68, /* (1-state) Memory base address of the PS Temp buffer (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_PSTMP_RING_SIZE = 0x00008c6c, /* (1-state) Memory region size address of the PS Temp buffer (in units of 256-bytes). True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_FBUF_RING_BASE = 0x00008c70, /* (1-state) Memory base address of the FBUFFER (PS only) (256-byte aligned) */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_FBUF_RING_SIZE = 0x00008c74, /* (1-state) Memory region size address of the FBUFFER. True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_REDUC_RING_BASE = 0x00008c78, /* (1-state) Memory base address of the Reduction Buffer */
+ /* MEM_BASE: Format is [39:8] */
+ SQ_REDUC_RING_SIZE = 0x00008c7c, /* (1-state) Memory region size address of the Reduction Buffer. True size, not size -1. Setting to zero disables. */
+ /* MEM_SIZE: Format is [39:8] */
+ SQ_ALU_WORD1_OP3 = 0x00008dfc, /* ALU instruction word 1. This subencoding is used for OP3 instructions (instructions taking 3 operands). */
+ SRC2_SEL_mask = 0x1ff << 0, /* Source for operands src2. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
SRC2_SEL_shift = 0,
- SQ_ALU_SRC_0 = 0xf8,
- SQ_ALU_SRC_1 = 0xf9,
- SQ_ALU_SRC_1_INT = 0xfa,
- SQ_ALU_SRC_M_1_INT = 0xfb,
- SQ_ALU_SRC_0_5 = 0xfc,
- SQ_ALU_SRC_LITERAL = 0xfd,
- SQ_ALU_SRC_PV = 0xfe,
- SQ_ALU_SRC_PS = 0xff,
- SRC2_REL_bit = 1 << 9,
- SRC2_CHAN_mask = 0x03 << 10,
+ SQ_ALU_SRC_0 = 0xf8, /* SQ_ALU_SRC_0: special constant 0.0. */
+ SQ_ALU_SRC_1 = 0xf9, /* SQ_ALU_SRC_1: special constant 1.0 float. */
+ SQ_ALU_SRC_1_INT = 0xfa, /* SQ_ALU_SRC_1_INT: special constant 1 integer. */
+ SQ_ALU_SRC_M_1_INT = 0xfb, /* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
+ SQ_ALU_SRC_0_5 = 0xfc, /* SQ_ALU_SRC_0_5: special constant 0.5 float. */
+ SQ_ALU_SRC_LITERAL = 0xfd, /* SQ_ALU_SRC_LITERAL: literal constant. */
+ SQ_ALU_SRC_PV = 0xfe, /* SQ_ALU_SRC_PV: previous vector result. */
+ SQ_ALU_SRC_PS = 0xff, /* SQ_ALU_SRC_PS: previous scalar result. */
+ SRC2_REL_bit = 1 << 9, /* If set, this operand uses relative addressing based on the INDEX_MODE. */
+ SRC2_CHAN_mask = 0x03 << 10, /* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
SRC2_CHAN_shift = 10,
- SQ_CHAN_X = 0x00,
- SQ_CHAN_Y = 0x01,
- SQ_CHAN_Z = 0x02,
- SQ_CHAN_W = 0x03,
- SRC2_NEG_bit = 1 << 12,
- SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ SQ_CHAN_X = 0x00, /* SQ_CHAN_X: Use X component. */
+ SQ_CHAN_Y = 0x01, /* SQ_CHAN_Y: Use Y component. */
+ SQ_CHAN_Z = 0x02, /* SQ_CHAN_Z: Use Z component. */
+ SQ_CHAN_W = 0x03, /* SQ_CHAN_W: Use W component. */
+ SRC2_NEG_bit = 1 << 12, /* If set, negate the input for this operand. Should only be set for floating point inputs. */
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, /* Instruction opcode. Caution: opcode values do not begin at zero. POSSIBLE VALUES: 12 - SQ_OP3_INST_MUL_LIT 13 - SQ_OP3_INST_MUL_LIT_M2 14 - SQ_OP3_INST_MUL_LIT_M4 15 - SQ_OP3_INST_MUL_LIT_D2 16 - SQ_OP3_INST_MULADD 17 - SQ_OP3_INST_MULADD_M2 18 - SQ_OP3_INST_MULADD_M4 19 - SQ_OP3_INST_MULADD_D2 20 - SQ_OP3_INST_MULADD_IEEE 21 - SQ_OP3_INST_MULADD_IEEE_M2 22 - SQ_OP3_INST_MULADD_IEEE_M4 23 - SQ_OP3_INST_MULADD_IEEE_D2 24 - SQ_OP3_INST_CNDE 25 - SQ_OP3_INST_CNDGT 26 - SQ_OP3_INST_CNDGE 27 - Reserved 28 - SQ_OP3_INST_CNDE_INT 29 - SQ_OP3_INST_CNDGT_INT 30 - SQ_OP3_INST_CNDGE_INT 31 - Reserved */
SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
- SQ_OP3_INST_MUL_LIT = 0x0c,
- SQ_OP3_INST_MUL_LIT_M2 = 0x0d,
- SQ_OP3_INST_MUL_LIT_M4 = 0x0e,
- SQ_OP3_INST_MUL_LIT_D2 = 0x0f,
- SQ_OP3_INST_MULADD = 0x10,
- SQ_OP3_INST_MULADD_M2 = 0x11,
- SQ_OP3_INST_MULADD_M4 = 0x12,
- SQ_OP3_INST_MULADD_D2 = 0x13,
- SQ_OP3_INST_MULADD_IEEE = 0x14,
- SQ_OP3_INST_MULADD_IEEE_M2 = 0x15,
- SQ_OP3_INST_MULADD_IEEE_M4 = 0x16,
- SQ_OP3_INST_MULADD_IEEE_D2 = 0x17,
- SQ_OP3_INST_CNDE = 0x18,
- SQ_OP3_INST_CNDGT = 0x19,
- SQ_OP3_INST_CNDGE = 0x1a,
- SQ_OP3_INST_CNDE_INT = 0x1c,
- SQ_OP3_INST_CNDGT_INT = 0x1d,
- SQ_OP3_INST_CNDGE_INT = 0x1e,
- SQ_TEX_WORD2 = 0x00008dfc,
- OFFSET_X_mask = 0x1f << 0,
+ SQ_OP3_INST_MUL_LIT = 0x0c, /* SQ_OP3_INST_MUL_LIT */
+ SQ_OP3_INST_MUL_LIT_M2 = 0x0d, /* SQ_OP3_INST_MUL_LIT_M2 */
+ SQ_OP3_INST_MUL_LIT_M4 = 0x0e, /* SQ_OP3_INST_MUL_LIT_M4 */
+ SQ_OP3_INST_MUL_LIT_D2 = 0x0f, /* SQ_OP3_INST_MUL_LIT_D2 */
+ SQ_OP3_INST_MULADD = 0x10, /* SQ_OP3_INST_MULADD */
+ SQ_OP3_INST_MULADD_M2 = 0x11, /* SQ_OP3_INST_MULADD_M2 */
+ SQ_OP3_INST_MULADD_M4 = 0x12, /* SQ_OP3_INST_MULADD_M4 */
+ SQ_OP3_INST_MULADD_D2 = 0x13, /* SQ_OP3_INST_MULADD_D2 */
+ SQ_OP3_INST_MULADD_IEEE = 0x14, /* SQ_OP3_INST_MULADD_IEEE */
+ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15, /* SQ_OP3_INST_MULADD_IEEE_M2 */
+ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16, /* SQ_OP3_INST_MULADD_IEEE_M4 */
+ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17, /* SQ_OP3_INST_MULADD_IEEE_D2 */
+ SQ_OP3_INST_CNDE = 0x18, /* SQ_OP3_INST_CNDE */
+ SQ_OP3_INST_CNDGT = 0x19, /* SQ_OP3_INST_CNDGT */
+ SQ_OP3_INST_CNDGE = 0x1a, /* SQ_OP3_INST_CNDGE */
+ SQ_OP3_INST_CNDE_INT = 0x1c, /* SQ_OP3_INST_CNDE_INT */
+ SQ_OP3_INST_CNDGT_INT = 0x1d, /* SQ_OP3_INST_CNDGT_INT */
+ SQ_OP3_INST_CNDGE_INT = 0x1e, /* SQ_OP3_INST_CNDGE_INT */
+ SQ_TEX_WORD2 = 0x00008dfc, /* Texture fetch clause instruction word 2. */
+ OFFSET_X_mask = 0x1f << 0, /* Value added to X component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
OFFSET_X_shift = 0,
- OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_mask = 0x1f << 5, /* Value added to Y component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
OFFSET_Y_shift = 5,
- OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_mask = 0x1f << 10, /* Value added to Z component of texel address before sampling (in texel space). S3.1 fixpoint value ranging from [-8, 8). */
OFFSET_Z_shift = 10,
- SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_mask = 0x1f << 15, /* Sampler ID to use (specifies filter options, etc.). Value in the range [0, 17]. */
SAMPLER_ID_shift = 15,
- SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, /* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
- SQ_SEL_X = 0x00,
- SQ_SEL_Y = 0x01,
- SQ_SEL_Z = 0x02,
- SQ_SEL_W = 0x03,
- SQ_SEL_0 = 0x04,
- SQ_SEL_1 = 0x05,
- SRC_SEL_Y_mask = 0x07 << 23,
+ SQ_SEL_X = 0x00, /* SQ_SEL_X: use X component */
+ SQ_SEL_Y = 0x01, /* SQ_SEL_Y: use Y component */
+ SQ_SEL_Z = 0x02, /* SQ_SEL_Z: use Z component */
+ SQ_SEL_W = 0x03, /* SQ_SEL_W: use W component */
+ SQ_SEL_0 = 0x04, /* SQ_SEL_0: use constant 0.0 */
+ SQ_SEL_1 = 0x05, /* SQ_SEL_1: use constant 1.0 */
+ SRC_SEL_Y_mask = 0x07 << 23, /* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
SRC_SEL_Y_shift = 23,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
- SRC_SEL_Z_mask = 0x07 << 26,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+ SRC_SEL_Z_mask = 0x07 << 26, /* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
SRC_SEL_Z_shift = 26,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
- SRC_SEL_W_mask = 0x07 << 29,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+ SRC_SEL_W_mask = 0x07 << 29, /* Indicate component source for src.XYZW. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 */
SRC_SEL_W_shift = 29,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
- SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
- BURST_COUNT_mask = 0x0f << 17,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, /* Word 1 of the control flow instruction for alloc/export is the bitwise OR of WORD1 | WORD1_{BUF,SWIZ}. This part contains fields that are always defined. */
+ BURST_COUNT_mask = 0x0f << 17, /* Number of MRTs, positions, parameters, or logical export values to allocate and/or export, minus one. This field is interpreted as a value in [1,16]. */
BURST_COUNT_shift = 17,
- END_OF_PROGRAM_bit = 1 << 21,
- VALID_PIXEL_MODE_bit = 1 << 22,
- SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ END_OF_PROGRAM_bit = 1 << 21, /* If set, then this instruction is the last instruction of the CF program. Execution ends after this instruction is issued. */
+ VALID_PIXEL_MODE_bit = 1 << 22, /* If set, execute this instruction/clause as if invalid pixels are inactive. Antonym of WHOLE_QUAD_MODE. Caution: VALID_PIXEL_MODE is not the `default` mode; this bit should be set to 0 by default. */
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, /* Type of instruction to evaluate in CF. This value MUST be one of the alloc/export instructions listed below. POSSIBLE VALUES: 32 - SQ_CF_INST_MEM_STREAM0: perform a memory operation on the stream buffer 0 (write-only). 33 - SQ_CF_INST_MEM_STREAM1: perform a memory operation on the stream buffer 1 (write-only). 34 - SQ_CF_INST_MEM_STREAM2: perform a memory operation on the stream buffer 2 (write-only). 35 - SQ_CF_INST_MEM_STREAM3: perform a memory operation on the stream buffer 3 (write-only). 36 - SQ_CF_INST_MEM_SCRATCH: perform a memory operation on the scratch buffer (read-write). 37 - SQ_CF_INST_MEM_REDUCTION: perform a memory operation on the reduction buffer (read-write). 38 - SQ_CF_INST_MEM_RING: perform a memory operation on the ring buffer (write-only). 39 - SQ_CF_INST_EXPORT: export only (not last). Used for PIXEL, POS, PARAM exports. 40 - SQ_CF_INST_EXPORT_DONE: export only (last export). Used for PIXEL, POS, PARAM exports. */
SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23,
- SQ_CF_INST_MEM_STREAM0 = 0x20,
- SQ_CF_INST_MEM_STREAM1 = 0x21,
- SQ_CF_INST_MEM_STREAM2 = 0x22,
- SQ_CF_INST_MEM_STREAM3 = 0x23,
- SQ_CF_INST_MEM_SCRATCH = 0x24,
- SQ_CF_INST_MEM_REDUCTION = 0x25,
- SQ_CF_INST_MEM_RING = 0x26,
- SQ_CF_INST_EXPORT = 0x27,
- SQ_CF_INST_EXPORT_DONE = 0x28,
- WHOLE_QUAD_MODE_bit = 1 << 30,
- BARRIER_bit = 1 << 31,
- SQ_CF_ALU_WORD1 = 0x00008dfc,
- KCACHE_MODE1_mask = 0x03 << 0,
+ SQ_CF_INST_MEM_STREAM0 = 0x20, /* SQ_CF_INST_MEM_STREAM0: perform a memory operation on the stream buffer 0 (write-only). */
+ SQ_CF_INST_MEM_STREAM1 = 0x21, /* SQ_CF_INST_MEM_STREAM1: perform a memory operation on the stream buffer 1 (write-only). */
+ SQ_CF_INST_MEM_STREAM2 = 0x22, /* SQ_CF_INST_MEM_STREAM2: perform a memory operation on the stream buffer 2 (write-only). */
+ SQ_CF_INST_MEM_STREAM3 = 0x23, /* SQ_CF_INST_MEM_STREAM3: perform a memory operation on the stream buffer 3 (write-only). */
+ SQ_CF_INST_MEM_SCRATCH = 0x24, /* SQ_CF_INST_MEM_SCRATCH: perform a memory operation on the scratch buffer (read-write). */
+ SQ_CF_INST_MEM_REDUCTION = 0x25, /* SQ_CF_INST_MEM_REDUCTION: perform a memory operation on the reduction buffer (read-write). */
+ SQ_CF_INST_MEM_RING = 0x26, /* SQ_CF_INST_MEM_RING: perform a memory operation on the ring buffer (write-only). */
+ SQ_CF_INST_EXPORT = 0x27, /* SQ_CF_INST_EXPORT: export only (not last). Used for PIXEL, POS, PARAM exports. */
+ SQ_CF_INST_EXPORT_DONE = 0x28, /* SQ_CF_INST_EXPORT_DONE: export only (last export). Used for PIXEL, POS, PARAM exports. */
+ WHOLE_QUAD_MODE_bit = 1 << 30, /* If set, execute this instruction/clause as if all pixels were active and valid. */
+ BARRIER_bit = 1 << 31, /* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
+ SQ_CF_ALU_WORD1 = 0x00008dfc, /* Control flow instruction word 1. This word is used by ALU clause instructions. */
+ KCACHE_MODE1_mask = 0x03 << 0, /* Mode for second set of locked cache lines. POSSIBLE VALUES: 00 - SQ_CF_KCACHE_NOP: do not lock any cache lines. 01 - SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. 02 - SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. 03 - SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
KCACHE_MODE1_shift = 0,
- SQ_CF_KCACHE_NOP = 0x00,
- SQ_CF_KCACHE_LOCK_1 = 0x01,
- SQ_CF_KCACHE_LOCK_2 = 0x02,
- SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
- KCACHE_ADDR0_mask = 0xff << 2,
+ SQ_CF_KCACHE_NOP = 0x00, /* SQ_CF_KCACHE_NOP: do not lock any cache lines. */
+ SQ_CF_KCACHE_LOCK_1 = 0x01, /* SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. */
+ SQ_CF_KCACHE_LOCK_2 = 0x02, /* SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. */
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, /* SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
+ KCACHE_ADDR0_mask = 0xff << 2, /* Constant buffer address for first set of locked cache lines. In units of cache lines where a line holds 16 128-bit constants (byte addr[15:8]). */
KCACHE_ADDR0_shift = 2,
- KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_mask = 0xff << 10, /* Constant buffer address for second set of locked cache lines. */
KCACHE_ADDR1_shift = 10,
- SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, /* Number of instructions to execute in the clause, minus one. This is interpreted as the number of instruction slots (64-bit slots) in the range [1,128]. */
SQ_CF_ALU_WORD1__COUNT_shift = 18,
- SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
- SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, /* if set, uses constants from alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, /* Type of ALU instruction to evaluate in CF. For this encoding, CF_INST must be set to one of the following values. POSSIBLE VALUES: 08 - SQ_CF_INST_ALU: each PRED_SET updates the active state but does not update the stack. 09 - SQ_CF_INST_ALU_PUSH_BEFORE: do CF_PUSH; then CF_INST_ALU 10 - SQ_CF_INST_ALU_POP_AFTER: do CF_INST_ALU; then do CF_INST_POP. 11 - SQ_CF_INST_ALU_POP2_AFTER: do CF_INST_ALU; then do CF_INST_POP twice. 13 - SQ_CF_INST_ALU_CONTINUE: each PRED_SET causes a continue operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. 14 - SQ_CF_INST_ALU_BREAK: each PRED_SET causes a break operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. 15 - SQ_CF_INST_ALU_ELSE_AFTER: do CF_INST_ALU; then do CF_INST_ELSE. */
SQ_CF_ALU_WORD1__CF_INST_shift = 26,
- SQ_CF_INST_ALU = 0x08,
- SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
- SQ_CF_INST_ALU_POP_AFTER = 0x0a,
- SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
- SQ_CF_INST_ALU_CONTINUE = 0x0d,
- SQ_CF_INST_ALU_BREAK = 0x0e,
- SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
-/* WHOLE_QUAD_MODE_bit = 1 << 30, */
-/* BARRIER_bit = 1 << 31, */
- SQ_TEX_WORD1 = 0x00008dfc,
- SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_CF_INST_ALU = 0x08, /* SQ_CF_INST_ALU: each PRED_SET updates the active state but does not update the stack. */
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, /* SQ_CF_INST_ALU_PUSH_BEFORE: do CF_PUSH; then CF_INST_ALU */
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a, /* SQ_CF_INST_ALU_POP_AFTER: do CF_INST_ALU; then do CF_INST_POP. */
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b, /* SQ_CF_INST_ALU_POP2_AFTER: do CF_INST_ALU; then do CF_INST_POP twice. */
+ SQ_CF_INST_ALU_CONTINUE = 0x0d, /* SQ_CF_INST_ALU_CONTINUE: each PRED_SET causes a continue operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. */
+ SQ_CF_INST_ALU_BREAK = 0x0e, /* SQ_CF_INST_ALU_BREAK: each PRED_SET causes a break operation on the masked pixels. Equivalent to CF_INST_PUSH; CF_INST_ALU; CF_INST_ELSE; CF_INST_CONTINUE; CF_POP. */
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, /* SQ_CF_INST_ALU_ELSE_AFTER: do CF_INST_ALU; then do CF_INST_ELSE. */
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */ /* If set, execute this instruction/clause as if all pixels are active and valid. Antonym of VALID_PIXEL_MODE. Set at most one of these bits. */
+/* BARRIER_bit = 1 << 31, */ /* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
+ SQ_TEX_WORD1 = 0x00008dfc, /* Texture fetch clause instruction word 1. */
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, /* Destination GPR address to write result to. */
SQ_TEX_WORD1__DST_GPR_shift = 0,
- SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
- SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7, /* Indicate whether destination address is absolute or relative to an index. */
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_TEX_WORD1__DST_SEL_X_shift = 9,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
- SQ_SEL_MASK = 0x07,
- SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+ SQ_SEL_MASK = 0x07, /* SQ_SEL_MASK: mask out this component */
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_TEX_WORD1__DST_SEL_W_shift = 18,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, /* Constant LOD bias to add to the computed bias for this lookup. Twos-complement S3.4 fixpoint value with range [-4, 4). */
SQ_TEX_WORD1__LOD_BIAS_shift = 21,
- COORD_TYPE_X_bit = 1 << 28,
- COORD_TYPE_Y_bit = 1 << 29,
- COORD_TYPE_Z_bit = 1 << 30,
- COORD_TYPE_W_bit = 1 << 31,
- SQ_VTX_WORD0 = 0x00008dfc,
- VTX_INST_mask = 0x1f << 0,
+ COORD_TYPE_X_bit = 1 << 28, /* Indicate the type of the src.XYZW component. */
+ COORD_TYPE_Y_bit = 1 << 29, /* Indicate the type of the src.XYZW component. */
+ COORD_TYPE_Z_bit = 1 << 30, /* Indicate the type of the src.XYZW component. */
+ COORD_TYPE_W_bit = 1 << 31, /* Indicate the type of the src.XYZW component. */
+ SQ_VTX_WORD0 = 0x00008dfc, /* Vertex fetch clause instruction word 0. */
+ VTX_INST_mask = 0x1f << 0, /* Opcode for this vertex fetch instruction. POSSIBLE VALUES: 00 - SQ_VTX_INST_FETCH: vertex fetch (X = uint32 index) 01 - SQ_VTX_INST_SEMANTIC: semantic vertex fetch */
VTX_INST_shift = 0,
- SQ_VTX_INST_FETCH = 0x00,
- SQ_VTX_INST_SEMANTIC = 0x01,
- FETCH_TYPE_mask = 0x03 << 5,
+ SQ_VTX_INST_FETCH = 0x00, /* SQ_VTX_INST_FETCH: vertex fetch (X = uint32 index) */
+ SQ_VTX_INST_SEMANTIC = 0x01, /* SQ_VTX_INST_SEMANTIC: semantic vertex fetch */
+ FETCH_TYPE_mask = 0x03 << 5, /* Specify which index offset to send to VC. POSSIBLE VALUES: 00 - SQ_VTX_FETCH_VERTEX_DATA 01 - SQ_VTX_FETCH_INSTANCE_DATA 02 - SQ_VTX_FETCH_NO_INDEX_OFFSET */
FETCH_TYPE_shift = 5,
- SQ_VTX_FETCH_VERTEX_DATA = 0x00,
- SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
- SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
- FETCH_WHOLE_QUAD_bit = 1 << 7,
- BUFFER_ID_mask = 0xff << 8,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00, /* SQ_VTX_FETCH_VERTEX_DATA */
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01, /* SQ_VTX_FETCH_INSTANCE_DATA */
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, /* SQ_VTX_FETCH_NO_INDEX_OFFSET */
+ FETCH_WHOLE_QUAD_bit = 1 << 7, /* If set, texture instruction must fetch data for all pixels (result may be used as source coordinate of a dependent read). If cleared, texture instruction can ignore invalid pixels. */
+ BUFFER_ID_mask = 0xff << 8, /* Constant ID to use for this vertex fetch (indicates the buffer address, size, and format). */
BUFFER_ID_shift = 8,
- SRC_GPR_mask = 0x7f << 16,
+ SRC_GPR_mask = 0x7f << 16, /* Source GPR address to get fetch address from. */
SRC_GPR_shift = 16,
- SRC_REL_bit = 1 << 23,
- SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SRC_REL_bit = 1 << 23, /* Indicate whether source address is absolute or relative to an index. */
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, /* Indicate which component of src to use for the fetch address. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component */
SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
- MEGA_FETCH_COUNT_mask = 0x3f << 26,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26, /* For a mega-fetch, number of bytes to fetch at once. For mini-fetch, number of bytes to fetch if SQ converts this instruction into a mega-fetch. This value's range is [1,64]. */
MEGA_FETCH_COUNT_shift = 26,
- SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
- SEL_X_mask = 0x07 << 0,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, /* Word 1 of the control flow instruction. This subencoding is used by alloc/exports for PIXEL, POS, and PARAM. */
+ SEL_X_mask = 0x07 << 0, /* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SEL_X_shift = 0,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SEL_Y_mask = 0x07 << 3,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SEL_Y_mask = 0x07 << 3, /* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SEL_Y_shift = 3,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SEL_Z_mask = 0x07 << 6,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SEL_Z_mask = 0x07 << 6, /* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SEL_Z_shift = 6,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SEL_W_mask = 0x07 << 9,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SEL_W_mask = 0x07 << 9, /* Specify source for each component of the export. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SEL_W_shift = 9,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_ALU_WORD1 = 0x00008dfc,
- ENCODING_mask = 0x07 << 15,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_ALU_WORD1 = 0x00008dfc, /* ALU instruction word 1 is the bitwise OR of SQ_ALU_WORD1 | SQ_ALU_WORD1_OP[2,3]. SQ_ALU_WORD1 contains fields used by all encodings. */
+ ENCODING_mask = 0x07 << 15, /* A read-only field used to determine whether OP2 or OP3 encoding is being used. If this field's value is 0, the instruction is using OP2. Otherwise, the instruction is using OP3. Do not write to this field directly. */
ENCODING_shift = 15,
- BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_mask = 0x07 << 18, /* Specify how to load operands into the SP. POSSIBLE VALUES: 00 - SQ_ALU_VEC_012, SQ_ALU_SCL_210 01 - SQ_ALU_VEC_021, SQ_ALU_SCL_122 02 - SQ_ALU_VEC_120, SQ_ALU_SCL_212 03 - SQ_ALU_VEC_102, SQ_ALU_SCL_221 04 - SQ_ALU_VEC_201 05 - SQ_ALU_VEC_210 */
BANK_SWIZZLE_shift = 18,
- SQ_ALU_VEC_012 = 0x00,
- SQ_ALU_VEC_021 = 0x01,
- SQ_ALU_VEC_120 = 0x02,
- SQ_ALU_VEC_102 = 0x03,
- SQ_ALU_VEC_201 = 0x04,
- SQ_ALU_VEC_210 = 0x05,
- SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_VEC_012 = 0x00, /* SQ_ALU_VEC_012, SQ_ALU_SCL_210 */
+ SQ_ALU_VEC_021 = 0x01, /* SQ_ALU_VEC_021, SQ_ALU_SCL_122 */
+ SQ_ALU_VEC_120 = 0x02, /* SQ_ALU_VEC_120, SQ_ALU_SCL_212 */
+ SQ_ALU_VEC_102 = 0x03, /* SQ_ALU_VEC_102, SQ_ALU_SCL_221 */
+ SQ_ALU_VEC_201 = 0x04, /* SQ_ALU_VEC_201 */
+ SQ_ALU_VEC_210 = 0x05, /* SQ_ALU_VEC_210 */
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, /* Destination address to write result to. Always a GPR address. */
SQ_ALU_WORD1__DST_GPR_shift = 21,
- SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
- DST_CHAN_mask = 0x03 << 29,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28, /* If set, this operand uses relative addressing based on the INDEX_MODE. */
+ DST_CHAN_mask = 0x03 << 29, /* Specify which channel of DST_GPR to write the result to. POSSIBLE VALUES: 00 - CHAN_X: write to X channel of dest. 01 - CHAN_Y: write to Y channel of dest. 02 - CHAN_Z: write to Z channel of dest. 03 - CHAN_W: write to W channel of dest. */
DST_CHAN_shift = 29,
- CHAN_X = 0x00,
- CHAN_Y = 0x01,
- CHAN_Z = 0x02,
- CHAN_W = 0x03,
- SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
- SQ_CF_ALU_WORD0 = 0x00008dfc,
- SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ CHAN_X = 0x00, /* CHAN_X: write to X channel of dest. */
+ CHAN_Y = 0x01, /* CHAN_Y: write to Y channel of dest. */
+ CHAN_Z = 0x02, /* CHAN_Z: write to Z channel of dest. */
+ CHAN_W = 0x03, /* CHAN_W: write to W channel of dest. */
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31, /* If set, clamp the result to [0.0, 1.0]. Not mathematically defined for opcodes that produce integer results. */
+ SQ_CF_ALU_WORD0 = 0x00008dfc, /* Control flow instruction word 0. This word is used by ALU clause instructions. */
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, /* Bits [24:3] of the byte offset (producing a QUAD-word-aligned value) of the clause to execute. The offset is relative to the byte address specified by PGM_START. */
SQ_CF_ALU_WORD0__ADDR_shift = 0,
- KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_mask = 0x0f << 22, /* Bank (constant buffer number) for first set of locked cache lines. */
KCACHE_BANK0_shift = 22,
- KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_mask = 0x0f << 26, /* Bank (constant buffer number) for second set of locked cache lines. */
KCACHE_BANK1_shift = 26,
- KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_mask = 0x03 << 30, /* Mode for first set of locked cache lines. POSSIBLE VALUES: 00 - SQ_CF_KCACHE_NOP: do not lock any cache lines. 01 - SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. 02 - SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. 03 - SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
KCACHE_MODE0_shift = 30,
-/* SQ_CF_KCACHE_NOP = 0x00, */
-/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
-/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
-/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
- SQ_VTX_WORD2 = 0x00008dfc,
- SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+/* SQ_CF_KCACHE_NOP = 0x00, */ /* SQ_CF_KCACHE_NOP: do not lock any cache lines. */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ /* SQ_CF_KCACHE_LOCK_1: lock cache line [bank][addr]. */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ /* SQ_CF_KCACHE_LOCK_2: lock cache lines [bank][addr] and [bank][addr+1]. */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ /* SQ_CF_KCACHE_LOCK_LOOP_INDEX: lock cache lines [bank][loop/16+addr] and [bank][loop/16+addr+1], where loop is current loop index. */
+ SQ_VTX_WORD2 = 0x00008dfc, /* Vertex fetch clause instruction word 2. */
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, /* Offset to begin reading from. Byte-aligned. */
SQ_VTX_WORD2__OFFSET_shift = 0,
- SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, /* Endian control (ignored if USE_CONST_FIELDS = 1). POSSIBLE VALUES: 00 - SQ_ENDIAN_NONE: no endian swap (XOR by 0) 01 - SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC 02 - SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
- SQ_ENDIAN_NONE = 0x00,
- SQ_ENDIAN_8IN16 = 0x01,
- SQ_ENDIAN_8IN32 = 0x02,
- CONST_BUF_NO_STRIDE_bit = 1 << 18,
- MEGA_FETCH_bit = 1 << 19,
- SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
- SQ_ALU_WORD1_OP2_V2 = 0x00008dfc,
- SRC0_ABS_bit = 1 << 0,
- SRC1_ABS_bit = 1 << 1,
- UPDATE_EXECUTE_MASK_bit = 1 << 2,
- UPDATE_PRED_bit = 1 << 3,
- WRITE_MASK_bit = 1 << 4,
- SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5,
+ SQ_ENDIAN_NONE = 0x00, /* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
+ SQ_ENDIAN_8IN16 = 0x01, /* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
+ SQ_ENDIAN_8IN32 = 0x02, /* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
+ CONST_BUF_NO_STRIDE_bit = 1 << 18, /* If set, force stride to zero for constant buffer fetches that use absolute addresses. */
+ MEGA_FETCH_bit = 1 << 19, /* If set, this instruction is a mega-fetch. Otherwise it is a mini-fetch. */
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, /* if set, uses constants from alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
+ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, /* ALU instruction word 1. This subencoding is used for OP2 instructions (instructions taking 0 to 2 operands). */
+ SRC0_ABS_bit = 1 << 0, /* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
+ SRC1_ABS_bit = 1 << 1, /* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
+ UPDATE_EXECUTE_MASK_bit = 1 << 2, /* If set, update the execute mask in the SQ after executing this instruction based on the current predicate. */
+ UPDATE_PRED_bit = 1 << 3, /* If set, update the predicate in the SP based on the predicate operation computed here. */
+ WRITE_MASK_bit = 1 << 4, /* If set, write this scalar result to the destination GPR channel. */
+ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, /* Output modifier for this instruction. Must be set to ALU_OMOD_OFF for operations that produce an integer result. */
SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5,
- SQ_ALU_OMOD_OFF = 0x00,
- SQ_ALU_OMOD_M2 = 0x01,
- SQ_ALU_OMOD_M4 = 0x02,
- SQ_ALU_OMOD_D2 = 0x03,
- SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ SQ_ALU_OMOD_OFF = 0x00, /* SQ_ALU_OMOD_OFF: identity. */
+ SQ_ALU_OMOD_M2 = 0x01, /* SQ_ALU_OMOD_M2: multiply by 2.0. */
+ SQ_ALU_OMOD_M4 = 0x02, /* SQ_ALU_OMOD_M4: multiply by 4.0. */
+ SQ_ALU_OMOD_D2 = 0x03, /* SQ_ALU_OMOD_D2: divide by 2.0. */
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, /* Instruction opcode. The top 3 bits of this must be zero. Caution: gaps in opcode values are not marked in the table below. */
SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
- SQ_OP2_INST_ADD = 0x00,
- SQ_OP2_INST_MUL = 0x01,
- SQ_OP2_INST_MUL_IEEE = 0x02,
- SQ_OP2_INST_MAX = 0x03,
- SQ_OP2_INST_MIN = 0x04,
- SQ_OP2_INST_MAX_DX10 = 0x05,
- SQ_OP2_INST_MIN_DX10 = 0x06,
- SQ_OP2_INST_SETE = 0x08,
- SQ_OP2_INST_SETGT = 0x09,
- SQ_OP2_INST_SETGE = 0x0a,
- SQ_OP2_INST_SETNE = 0x0b,
- SQ_OP2_INST_SETE_DX10 = 0x0c,
- SQ_OP2_INST_SETGT_DX10 = 0x0d,
- SQ_OP2_INST_SETGE_DX10 = 0x0e,
- SQ_OP2_INST_SETNE_DX10 = 0x0f,
- SQ_OP2_INST_FRACT = 0x10,
- SQ_OP2_INST_TRUNC = 0x11,
- SQ_OP2_INST_CEIL = 0x12,
- SQ_OP2_INST_RNDNE = 0x13,
- SQ_OP2_INST_FLOOR = 0x14,
- SQ_OP2_INST_MOVA = 0x15,
- SQ_OP2_INST_MOVA_FLOOR = 0x16,
- SQ_OP2_INST_MOVA_INT = 0x18,
- SQ_OP2_INST_MOV = 0x19,
- SQ_OP2_INST_NOP = 0x1a,
- SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
- SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
- SQ_OP2_INST_PRED_SETE = 0x20,
- SQ_OP2_INST_PRED_SETGT = 0x21,
- SQ_OP2_INST_PRED_SETGE = 0x22,
- SQ_OP2_INST_PRED_SETNE = 0x23,
- SQ_OP2_INST_PRED_SET_INV = 0x24,
- SQ_OP2_INST_PRED_SET_POP = 0x25,
- SQ_OP2_INST_PRED_SET_CLR = 0x26,
- SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
- SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
- SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
- SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
- SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
- SQ_OP2_INST_KILLE = 0x2c,
- SQ_OP2_INST_KILLGT = 0x2d,
- SQ_OP2_INST_KILLGE = 0x2e,
- SQ_OP2_INST_KILLNE = 0x2f,
- SQ_OP2_INST_AND_INT = 0x30,
- SQ_OP2_INST_OR_INT = 0x31,
- SQ_OP2_INST_XOR_INT = 0x32,
- SQ_OP2_INST_NOT_INT = 0x33,
- SQ_OP2_INST_ADD_INT = 0x34,
- SQ_OP2_INST_SUB_INT = 0x35,
- SQ_OP2_INST_MAX_INT = 0x36,
- SQ_OP2_INST_MIN_INT = 0x37,
- SQ_OP2_INST_MAX_UINT = 0x38,
- SQ_OP2_INST_MIN_UINT = 0x39,
- SQ_OP2_INST_SETE_INT = 0x3a,
- SQ_OP2_INST_SETGT_INT = 0x3b,
- SQ_OP2_INST_SETGE_INT = 0x3c,
- SQ_OP2_INST_SETNE_INT = 0x3d,
- SQ_OP2_INST_SETGT_UINT = 0x3e,
- SQ_OP2_INST_SETGE_UINT = 0x3f,
- SQ_OP2_INST_KILLGT_UINT = 0x40,
- SQ_OP2_INST_KILLGE_UINT = 0x41,
- SQ_OP2_INST_PRED_SETE_INT = 0x42,
- SQ_OP2_INST_PRED_SETGT_INT = 0x43,
- SQ_OP2_INST_PRED_SETGE_INT = 0x44,
- SQ_OP2_INST_PRED_SETNE_INT = 0x45,
- SQ_OP2_INST_KILLE_INT = 0x46,
- SQ_OP2_INST_KILLGT_INT = 0x47,
- SQ_OP2_INST_KILLGE_INT = 0x48,
- SQ_OP2_INST_KILLNE_INT = 0x49,
- SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
- SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
- SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
- SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
- SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
- SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
- SQ_OP2_INST_DOT4 = 0x50,
- SQ_OP2_INST_DOT4_IEEE = 0x51,
- SQ_OP2_INST_CUBE = 0x52,
- SQ_OP2_INST_MAX4 = 0x53,
- SQ_OP2_INST_MOVA_GPR_INT = 0x60,
- SQ_OP2_INST_EXP_IEEE = 0x61,
- SQ_OP2_INST_LOG_CLAMPED = 0x62,
- SQ_OP2_INST_LOG_IEEE = 0x63,
- SQ_OP2_INST_RECIP_CLAMPED = 0x64,
- SQ_OP2_INST_RECIP_FF = 0x65,
- SQ_OP2_INST_RECIP_IEEE = 0x66,
- SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67,
- SQ_OP2_INST_RECIPSQRT_FF = 0x68,
- SQ_OP2_INST_RECIPSQRT_IEEE = 0x69,
- SQ_OP2_INST_SQRT_IEEE = 0x6a,
- SQ_OP2_INST_FLT_TO_INT = 0x6b,
- SQ_OP2_INST_INT_TO_FLT = 0x6c,
- SQ_OP2_INST_UINT_TO_FLT = 0x6d,
- SQ_OP2_INST_SIN = 0x6e,
- SQ_OP2_INST_COS = 0x6f,
- SQ_OP2_INST_ASHR_INT = 0x70,
- SQ_OP2_INST_LSHR_INT = 0x71,
- SQ_OP2_INST_LSHL_INT = 0x72,
- SQ_OP2_INST_MULLO_INT = 0x73,
- SQ_OP2_INST_MULHI_INT = 0x74,
- SQ_OP2_INST_MULLO_UINT = 0x75,
- SQ_OP2_INST_MULHI_UINT = 0x76,
- SQ_OP2_INST_RECIP_INT = 0x77,
- SQ_OP2_INST_RECIP_UINT = 0x78,
- SQ_OP2_INST_FLT_TO_UINT = 0x79,
- SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
- ARRAY_SIZE_mask = 0xfff << 0,
+ SQ_OP2_INST_ADD = 0x00, /* SQ_OP2_INST_ADD */
+ SQ_OP2_INST_MUL = 0x01, /* SQ_OP2_INST_MUL */
+ SQ_OP2_INST_MUL_IEEE = 0x02, /* SQ_OP2_INST_MUL_IEEE */
+ SQ_OP2_INST_MAX = 0x03, /* SQ_OP2_INST_MAX */
+ SQ_OP2_INST_MIN = 0x04, /* SQ_OP2_INST_MIN */
+ SQ_OP2_INST_MAX_DX10 = 0x05, /* SQ_OP2_INST_MAX_DX10 */
+ SQ_OP2_INST_MIN_DX10 = 0x06, /* SQ_OP2_INST_MIN_DX10 */
+ SQ_OP2_INST_SETE = 0x08, /* SQ_OP2_INST_SETE */
+ SQ_OP2_INST_SETGT = 0x09, /* SQ_OP2_INST_SETGT */
+ SQ_OP2_INST_SETGE = 0x0a, /* SQ_OP2_INST_SETGE */
+ SQ_OP2_INST_SETNE = 0x0b, /* SQ_OP2_INST_SETNE */
+ SQ_OP2_INST_SETE_DX10 = 0x0c, /* SQ_OP2_INST_SETE_DX10 */
+ SQ_OP2_INST_SETGT_DX10 = 0x0d, /* SQ_OP2_INST_SETGT_DX10 */
+ SQ_OP2_INST_SETGE_DX10 = 0x0e, /* SQ_OP2_INST_SETGE_DX10 */
+ SQ_OP2_INST_SETNE_DX10 = 0x0f, /* SQ_OP2_INST_SETNE_DX10 */
+ SQ_OP2_INST_FRACT = 0x10, /* SQ_OP2_INST_FRACT */
+ SQ_OP2_INST_TRUNC = 0x11, /* SQ_OP2_INST_TRUNC */
+ SQ_OP2_INST_CEIL = 0x12, /* SQ_OP2_INST_CEIL */
+ SQ_OP2_INST_RNDNE = 0x13, /* SQ_OP2_INST_RNDNE */
+ SQ_OP2_INST_FLOOR = 0x14, /* SQ_OP2_INST_FLOOR */
+ SQ_OP2_INST_MOVA = 0x15, /* SQ_OP2_INST_MOVA */
+ SQ_OP2_INST_MOVA_FLOOR = 0x16, /* SQ_OP2_INST_MOVA_FLOOR */
+ SQ_OP2_INST_MOVA_INT = 0x18, /* SQ_OP2_INST_MOVA_INT */
+ SQ_OP2_INST_MOV = 0x19, /* SQ_OP2_INST_MOV */
+ SQ_OP2_INST_NOP = 0x1a, /* SQ_OP2_INST_NOP */
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, /* SQ_OP2_INST_PRED_SETGT_UINT */
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, /* SQ_OP2_INST_PRED_SETGE_UINT */
+ SQ_OP2_INST_PRED_SETE = 0x20, /* SQ_OP2_INST_PRED_SETE */
+ SQ_OP2_INST_PRED_SETGT = 0x21, /* SQ_OP2_INST_PRED_SETGT */
+ SQ_OP2_INST_PRED_SETGE = 0x22, /* SQ_OP2_INST_PRED_SETGE */
+ SQ_OP2_INST_PRED_SETNE = 0x23, /* SQ_OP2_INST_PRED_SETNE */
+ SQ_OP2_INST_PRED_SET_INV = 0x24, /* SQ_OP2_INST_PRED_SET_INV */
+ SQ_OP2_INST_PRED_SET_POP = 0x25, /* SQ_OP2_INST_PRED_SET_POP */
+ SQ_OP2_INST_PRED_SET_CLR = 0x26, /* SQ_OP2_INST_PRED_SET_CLR */
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27, /* SQ_OP2_INST_PRED_SET_RESTORE */
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28, /* SQ_OP2_INST_PRED_SETE_PUSH */
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, /* SQ_OP2_INST_PRED_SETGT_PUSH */
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, /* SQ_OP2_INST_PRED_SETGE_PUSH */
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, /* SQ_OP2_INST_PRED_SETNE_PUSH */
+ SQ_OP2_INST_KILLE = 0x2c, /* SQ_OP2_INST_KILLE */
+ SQ_OP2_INST_KILLGT = 0x2d, /* SQ_OP2_INST_KILLGT */
+ SQ_OP2_INST_KILLGE = 0x2e, /* SQ_OP2_INST_KILLGE */
+ SQ_OP2_INST_KILLNE = 0x2f, /* SQ_OP2_INST_KILLNE */
+ SQ_OP2_INST_AND_INT = 0x30, /* SQ_OP2_INST_AND_INT */
+ SQ_OP2_INST_OR_INT = 0x31, /* SQ_OP2_INST_OR_INT */
+ SQ_OP2_INST_XOR_INT = 0x32, /* SQ_OP2_INST_XOR_INT */
+ SQ_OP2_INST_NOT_INT = 0x33, /* SQ_OP2_INST_NOT_INT */
+ SQ_OP2_INST_ADD_INT = 0x34, /* SQ_OP2_INST_ADD_INT */
+ SQ_OP2_INST_SUB_INT = 0x35, /* SQ_OP2_INST_SUB_INT */
+ SQ_OP2_INST_MAX_INT = 0x36, /* SQ_OP2_INST_MAX_INT */
+ SQ_OP2_INST_MIN_INT = 0x37, /* SQ_OP2_INST_MIN_INT */
+ SQ_OP2_INST_MAX_UINT = 0x38, /* SQ_OP2_INST_MAX_UINT */
+ SQ_OP2_INST_MIN_UINT = 0x39, /* SQ_OP2_INST_MIN_UINT */
+ SQ_OP2_INST_SETE_INT = 0x3a, /* SQ_OP2_INST_SETE_INT */
+ SQ_OP2_INST_SETGT_INT = 0x3b, /* SQ_OP2_INST_SETGT_INT */
+ SQ_OP2_INST_SETGE_INT = 0x3c, /* SQ_OP2_INST_SETGE_INT */
+ SQ_OP2_INST_SETNE_INT = 0x3d, /* SQ_OP2_INST_SETNE_INT */
+ SQ_OP2_INST_SETGT_UINT = 0x3e, /* SQ_OP2_INST_SETGT_UINT */
+ SQ_OP2_INST_SETGE_UINT = 0x3f, /* SQ_OP2_INST_SETGE_UINT */
+ SQ_OP2_INST_KILLGT_UINT = 0x40, /* SQ_OP2_INST_KILLGT_UINT */
+ SQ_OP2_INST_KILLGE_UINT = 0x41, /* SQ_OP2_INST_KILLGE_UINT */
+ SQ_OP2_INST_PRED_SETE_INT = 0x42, /* SQ_OP2_INST_PRED_SETE_INT */
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43, /* SQ_OP2_INST_PRED_SETGT_INT */
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44, /* SQ_OP2_INST_PRED_SETGE_INT */
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45, /* SQ_OP2_INST_PRED_SETNE_INT */
+ SQ_OP2_INST_KILLE_INT = 0x46, /* SQ_OP2_INST_KILLE_INT */
+ SQ_OP2_INST_KILLGT_INT = 0x47, /* SQ_OP2_INST_KILLGT_INT */
+ SQ_OP2_INST_KILLGE_INT = 0x48, /* SQ_OP2_INST_KILLGE_INT */
+ SQ_OP2_INST_KILLNE_INT = 0x49, /* SQ_OP2_INST_KILLNE_INT */
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, /* SQ_OP2_INST_PRED_SETE_PUSH_INT */
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, /* SQ_OP2_INST_PRED_SETGT_PUSH_INT */
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, /* SQ_OP2_INST_PRED_SETGE_PUSH_INT */
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, /* SQ_OP2_INST_PRED_SETNE_PUSH_INT */
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, /* SQ_OP2_INST_PRED_SETLT_PUSH_INT */
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, /* SQ_OP2_INST_PRED_SETLE_PUSH_INT */
+ SQ_OP2_INST_DOT4 = 0x50, /* SQ_OP2_INST_DOT4 */
+ SQ_OP2_INST_DOT4_IEEE = 0x51, /* SQ_OP2_INST_DOT4_IEEE */
+ SQ_OP2_INST_CUBE = 0x52, /* SQ_OP2_INST_CUBE */
+ SQ_OP2_INST_MAX4 = 0x53, /* SQ_OP2_INST_MAX4 */
+ SQ_OP2_INST_MOVA_GPR_INT = 0x60, /* SQ_OP2_INST_MOVA_GPR_INT */
+ SQ_OP2_INST_EXP_IEEE = 0x61, /* SQ_OP2_INST_EXP_IEEE */
+ SQ_OP2_INST_LOG_CLAMPED = 0x62, /* SQ_OP2_INST_LOG_CLAMPED */
+ SQ_OP2_INST_LOG_IEEE = 0x63, /* SQ_OP2_INST_LOG_IEEE */
+ SQ_OP2_INST_RECIP_CLAMPED = 0x64, /* SQ_OP2_INST_RECIP_CLAMPED */
+ SQ_OP2_INST_RECIP_FF = 0x65, /* SQ_OP2_INST_RECIP_FF */
+ SQ_OP2_INST_RECIP_IEEE = 0x66, /* SQ_OP2_INST_RECIP_IEEE */
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, /* SQ_OP2_INST_RECIPSQRT_CLAMPED */
+ SQ_OP2_INST_RECIPSQRT_FF = 0x68, /* SQ_OP2_INST_RECIPSQRT_FF */
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, /* SQ_OP2_INST_RECIPSQRT_IEEE */
+ SQ_OP2_INST_SQRT_IEEE = 0x6a, /* SQ_OP2_INST_SQRT_IEEE */
+ SQ_OP2_INST_FLT_TO_INT = 0x6b, /* SQ_OP2_INST_FLT_TO_INT */
+ SQ_OP2_INST_INT_TO_FLT = 0x6c, /* SQ_OP2_INST_INT_TO_FLT */
+ SQ_OP2_INST_UINT_TO_FLT = 0x6d, /* SQ_OP2_INST_UINT_TO_FLT */
+ SQ_OP2_INST_SIN = 0x6e, /* SQ_OP2_INST_SIN */
+ SQ_OP2_INST_COS = 0x6f, /* SQ_OP2_INST_COS */
+ SQ_OP2_INST_ASHR_INT = 0x70, /* SQ_OP2_INST_ASHR_INT */
+ SQ_OP2_INST_LSHR_INT = 0x71, /* SQ_OP2_INST_LSHR_INT */
+ SQ_OP2_INST_LSHL_INT = 0x72, /* SQ_OP2_INST_LSHL_INT */
+ SQ_OP2_INST_MULLO_INT = 0x73, /* SQ_OP2_INST_MULLO_INT */
+ SQ_OP2_INST_MULHI_INT = 0x74, /* SQ_OP2_INST_MULHI_INT */
+ SQ_OP2_INST_MULLO_UINT = 0x75, /* SQ_OP2_INST_MULLO_UINT */
+ SQ_OP2_INST_MULHI_UINT = 0x76, /* SQ_OP2_INST_MULHI_UINT */
+ SQ_OP2_INST_RECIP_INT = 0x77, /* SQ_OP2_INST_RECIP_INT */
+ SQ_OP2_INST_RECIP_UINT = 0x78, /* SQ_OP2_INST_RECIP_UINT */
+ SQ_OP2_INST_FLT_TO_UINT = 0x79, /* SQ_OP2_INST_FLT_TO_UINT */
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, /* Word 1 of the control flow instruction. This subencoding is used by alloc/exports for all input/outputs to scratch/ring/stream/reduction buffers. */
+ ARRAY_SIZE_mask = 0xfff << 0, /* Array size (elem-size units). Represents values [1,4096] when ELEMSIZE=0, [4,16384] when ELEMSIZE=3. */
ARRAY_SIZE_shift = 0,
- COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_mask = 0x0f << 12, /* XYZW component mask (X is the LSB). Write the component iff the corresponding bit is 1. */
COMP_MASK_shift = 12,
- SQ_CF_WORD0 = 0x00008dfc,
- SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
- ARRAY_BASE_mask = 0x1fff << 0,
+ SQ_CF_WORD0 = 0x00008dfc, /* Control flow instruction word 0. This word is the default representation for CF instructions. */
+ /* ADDR: Bits [34:3] of the byte offset (producing a QUAD-word-aligned value) of the clause to execute (clause instructions only). Bits [34:3] of the byte offset (producing a QUAD-word-aligned value) of the control flow address to jump to (instructions that can jump). Offsets are relative to the byte address specified by PGM_START. Texture & Vertex clauses must start on 16-byte aligned addresses. */
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, /* Word 0 of the control flow instruction for alloc/export. */
+ ARRAY_BASE_mask = 0x1fff << 0, /* For scratch/reduction input/output, this is the base address of the array in multiples of 4 dwords [0,32764]. For stream/ring output, this is the base address of the array in multiples of 1 dword [0,8191]. For pixel/z output, this is the index of the first export (framebuffer 0..7; computed Z: 61). For parameter output, this is the parameter index of the first export [0,31]. For position output, this is the position index of the first export [60,63]. */
ARRAY_BASE_shift = 0,
- SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, /* Type of allocation/export. In the table below, the first enumeration value listed (PIXEL, POS, PARAM) is used with CF_INST_EXPORT*. The second enumeration value listed (WRITE, WRITE_IND, WRITE_ACK, WRITE_IND_ACK) is used with CF_INST_MEM*. POSSIBLE VALUES: 00 - SQ_EXPORT_PIXEL: write pixel. SQ_EXPORT_WRITE: write to memory buffer. 01 - SQ_EXPORT_POS: write position. SQ_EXPORT_WRITE_IND: write to memory buffer, use offset in INDEX_GPR. 02 - SQ_EXPORT_PARAM: write parameter cache. SQ_EXPORT_WRITE_ACK: write to memory buffer, request an ACK when write is committed to memory. 03 - Unused for SX exports. SQ_EXPORT_WRITE_IND_ACK: write to memory buffer with offset in INDEX_GPR, get an ACK when done. */
SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
- SQ_EXPORT_PIXEL = 0x00,
- SQ_EXPORT_POS = 0x01,
- SQ_EXPORT_PARAM = 0x02,
- X_UNUSED_FOR_SX_EXPORTS = 0x03,
- RW_GPR_mask = 0x7f << 15,
+ SQ_EXPORT_PIXEL = 0x00, /* SQ_EXPORT_PIXEL: write pixel. SQ_EXPORT_WRITE: write to memory buffer. */
+ SQ_EXPORT_POS = 0x01, /* SQ_EXPORT_POS: write position. SQ_EXPORT_WRITE_IND: write to memory buffer, use offset in INDEX_GPR. */
+ SQ_EXPORT_PARAM = 0x02, /* SQ_EXPORT_PARAM: write parameter cache. SQ_EXPORT_READ: read from memory buffer (scratch and reduction only). */
+ X_UNUSED_FOR_SX_EXPORTS = 0x03, /* Unused for SX exports. SQ_EXPORT_READ_IND: read from memory buffer, use offset in INDEX_GPR (scratch and reduction only). */
+ RW_GPR_mask = 0x7f << 15, /* GPR register to write data to. */
RW_GPR_shift = 15,
- RW_REL_bit = 1 << 22,
- INDEX_GPR_mask = 0x7f << 23,
+ RW_REL_bit = 1 << 22, /* Indicates whether GPR is an absolute address, or relative to the loop index. */
+ INDEX_GPR_mask = 0x7f << 23, /* For any indexed export, this GPR contains an index that will be used in the computation for determining the address of the first export. The index is multiplied by (ELEM_SIZE + 1). Only the X component is used (other components ignored, no swizzle allowed). */
INDEX_GPR_shift = 23,
- ELEM_SIZE_mask = 0x03 << 30,
+ ELEM_SIZE_mask = 0x03 << 30, /* Number of DWORDs per element, minus one. This field is interpreted as a value in [1,2,4] (3 not supported). The value from INDEX_GPR and the loop counter are multiplied by this factor, if applicable. Also, BURST_COUNT is multiplied by this factor for CF_INST_MEM*. This field is ignored for CF_INST_EXPORT*. Normally, ELEMSIZE = 4 DWORDs for scratch & reduction, one DWORD for other types. */
ELEM_SIZE_shift = 30,
- SQ_VTX_WORD1 = 0x00008dfc,
- SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1 = 0x00008dfc, /* Vertex fetch clause instruction word 1 is the bitwise OR of WORD1 | WORD1_{GPR,SEM}. This part contains fields shared by both subencodings. */
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_VTX_WORD1__DST_SEL_X_shift = 9,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, /* Indicate which component of the result to write to dst.XYZW. Can be used to mask out components when writing to destination GPR. POSSIBLE VALUES: 00 - SQ_SEL_X: use X component 01 - SQ_SEL_Y: use Y component 02 - SQ_SEL_Z: use Z component 03 - SQ_SEL_W: use W component 04 - SQ_SEL_0: use constant 0.0 05 - SQ_SEL_1: use constant 1.0 06 - Reserved 07 - SQ_SEL_MASK: mask out this component */
SQ_VTX_WORD1__DST_SEL_W_shift = 18,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
-/* SQ_SEL_MASK = 0x07, */
- USE_CONST_FIELDS_bit = 1 << 21,
- SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
+/* SQ_SEL_MASK = 0x07, */ /* SQ_SEL_MASK: mask out this component */
+ USE_CONST_FIELDS_bit = 1 << 21, /* If set, use format given in the fetch constant instead of in this instruction. */
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, /* Indicate vertex data format (ignored if USE_CONST_FIELDS = 1). */
SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
- SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, /* Format of returning data (N is the number of bits derived from DATA_FORMAT and gamma) (ignored if USE_CONST_FIELDS = 1). POSSIBLE VALUES: 00 - SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [-1, 1] if signed. 01 - SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). 02 - SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
- SQ_NUM_FORMAT_NORM = 0x00,
- SQ_NUM_FORMAT_INT = 0x01,
- SQ_NUM_FORMAT_SCALED = 0x02,
- SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
- SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
- SQ_ALU_WORD1_OP2 = 0x00008dfc,
-/* SRC0_ABS_bit = 1 << 0, */
-/* SRC1_ABS_bit = 1 << 1, */
-/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */
-/* UPDATE_PRED_bit = 1 << 3, */
-/* WRITE_MASK_bit = 1 << 4, */
- FOG_MERGE_bit = 1 << 5,
- SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6,
+ SQ_NUM_FORMAT_NORM = 0x00, /* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [-1, 1] if signed. */
+ SQ_NUM_FORMAT_INT = 0x01, /* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
+ SQ_NUM_FORMAT_SCALED = 0x02, /* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, /* Indicate sign of source components (ignored if USE_CONST_FIELDS = 1). */
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, /* Mapping to use when converting from signed RF to float (ignored if USE_CONST_FIELDS = 1). */
+ SQ_ALU_WORD1_OP2 = 0x00008dfc, /* ALU instruction word 1. This subencoding is used for OP2 instructions (instructions taking 0 to 2 operands). */
+/* SRC0_ABS_bit = 1 << 0, */ /* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
+/* SRC1_ABS_bit = 1 << 1, */ /* If set, take the absolute value of the input for this operand. Should only be set for floating point inputs; performed before negation. */
+/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */ /* If set, update the execute mask in the SQ after executing this instruction based on the current predicate. */
+/* UPDATE_PRED_bit = 1 << 3, */ /* If set, update the predicate in the SP based on the predicate operation computed here. */
+/* WRITE_MASK_bit = 1 << 4, */ /* If set, write this scalar result to the destination GPR channel. */
+ FOG_MERGE_bit = 1 << 5, /* If set, export fog value by merging the transcendental ALU result into the low-order bits of the vector destination. The vector results will lose some precision. This bit takes effect when set on the scalar instruction. */
+ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, /* Output modifier for this instruction. Must be set to ALU_OMOD_OFF for operations that produce an integer result. */
SQ_ALU_WORD1_OP2__OMOD_shift = 6,
-/* SQ_ALU_OMOD_OFF = 0x00, */
-/* SQ_ALU_OMOD_M2 = 0x01, */
-/* SQ_ALU_OMOD_M4 = 0x02, */
-/* SQ_ALU_OMOD_D2 = 0x03, */
- SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+/* SQ_ALU_OMOD_OFF = 0x00, */ /* SQ_ALU_OMOD_OFF: identity. */
+/* SQ_ALU_OMOD_M2 = 0x01, */ /* SQ_ALU_OMOD_M2: multiply by 2.0. */
+/* SQ_ALU_OMOD_M4 = 0x02, */ /* SQ_ALU_OMOD_M4: multiply by 4.0. */
+/* SQ_ALU_OMOD_D2 = 0x03, */ /* SQ_ALU_OMOD_D2: divide by 2.0. */
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, /* Instruction opcode. The top 3 bits of this must be zero. Caution: gaps in opcode values are not marked in the table below. */
SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
-/* SQ_OP2_INST_ADD = 0x00, */
-/* SQ_OP2_INST_MUL = 0x01, */
-/* SQ_OP2_INST_MUL_IEEE = 0x02, */
-/* SQ_OP2_INST_MAX = 0x03, */
-/* SQ_OP2_INST_MIN = 0x04, */
-/* SQ_OP2_INST_MAX_DX10 = 0x05, */
-/* SQ_OP2_INST_MIN_DX10 = 0x06, */
-/* SQ_OP2_INST_SETE = 0x08, */
-/* SQ_OP2_INST_SETGT = 0x09, */
-/* SQ_OP2_INST_SETGE = 0x0a, */
-/* SQ_OP2_INST_SETNE = 0x0b, */
-/* SQ_OP2_INST_SETE_DX10 = 0x0c, */
-/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */
-/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */
-/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */
-/* SQ_OP2_INST_FRACT = 0x10, */
-/* SQ_OP2_INST_TRUNC = 0x11, */
-/* SQ_OP2_INST_CEIL = 0x12, */
-/* SQ_OP2_INST_RNDNE = 0x13, */
-/* SQ_OP2_INST_FLOOR = 0x14, */
-/* SQ_OP2_INST_MOVA = 0x15, */
-/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */
-/* SQ_OP2_INST_MOVA_INT = 0x18, */
-/* SQ_OP2_INST_MOV = 0x19, */
-/* SQ_OP2_INST_NOP = 0x1a, */
-/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */
-/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */
-/* SQ_OP2_INST_PRED_SETE = 0x20, */
-/* SQ_OP2_INST_PRED_SETGT = 0x21, */
-/* SQ_OP2_INST_PRED_SETGE = 0x22, */
-/* SQ_OP2_INST_PRED_SETNE = 0x23, */
-/* SQ_OP2_INST_PRED_SET_INV = 0x24, */
-/* SQ_OP2_INST_PRED_SET_POP = 0x25, */
-/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */
-/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */
-/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */
-/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */
-/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */
-/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */
-/* SQ_OP2_INST_KILLE = 0x2c, */
-/* SQ_OP2_INST_KILLGT = 0x2d, */
-/* SQ_OP2_INST_KILLGE = 0x2e, */
-/* SQ_OP2_INST_KILLNE = 0x2f, */
-/* SQ_OP2_INST_AND_INT = 0x30, */
-/* SQ_OP2_INST_OR_INT = 0x31, */
-/* SQ_OP2_INST_XOR_INT = 0x32, */
-/* SQ_OP2_INST_NOT_INT = 0x33, */
-/* SQ_OP2_INST_ADD_INT = 0x34, */
-/* SQ_OP2_INST_SUB_INT = 0x35, */
-/* SQ_OP2_INST_MAX_INT = 0x36, */
-/* SQ_OP2_INST_MIN_INT = 0x37, */
-/* SQ_OP2_INST_MAX_UINT = 0x38, */
-/* SQ_OP2_INST_MIN_UINT = 0x39, */
-/* SQ_OP2_INST_SETE_INT = 0x3a, */
-/* SQ_OP2_INST_SETGT_INT = 0x3b, */
-/* SQ_OP2_INST_SETGE_INT = 0x3c, */
-/* SQ_OP2_INST_SETNE_INT = 0x3d, */
-/* SQ_OP2_INST_SETGT_UINT = 0x3e, */
-/* SQ_OP2_INST_SETGE_UINT = 0x3f, */
-/* SQ_OP2_INST_KILLGT_UINT = 0x40, */
-/* SQ_OP2_INST_KILLGE_UINT = 0x41, */
-/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */
-/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */
-/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */
-/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */
-/* SQ_OP2_INST_KILLE_INT = 0x46, */
-/* SQ_OP2_INST_KILLGT_INT = 0x47, */
-/* SQ_OP2_INST_KILLGE_INT = 0x48, */
-/* SQ_OP2_INST_KILLNE_INT = 0x49, */
-/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */
-/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */
-/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */
-/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */
-/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */
-/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */
-/* SQ_OP2_INST_DOT4 = 0x50, */
-/* SQ_OP2_INST_DOT4_IEEE = 0x51, */
-/* SQ_OP2_INST_CUBE = 0x52, */
-/* SQ_OP2_INST_MAX4 = 0x53, */
-/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */
-/* SQ_OP2_INST_EXP_IEEE = 0x61, */
-/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */
-/* SQ_OP2_INST_LOG_IEEE = 0x63, */
-/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */
-/* SQ_OP2_INST_RECIP_FF = 0x65, */
-/* SQ_OP2_INST_RECIP_IEEE = 0x66, */
-/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */
-/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */
-/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */
-/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */
-/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */
-/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */
-/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */
-/* SQ_OP2_INST_SIN = 0x6e, */
-/* SQ_OP2_INST_COS = 0x6f, */
-/* SQ_OP2_INST_ASHR_INT = 0x70, */
-/* SQ_OP2_INST_LSHR_INT = 0x71, */
-/* SQ_OP2_INST_LSHL_INT = 0x72, */
-/* SQ_OP2_INST_MULLO_INT = 0x73, */
-/* SQ_OP2_INST_MULHI_INT = 0x74, */
-/* SQ_OP2_INST_MULLO_UINT = 0x75, */
-/* SQ_OP2_INST_MULHI_UINT = 0x76, */
-/* SQ_OP2_INST_RECIP_INT = 0x77, */
-/* SQ_OP2_INST_RECIP_UINT = 0x78, */
-/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */
- SQ_CF_WORD1 = 0x00008dfc,
- POP_COUNT_mask = 0x07 << 0,
+/* SQ_OP2_INST_ADD = 0x00, */ /* SQ_OP2_INST_ADD */
+/* SQ_OP2_INST_MUL = 0x01, */ /* SQ_OP2_INST_MUL */
+/* SQ_OP2_INST_MUL_IEEE = 0x02, */ /* SQ_OP2_INST_MUL_IEEE */
+/* SQ_OP2_INST_MAX = 0x03, */ /* SQ_OP2_INST_MAX */
+/* SQ_OP2_INST_MIN = 0x04, */ /* SQ_OP2_INST_MIN */
+/* SQ_OP2_INST_MAX_DX10 = 0x05, */ /* SQ_OP2_INST_MAX_DX10 */
+/* SQ_OP2_INST_MIN_DX10 = 0x06, */ /* SQ_OP2_INST_MIN_DX10 */
+/* SQ_OP2_INST_SETE = 0x08, */ /* SQ_OP2_INST_SETE */
+/* SQ_OP2_INST_SETGT = 0x09, */ /* SQ_OP2_INST_SETGT */
+/* SQ_OP2_INST_SETGE = 0x0a, */ /* SQ_OP2_INST_SETGE */
+/* SQ_OP2_INST_SETNE = 0x0b, */ /* SQ_OP2_INST_SETNE */
+/* SQ_OP2_INST_SETE_DX10 = 0x0c, */ /* SQ_OP2_INST_SETE_DX10 */
+/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */ /* SQ_OP2_INST_SETGT_DX10 */
+/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */ /* SQ_OP2_INST_SETGE_DX10 */
+/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */ /* SQ_OP2_INST_SETNE_DX10 */
+/* SQ_OP2_INST_FRACT = 0x10, */ /* SQ_OP2_INST_FRACT */
+/* SQ_OP2_INST_TRUNC = 0x11, */ /* SQ_OP2_INST_TRUNC */
+/* SQ_OP2_INST_CEIL = 0x12, */ /* SQ_OP2_INST_CEIL */
+/* SQ_OP2_INST_RNDNE = 0x13, */ /* SQ_OP2_INST_RNDNE */
+/* SQ_OP2_INST_FLOOR = 0x14, */ /* SQ_OP2_INST_FLOOR */
+/* SQ_OP2_INST_MOVA = 0x15, */ /* SQ_OP2_INST_MOVA */
+/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */ /* SQ_OP2_INST_MOVA_FLOOR */
+/* SQ_OP2_INST_MOVA_INT = 0x18, */ /* SQ_OP2_INST_MOVA_INT */
+/* SQ_OP2_INST_MOV = 0x19, */ /* SQ_OP2_INST_MOV */
+/* SQ_OP2_INST_NOP = 0x1a, */ /* SQ_OP2_INST_NOP */
+/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */ /* SQ_OP2_INST_PRED_SETGT_UINT */
+/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */ /* SQ_OP2_INST_PRED_SETGE_UINT */
+/* SQ_OP2_INST_PRED_SETE = 0x20, */ /* SQ_OP2_INST_PRED_SETE */
+/* SQ_OP2_INST_PRED_SETGT = 0x21, */ /* SQ_OP2_INST_PRED_SETGT */
+/* SQ_OP2_INST_PRED_SETGE = 0x22, */ /* SQ_OP2_INST_PRED_SETGE */
+/* SQ_OP2_INST_PRED_SETNE = 0x23, */ /* SQ_OP2_INST_PRED_SETNE */
+/* SQ_OP2_INST_PRED_SET_INV = 0x24, */ /* SQ_OP2_INST_PRED_SET_INV */
+/* SQ_OP2_INST_PRED_SET_POP = 0x25, */ /* SQ_OP2_INST_PRED_SET_POP */
+/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */ /* SQ_OP2_INST_PRED_SET_CLR */
+/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */ /* SQ_OP2_INST_PRED_SET_RESTORE */
+/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */ /* SQ_OP2_INST_PRED_SETE_PUSH */
+/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */ /* SQ_OP2_INST_PRED_SETGT_PUSH */
+/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */ /* SQ_OP2_INST_PRED_SETGE_PUSH */
+/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */ /* SQ_OP2_INST_PRED_SETNE_PUSH */
+/* SQ_OP2_INST_KILLE = 0x2c, */ /* SQ_OP2_INST_KILLE */
+/* SQ_OP2_INST_KILLGT = 0x2d, */ /* SQ_OP2_INST_KILLGT */
+/* SQ_OP2_INST_KILLGE = 0x2e, */ /* SQ_OP2_INST_KILLGE */
+/* SQ_OP2_INST_KILLNE = 0x2f, */ /* SQ_OP2_INST_KILLNE */
+/* SQ_OP2_INST_AND_INT = 0x30, */ /* SQ_OP2_INST_AND_INT */
+/* SQ_OP2_INST_OR_INT = 0x31, */ /* SQ_OP2_INST_OR_INT */
+/* SQ_OP2_INST_XOR_INT = 0x32, */ /* SQ_OP2_INST_XOR_INT */
+/* SQ_OP2_INST_NOT_INT = 0x33, */ /* SQ_OP2_INST_NOT_INT */
+/* SQ_OP2_INST_ADD_INT = 0x34, */ /* SQ_OP2_INST_ADD_INT */
+/* SQ_OP2_INST_SUB_INT = 0x35, */ /* SQ_OP2_INST_SUB_INT */
+/* SQ_OP2_INST_MAX_INT = 0x36, */ /* SQ_OP2_INST_MAX_INT */
+/* SQ_OP2_INST_MIN_INT = 0x37, */ /* SQ_OP2_INST_MIN_INT */
+/* SQ_OP2_INST_MAX_UINT = 0x38, */ /* SQ_OP2_INST_MAX_UINT */
+/* SQ_OP2_INST_MIN_UINT = 0x39, */ /* SQ_OP2_INST_MIN_UINT */
+/* SQ_OP2_INST_SETE_INT = 0x3a, */ /* SQ_OP2_INST_SETE_INT */
+/* SQ_OP2_INST_SETGT_INT = 0x3b, */ /* SQ_OP2_INST_SETGT_INT */
+/* SQ_OP2_INST_SETGE_INT = 0x3c, */ /* SQ_OP2_INST_SETGE_INT */
+/* SQ_OP2_INST_SETNE_INT = 0x3d, */ /* SQ_OP2_INST_SETNE_INT */
+/* SQ_OP2_INST_SETGT_UINT = 0x3e, */ /* SQ_OP2_INST_SETGT_UINT */
+/* SQ_OP2_INST_SETGE_UINT = 0x3f, */ /* SQ_OP2_INST_SETGE_UINT */
+/* SQ_OP2_INST_KILLGT_UINT = 0x40, */ /* SQ_OP2_INST_KILLGT_UINT */
+/* SQ_OP2_INST_KILLGE_UINT = 0x41, */ /* SQ_OP2_INST_KILLGE_UINT */
+/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */ /* SQ_OP2_INST_PRED_SETE_INT */
+/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */ /* SQ_OP2_INST_PRED_SETGT_INT */
+/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */ /* SQ_OP2_INST_PRED_SETGE_INT */
+/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */ /* SQ_OP2_INST_PRED_SETNE_INT */
+/* SQ_OP2_INST_KILLE_INT = 0x46, */ /* SQ_OP2_INST_KILLE_INT */
+/* SQ_OP2_INST_KILLGT_INT = 0x47, */ /* SQ_OP2_INST_KILLGT_INT */
+/* SQ_OP2_INST_KILLGE_INT = 0x48, */ /* SQ_OP2_INST_KILLGE_INT */
+/* SQ_OP2_INST_KILLNE_INT = 0x49, */ /* SQ_OP2_INST_KILLNE_INT */
+/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */ /* SQ_OP2_INST_PRED_SETE_PUSH_INT */
+/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */ /* SQ_OP2_INST_PRED_SETGT_PUSH_INT */
+/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */ /* SQ_OP2_INST_PRED_SETGE_PUSH_INT */
+/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */ /* SQ_OP2_INST_PRED_SETNE_PUSH_INT */
+/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */ /* SQ_OP2_INST_PRED_SETLT_PUSH_INT */
+/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */ /* SQ_OP2_INST_PRED_SETLE_PUSH_INT */
+/* SQ_OP2_INST_DOT4 = 0x50, */ /* SQ_OP2_INST_DOT4 */
+/* SQ_OP2_INST_DOT4_IEEE = 0x51, */ /* SQ_OP2_INST_DOT4_IEEE */
+/* SQ_OP2_INST_CUBE = 0x52, */ /* SQ_OP2_INST_CUBE */
+/* SQ_OP2_INST_MAX4 = 0x53, */ /* SQ_OP2_INST_MAX4 */
+/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */ /* SQ_OP2_INST_MOVA_GPR_INT */
+/* SQ_OP2_INST_EXP_IEEE = 0x61, */ /* SQ_OP2_INST_EXP_IEEE */
+/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */ /* SQ_OP2_INST_LOG_CLAMPED */
+/* SQ_OP2_INST_LOG_IEEE = 0x63, */ /* SQ_OP2_INST_LOG_IEEE */
+/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */ /* SQ_OP2_INST_RECIP_CLAMPED */
+/* SQ_OP2_INST_RECIP_FF = 0x65, */ /* SQ_OP2_INST_RECIP_FF */
+/* SQ_OP2_INST_RECIP_IEEE = 0x66, */ /* SQ_OP2_INST_RECIP_IEEE */
+/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */ /* SQ_OP2_INST_RECIPSQRT_CLAMPED */
+/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */ /* SQ_OP2_INST_RECIPSQRT_FF */
+/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */ /* SQ_OP2_INST_RECIPSQRT_IEEE */
+/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */ /* SQ_OP2_INST_SQRT_IEEE */
+/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */ /* SQ_OP2_INST_FLT_TO_INT */
+/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */ /* SQ_OP2_INST_INT_TO_FLT */
+/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */ /* SQ_OP2_INST_UINT_TO_FLT */
+/* SQ_OP2_INST_SIN = 0x6e, */ /* SQ_OP2_INST_SIN */
+/* SQ_OP2_INST_COS = 0x6f, */ /* SQ_OP2_INST_COS */
+/* SQ_OP2_INST_ASHR_INT = 0x70, */ /* SQ_OP2_INST_ASHR_INT */
+/* SQ_OP2_INST_LSHR_INT = 0x71, */ /* SQ_OP2_INST_LSHR_INT */
+/* SQ_OP2_INST_LSHL_INT = 0x72, */ /* SQ_OP2_INST_LSHL_INT */
+/* SQ_OP2_INST_MULLO_INT = 0x73, */ /* SQ_OP2_INST_MULLO_INT */
+/* SQ_OP2_INST_MULHI_INT = 0x74, */ /* SQ_OP2_INST_MULHI_INT */
+/* SQ_OP2_INST_MULLO_UINT = 0x75, */ /* SQ_OP2_INST_MULLO_UINT */
+/* SQ_OP2_INST_MULHI_UINT = 0x76, */ /* SQ_OP2_INST_MULHI_UINT */
+/* SQ_OP2_INST_RECIP_INT = 0x77, */ /* SQ_OP2_INST_RECIP_INT */
+/* SQ_OP2_INST_RECIP_UINT = 0x78, */ /* SQ_OP2_INST_RECIP_UINT */
+/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */ /* SQ_OP2_INST_FLT_TO_UINT */
+ SQ_CF_WORD1 = 0x00008dfc, /* Control flow instruction word 1. This word is the default representation for CF instructions. */
+ POP_COUNT_mask = 0x07 << 0, /* Specify the number of entries to pop from the stack, in [0..7]. Only used by certain CF instructions that pop the branch-loop stack. May be zero, to indicate no pop operation. */
POP_COUNT_shift = 0,
- CF_CONST_mask = 0x1f << 3,
+ CF_CONST_mask = 0x1f << 3, /* Specify the CF constant to use for flow control statements. For LOOP/ENDLOOP, this specifies the integer constant to use for the loop counter, loop index initializer, and increment. For instructions using COND, this specifies the index of the boolean constant to use. */
CF_CONST_shift = 3,
- COND_mask = 0x03 << 8,
+ COND_mask = 0x03 << 8, /* Specifies how to evaluate the condition test for each pixel. Not used by all instructions. May reference CF_CONST. POSSIBLE VALUES: 00 - SQ_CF_COND_ACTIVE: condition test passes for active pixels. 01 - SQ_CF_COND_FALSE: condition test fails for all pixels. 02 - SQ_CF_COND_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is true. 03 - SQ_CF_COND_NOT_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is false. */
COND_shift = 8,
- SQ_CF_COND_ACTIVE = 0x00,
- SQ_CF_COND_FALSE = 0x01,
- SQ_CF_COND_BOOL = 0x02,
- SQ_CF_COND_NOT_BOOL = 0x03,
- SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ SQ_CF_COND_ACTIVE = 0x00, /* SQ_CF_COND_ACTIVE: condition test passes for active pixels. */
+ SQ_CF_COND_FALSE = 0x01, /* SQ_CF_COND_FALSE: condition test fails for all pixels. */
+ SQ_CF_COND_BOOL = 0x02, /* SQ_CF_COND_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is true. */
+ SQ_CF_COND_NOT_BOOL = 0x03, /* SQ_CF_COND_NOT_BOOL: condition test passes iff pixel is active and boolean referenced by CF_CONST is false. */
+ SQ_CF_WORD1__COUNT_mask = 0x07 << 10, /* Number of instructions to execute in the clause, minus one (clause instructions only). This is interpreted as the number of instruction slots in the range [1,16]. MSB of count is COUNT_3 field. */
SQ_CF_WORD1__COUNT_shift = 10,
- CALL_COUNT_mask = 0x3f << 13,
+ CALL_COUNT_mask = 0x3f << 13, /* Amount to increment call nesting counter by when executing a CALL statement; a CALL is skipped if the current nesting depth + call_count > 32. This field is interpreted in the range [0,31], and has no effect for other instruction types. */
CALL_COUNT_shift = 13,
- COUNT_3_bit = 1 << 19,
-/* END_OF_PROGRAM_bit = 1 << 21, */
-/* VALID_PIXEL_MODE_bit = 1 << 22, */
- SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ COUNT_3_bit = 1 << 19, /* MSB of COUNT field. */
+/* END_OF_PROGRAM_bit = 1 << 21, */ /* If set, then this instruction is the last instruction of the CF program. Execution ends after this instruction is issued. */
+/* VALID_PIXEL_MODE_bit = 1 << 22, */ /* If set, execute this instruction/clause as if invalid pixels are inactive. Antonym of WHOLE_QUAD_MODE. Caution: VALID_PIXEL_MODE is not the `default` mode; this bit should be set to 0 by default. */
+ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, /* Type of instruction to evaluate in CF. For this encoding, CF_INST must be set to one of the following values. POSSIBLE VALUES: 00 - SQ_CF_INST_NOP: perform no operation. 01 - SQ_CF_INST_TEX: execute texture fetch clause, through the texture cache. CF_COND=ACTIVE is required. 02 - SQ_CF_INST_VTX: execute vertex fetch clause, through the vertex-cache (if exists). CF_COND=ACTIVE is required. 03 - SQ_CF_INST_VTX_TC: execute vertex fetch clause through the texture cache. CF_COND=ACTIVE is required. 04 - SQ_CF_INST_LOOP_START: execute DX9 loop start instruction (push onto loop stack if loop body executes). 05 - SQ_CF_INST_LOOP_END: execute DX9 loop end instruction (pop loop stack if loop is finished). 06 - SQ_CF_INST_LOOP_START_DX10: execute DX10 loop start instruction (push onto loop stack if loop body executes). 07 - SQ_CF_INST_LOOP_START_NO_AL: same as LOOP_START but don`t push AL onto stack or update AL. 08 - SQ_CF_INST_LOOP_CONTINUE: execute continue statement (jump to end of loop if all pixels ready to continue). 09 - SQ_CF_INST_LOOP_BREAK: execute a break statement (pop loop stack if all pixels ready to break). 10 - SQ_CF_INST_JUMP: execute jump statement (may be conditional). 11 - SQ_CF_INST_PUSH: push current per-pixel active state onto stack OR jump and pop if no items would be active. 12 - SQ_CF_INST_PUSH_ELSE: push current per- pixel active state onto stack ND jump if no items would be active. 13 - SQ_CF_INST_ELSE: execute else statement (may be conditional) OR jump if no items would be active. 14 - SQ_CF_INST_POP: pop current per-pixel state from the stack. jump if no pixels were enabled prior to pop. 15 - SQ_CF_INST_POP_JUMP: pop current per- pixel state from the stack. then execute CF_INST_JUMP with pop count = 0. 16 - SQ_CF_INST_POP_PUSH: pop current per- pixel state from the stack. then execute CF_INST_PUSH with pop count = 0. 17 - SQ_CF_INST_POP_PUSH_ELSE: pop current per-pixel state from the stack. 
then execute CF_INST_PUSH_ELSE. 18 - SQ_CF_INST_CALL: execute subroutine call instruction (push onto address stack). 19 - SQ_CF_INST_CALL_FS: call fetch shader. The address to call is stored in a state register in SQ. 20 - SQ_CF_INST_RETURN: execute subroutine return instruction (pop address stack). Pair with CF_INST_CALL only. 21 - SQ_CF_INST_EMIT_VERTEX: signal that GS has finished exporting a vertex to memory. CF_COND=ACTIVE is required. 22 - SQ_CF_INST_EMIT_CUT_VERTEX: emit a vertex and an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. 23 - SQ_CF_INST_CUT_VERTEX: emit an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. 24 - SQ_CF_INST_KILL: kill pixels that pass the condition test (may be conditional). jump if all pixels are killed. CF_COND=ACTIVE is required. */
SQ_CF_WORD1__CF_INST_shift = 23,
- SQ_CF_INST_NOP = 0x00,
- SQ_CF_INST_TEX = 0x01,
- SQ_CF_INST_VTX = 0x02,
- SQ_CF_INST_VTX_TC = 0x03,
- SQ_CF_INST_LOOP_START = 0x04,
- SQ_CF_INST_LOOP_END = 0x05,
- SQ_CF_INST_LOOP_START_DX10 = 0x06,
- SQ_CF_INST_LOOP_START_NO_AL = 0x07,
- SQ_CF_INST_LOOP_CONTINUE = 0x08,
- SQ_CF_INST_LOOP_BREAK = 0x09,
- SQ_CF_INST_JUMP = 0x0a,
- SQ_CF_INST_PUSH = 0x0b,
- SQ_CF_INST_PUSH_ELSE = 0x0c,
- SQ_CF_INST_ELSE = 0x0d,
- SQ_CF_INST_POP = 0x0e,
- SQ_CF_INST_POP_JUMP = 0x0f,
- SQ_CF_INST_POP_PUSH = 0x10,
- SQ_CF_INST_POP_PUSH_ELSE = 0x11,
- SQ_CF_INST_CALL = 0x12,
- SQ_CF_INST_CALL_FS = 0x13,
- SQ_CF_INST_RETURN = 0x14,
- SQ_CF_INST_EMIT_VERTEX = 0x15,
- SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
- SQ_CF_INST_CUT_VERTEX = 0x17,
- SQ_CF_INST_KILL = 0x18,
-/* WHOLE_QUAD_MODE_bit = 1 << 30, */
-/* BARRIER_bit = 1 << 31, */
- SQ_VTX_WORD1_SEM = 0x00008dfc,
- SEMANTIC_ID_mask = 0xff << 0,
+ SQ_CF_INST_NOP = 0x00, /* SQ_CF_INST_NOP: perform no operation. */
+ SQ_CF_INST_TEX = 0x01, /* SQ_CF_INST_TEX: execute texture fetch or constant fetch clause. CF_COND=ACTIVE is required. */
+ SQ_CF_INST_VTX = 0x02, /* SQ_CF_INST_VTX: execute vertex fetch clause. CF_COND=ACTIVE is required. */
+ SQ_CF_INST_VTX_TC = 0x03, /* SQ_CF_INST_VTX_TC: execute vertex fetch clause through the texture cache (for systems lacking VC). CF_COND=ACTIVE is required. */
+ SQ_CF_INST_LOOP_START = 0x04, /* SQ_CF_INST_LOOP_START: execute DX9 loop start instruction (push onto loop stack if loop body executes). */
+ SQ_CF_INST_LOOP_END = 0x05, /* SQ_CF_INST_LOOP_END: execute DX9 loop end instruction (pop loop stack if loop is finished). */
+ SQ_CF_INST_LOOP_START_DX10 = 0x06, /* SQ_CF_INST_LOOP_START_DX10: execute DX10 loop start instruction (push onto loop stack if loop body executes). */
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07, /* SQ_CF_INST_LOOP_START_NO_AL: same as LOOP_START but don't push AL onto stack or update AL. */
+ SQ_CF_INST_LOOP_CONTINUE = 0x08, /* SQ_CF_INST_LOOP_CONTINUE: execute continue statement (jump to end of loop if all pixels ready to continue). */
+ SQ_CF_INST_LOOP_BREAK = 0x09, /* SQ_CF_INST_LOOP_BREAK: execute a break statement (pop loop stack if all pixels ready to break). */
+ SQ_CF_INST_JUMP = 0x0a, /* SQ_CF_INST_JUMP: execute jump statement (may be conditional). */
+ SQ_CF_INST_PUSH = 0x0b, /* SQ_CF_INST_PUSH: push current per-pixel active state onto stack OR jump and pop if no items would be active. */
+ SQ_CF_INST_PUSH_ELSE = 0x0c, /* SQ_CF_INST_PUSH_ELSE: push current per-pixel active state onto stack AND jump if no items would be active. */
+ SQ_CF_INST_ELSE = 0x0d, /* SQ_CF_INST_ELSE: execute else statement (may be conditional) OR jump if no items would be active. */
+ SQ_CF_INST_POP = 0x0e, /* SQ_CF_INST_POP: pop current per-pixel state from the stack. jump if no pixels were enabled prior to pop. */
+ SQ_CF_INST_POP_JUMP = 0x0f, /* SQ_CF_INST_POP_JUMP: pop current per-pixel state from the stack. then execute CF_INST_JUMP with pop count = 0. */
+ SQ_CF_INST_POP_PUSH = 0x10, /* SQ_CF_INST_POP_PUSH: pop current per-pixel state from the stack. then execute CF_INST_PUSH with pop count = 0. */
+ SQ_CF_INST_POP_PUSH_ELSE = 0x11, /* SQ_CF_INST_POP_PUSH_ELSE: pop current per-pixel state from the stack. then execute CF_INST_PUSH_ELSE. */
+ SQ_CF_INST_CALL = 0x12, /* SQ_CF_INST_CALL: execute subroutine call instruction (push onto address stack). */
+ SQ_CF_INST_CALL_FS = 0x13, /* SQ_CF_INST_CALL_FS: call fetch shader. The address to call is stored in a state register in SQ. */
+ SQ_CF_INST_RETURN = 0x14, /* SQ_CF_INST_RETURN: execute subroutine return instruction (pop address stack). Pair with CF_INST_CALL only. */
+ SQ_CF_INST_EMIT_VERTEX = 0x15, /* SQ_CF_INST_EMIT_VERTEX: signal that GS has finished exporting a vertex to memory. CF_COND=ACTIVE is required. */
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, /* SQ_CF_INST_EMIT_CUT_VERTEX: emit a vertex and an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. */
+ SQ_CF_INST_CUT_VERTEX = 0x17, /* SQ_CF_INST_CUT_VERTEX: emit an end of primitive strip marker. The next emitted vertex will start a new primitive strip. CF_COND=ACTIVE is required. */
+ SQ_CF_INST_KILL = 0x18, /* SQ_CF_INST_KILL: kill pixels that pass the condition test (may be conditional). jump if all pixels are killed. CF_COND=ACTIVE is required. */
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */ /* If set, execute this instruction/clause as if all pixels are active and valid. Antonym of VALID_PIXEL_MODE. Set at most one of these bits. */
+/* BARRIER_bit = 1 << 31, */ /* If set, all prior CF instructions/clauses must complete before this instruction/clause executes. If not set, this instruction/clause may run in parallel with prior instructions. */
+ SQ_VTX_WORD1_SEM = 0x00008dfc, /* Vertex fetch clause instruction word 1. This subencoding is used by semantic fetch instructions that specify the destination using a semantic table. */
+ SEMANTIC_ID_mask = 0xff << 0, /* Specify the 8-bit semantic ID used to lookup the destination GPR from the semantic table. */
SEMANTIC_ID_shift = 0,
- SQ_TEX_WORD0 = 0x00008dfc,
- TEX_INST_mask = 0x1f << 0,
+ SQ_TEX_WORD0 = 0x00008dfc, /* Texture fetch clause instruction word 0. */
+ TEX_INST_mask = 0x1f << 0, /* Opcode for this texture instruction. POSSIBLE VALUES: 00 - SQ_TEX_INST_VTX_FETCH: vertex fetch (X = uint32 index) 01 - SQ_TEX_INST_VTX_SEMANTIC: semantic vertex fetch 03 - SQ_TEX_INST_LD: fetch texel, XYZL are uint32 04 - SQ_TEX_INST_GET_TEXTURE_RESINFO: retrieve width, height, depth, number of mipmap levels 05 - SQ_TEX_INST_GET_NUMBER_OF_SAMPLES: retrieve width, height, depth, number of samples of an MSAA surface 06 - SQ_TEX_INST_GET_LOD: X = computed LOD for all pixels in quad 07 - SQ_TEX_INST_GET_GRADIENTS_H: slopes relative to horizontal: X = dx/dh, Y = dy/dh, Z = dz/dh, W = dw/dh 08 - SQ_TEX_INST_GET_GRADIENTS_V: slopes relative to vertical: X = dx/dv, Y = dy/dv, Z = dz/dv, W = dw/dv 09 - SQ_TEX_INST_GET_LERP: retrieve weights used for bilinear fetch, X = horizontal lerp, Y = vertical lerp, Z = volume slice lerp, W = mipmap lerp 11 - SQ_TEX_INST_SET_GRADIENTS_H: XYZ set horizontal gradients 12 - SQ_TEX_INST_SET_GRADIENTS_V: XYZ set vertical gradients 13 - SQ_TEX_INST_PASS: returns the address read in memory 14 - Z set index for array of cubemaps 16 - SQ_TEX_INST_SAMPLE 17 - SQ_TEX_INST_SAMPLE_L 18 - SQ_TEX_INST_SAMPLE_LB 19 - SQ_TEX_INST_SAMPLE_LZ 20 - SQ_TEX_INST_SAMPLE_G 21 - SQ_TEX_INST_SAMPLE_G_L 22 - SQ_TEX_INST_SAMPLE_G_LB 23 - SQ_TEX_INST_SAMPLE_G_LZ 24 - SQ_TEX_INST_SAMPLE_C 25 - SQ_TEX_INST_SAMPLE_C_L 26 - SQ_TEX_INST_SAMPLE_C_LB 27 - SQ_TEX_INST_SAMPLE_C_LZ 28 - SQ_TEX_INST_SAMPLE_C_G 29 - SQ_TEX_INST_SAMPLE_C_G_L 30 - SQ_TEX_INST_SAMPLE_C_G_LB 31 - SQ_TEX_INST_SAMPLE_C_G_LZ */
TEX_INST_shift = 0,
- SQ_TEX_INST_VTX_FETCH = 0x00,
- SQ_TEX_INST_VTX_SEMANTIC = 0x01,
- SQ_TEX_INST_LD = 0x03,
- SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
- SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
- SQ_TEX_INST_GET_LOD = 0x06,
- SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
- SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
- SQ_TEX_INST_GET_LERP = 0x09,
- SQ_TEX_INST_RESERVED_10 = 0x0a,
- SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
- SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
- SQ_TEX_INST_PASS = 0x0d,
- X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e,
- SQ_TEX_INST_SAMPLE = 0x10,
- SQ_TEX_INST_SAMPLE_L = 0x11,
- SQ_TEX_INST_SAMPLE_LB = 0x12,
- SQ_TEX_INST_SAMPLE_LZ = 0x13,
- SQ_TEX_INST_SAMPLE_G = 0x14,
- SQ_TEX_INST_SAMPLE_G_L = 0x15,
- SQ_TEX_INST_SAMPLE_G_LB = 0x16,
- SQ_TEX_INST_SAMPLE_G_LZ = 0x17,
- SQ_TEX_INST_SAMPLE_C = 0x18,
- SQ_TEX_INST_SAMPLE_C_L = 0x19,
- SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
- SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
- SQ_TEX_INST_SAMPLE_C_G = 0x1c,
- SQ_TEX_INST_SAMPLE_C_G_L = 0x1d,
- SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
- SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f,
- BC_FRAC_MODE_bit = 1 << 5,
-/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
- RESOURCE_ID_mask = 0xff << 8,
+ SQ_TEX_INST_VTX_FETCH = 0x00, /* SQ_TEX_INST_VTX_FETCH: vertex fetch (X = uint32 index) */
+ SQ_TEX_INST_VTX_SEMANTIC = 0x01, /* SQ_TEX_INST_VTX_SEMANTIC: semantic vertex fetch */
+ SQ_TEX_INST_LD = 0x03, /* SQ_TEX_INST_LD: fetch texel, XYZL are uint32 */
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, /* SQ_TEX_INST_GET_TEXTURE_RESINFO: retrieve width, height, depth, number of mipmap levels */
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, /* SQ_TEX_INST_GET_NUMBER_OF_SAMPLES: retrieve width, height, depth, number of samples of an MSAA surface */
+ SQ_TEX_INST_GET_LOD = 0x06, /* SQ_TEX_INST_GET_LOD: X = computed LOD for all pixels in quad */
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07, /* SQ_TEX_INST_GET_GRADIENTS_H: slopes relative to horizontal: X = dx/dh, Y = dy/dh, Z = dz/dh, W = dw/dh */
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08, /* SQ_TEX_INST_GET_GRADIENTS_V: slopes relative to vertical: X = dx/dv, Y = dy/dv, Z = dz/dv, W = dw/dv */
+ SQ_TEX_INST_GET_LERP = 0x09, /* SQ_TEX_INST_GET_LERP: retrieve weights used for bilinear fetch, X = horizontal lerp, Y = vertical lerp, Z = volume slice lerp, W = mipmap lerp */
+ SQ_TEX_INST_RESERVED_10 = 0x0a, /* SQ_TEX_INST_RESERVED_10: Reserved (was GetWeight: retrieve weights used for bilinear fetch, X = TL weight, Y = TR weight, Z = BL weight, W = BR weight) */
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, /* SQ_TEX_INST_SET_GRADIENTS_H: XYZ set horizontal gradients */
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, /* SQ_TEX_INST_SET_GRADIENTS_V: XYZ set vertical gradients */
+ SQ_TEX_INST_PASS = 0x0d, /* SQ_TEX_INST_PASS: returns the address read in memory */
+ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e, /* Z set index for array of cubemaps */
+ SQ_TEX_INST_SAMPLE = 0x10, /* SQ_TEX_INST_SAMPLE */
+ SQ_TEX_INST_SAMPLE_L = 0x11, /* SQ_TEX_INST_SAMPLE_L */
+ SQ_TEX_INST_SAMPLE_LB = 0x12, /* SQ_TEX_INST_SAMPLE_LB */
+ SQ_TEX_INST_SAMPLE_LZ = 0x13, /* SQ_TEX_INST_SAMPLE_LZ */
+ SQ_TEX_INST_SAMPLE_G = 0x14, /* SQ_TEX_INST_SAMPLE_G */
+ SQ_TEX_INST_SAMPLE_G_L = 0x15, /* SQ_TEX_INST_SAMPLE_G_L */
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16, /* SQ_TEX_INST_SAMPLE_G_LB */
+ SQ_TEX_INST_SAMPLE_G_LZ = 0x17, /* SQ_TEX_INST_SAMPLE_G_LZ */
+ SQ_TEX_INST_SAMPLE_C = 0x18, /* SQ_TEX_INST_SAMPLE_C */
+ SQ_TEX_INST_SAMPLE_C_L = 0x19, /* SQ_TEX_INST_SAMPLE_C_L */
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a, /* SQ_TEX_INST_SAMPLE_C_LB */
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, /* SQ_TEX_INST_SAMPLE_C_LZ */
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c, /* SQ_TEX_INST_SAMPLE_C_G */
+ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d, /* SQ_TEX_INST_SAMPLE_C_G_L */
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, /* SQ_TEX_INST_SAMPLE_C_G_LB */
+ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f, /* SQ_TEX_INST_SAMPLE_C_G_LZ */
+ BC_FRAC_MODE_bit = 1 << 5, /* If set, force black texture data and white border to retrieve fraction of pixel that hits the border. */
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ /* If set, texture instruction must fetch data for all pixels (result may be used as source coordinate of a dependent read). If cleared, texture instruction can ignore invalid pixels. */
+ RESOURCE_ID_mask = 0xff << 8, /* Surface ID to read from (specifies the buffer address, size, and format). 160 available for GS and PS; 176 shared across FS and VS. */
RESOURCE_ID_shift = 8,
-/* SRC_GPR_mask = 0x7f << 16, */
+/* SRC_GPR_mask = 0x7f << 16, */ /* Source GPR address to get the texture lookup address from. */
/* SRC_GPR_shift = 16, */
-/* SRC_REL_bit = 1 << 23, */
- SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
- SQ_VTX_WORD1_GPR = 0x00008dfc,
- SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+/* SRC_REL_bit = 1 << 23, */ /* Indicate whether source address is absolute or relative to an index. */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, /* if set, uses constants from alternate thread type: ps->vs, vs->gs, gs->vs, es->gs (note that es and vs share constants). */
+ SQ_VTX_WORD1_GPR = 0x00008dfc, /* Vertex fetch clause instruction word 1. This subencoding is used by fetch instructions that specify a destination GPR directly. */
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, /* Destination GPR address to write result to. */
SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
- SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
- SQ_ALU_WORD0 = 0x00008dfc,
- SRC0_SEL_mask = 0x1ff << 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, /* Indicate whether destination address is absolute or relative to an index. */
+ SQ_ALU_WORD0 = 0x00008dfc, /* ALU instruction word 0. */
+ SRC0_SEL_mask = 0x1ff << 0, /* Source for operands src0, src1. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
SRC0_SEL_shift = 0,
-/* SQ_ALU_SRC_0 = 0xf8, */
-/* SQ_ALU_SRC_1 = 0xf9, */
-/* SQ_ALU_SRC_1_INT = 0xfa, */
-/* SQ_ALU_SRC_M_1_INT = 0xfb, */
-/* SQ_ALU_SRC_0_5 = 0xfc, */
-/* SQ_ALU_SRC_LITERAL = 0xfd, */
-/* SQ_ALU_SRC_PV = 0xfe, */
-/* SQ_ALU_SRC_PS = 0xff, */
- SRC0_REL_bit = 1 << 9,
- SRC0_CHAN_mask = 0x03 << 10,
+/* SQ_ALU_SRC_0 = 0xf8, */ /* SQ_ALU_SRC_0: special constant 0.0. */
+/* SQ_ALU_SRC_1 = 0xf9, */ /* SQ_ALU_SRC_1: special constant 1.0 float. */
+/* SQ_ALU_SRC_1_INT = 0xfa, */ /* SQ_ALU_SRC_1_INT: special constant 1 integer. */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */ /* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
+/* SQ_ALU_SRC_0_5 = 0xfc, */ /* SQ_ALU_SRC_0_5: special constant 0.5 float. */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */ /* SQ_ALU_SRC_LITERAL: literal constant. */
+/* SQ_ALU_SRC_PV = 0xfe, */ /* SQ_ALU_SRC_PV: previous vector result. */
+/* SQ_ALU_SRC_PS = 0xff, */ /* SQ_ALU_SRC_PS: previous scalar result. */
+ SRC0_REL_bit = 1 << 9, /* If set, this operand uses relative addressing based on the INDEX_MODE. */
+ SRC0_CHAN_mask = 0x03 << 10, /* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
SRC0_CHAN_shift = 10,
-/* SQ_CHAN_X = 0x00, */
-/* SQ_CHAN_Y = 0x01, */
-/* SQ_CHAN_Z = 0x02, */
-/* SQ_CHAN_W = 0x03, */
- SRC0_NEG_bit = 1 << 12,
- SRC1_SEL_mask = 0x1ff << 13,
+/* SQ_CHAN_X = 0x00, */ /* SQ_CHAN_X: Use X component. */
+/* SQ_CHAN_Y = 0x01, */ /* SQ_CHAN_Y: Use Y component. */
+/* SQ_CHAN_Z = 0x02, */ /* SQ_CHAN_Z: Use Z component. */
+/* SQ_CHAN_W = 0x03, */ /* SQ_CHAN_W: Use W component. */
+ SRC0_NEG_bit = 1 << 12, /* If set, negate the input for this operand. Should only be set for floating point inputs. */
+ SRC1_SEL_mask = 0x1ff << 13, /* Source for operands src0, src1. Values [0,127] correspond to GPR[0..127]. Values [128,159] correspond to kcache constants in bank 0. Values [160,191] correspond to kcache constants in bank 1. Values [256,511] correspond to cfile constants c[0..255]. Other special values are shown in the list below. POSSIBLE VALUES: 248 - SQ_ALU_SRC_0: special constant 0.0. 249 - SQ_ALU_SRC_1: special constant 1.0 float. 250 - SQ_ALU_SRC_1_INT: special constant 1 integer. 251 - SQ_ALU_SRC_M_1_INT: special constant -1 integer. 252 - SQ_ALU_SRC_0_5: special constant 0.5 float. 253 - SQ_ALU_SRC_LITERAL: literal constant. 254 - SQ_ALU_SRC_PV: previous vector result. 255 - SQ_ALU_SRC_PS: previous scalar result. */
SRC1_SEL_shift = 13,
-/* SQ_ALU_SRC_0 = 0xf8, */
-/* SQ_ALU_SRC_1 = 0xf9, */
-/* SQ_ALU_SRC_1_INT = 0xfa, */
-/* SQ_ALU_SRC_M_1_INT = 0xfb, */
-/* SQ_ALU_SRC_0_5 = 0xfc, */
-/* SQ_ALU_SRC_LITERAL = 0xfd, */
-/* SQ_ALU_SRC_PV = 0xfe, */
-/* SQ_ALU_SRC_PS = 0xff, */
- SRC1_REL_bit = 1 << 22,
- SRC1_CHAN_mask = 0x03 << 23,
+/* SQ_ALU_SRC_0 = 0xf8, */ /* SQ_ALU_SRC_0: special constant 0.0. */
+/* SQ_ALU_SRC_1 = 0xf9, */ /* SQ_ALU_SRC_1: special constant 1.0 float. */
+/* SQ_ALU_SRC_1_INT = 0xfa, */ /* SQ_ALU_SRC_1_INT: special constant 1 integer. */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */ /* SQ_ALU_SRC_M_1_INT: special constant -1 integer. */
+/* SQ_ALU_SRC_0_5 = 0xfc, */ /* SQ_ALU_SRC_0_5: special constant 0.5 float. */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */ /* SQ_ALU_SRC_LITERAL: literal constant. */
+/* SQ_ALU_SRC_PV = 0xfe, */ /* SQ_ALU_SRC_PV: previous vector result. */
+/* SQ_ALU_SRC_PS = 0xff, */ /* SQ_ALU_SRC_PS: previous scalar result. */
+ SRC1_REL_bit = 1 << 22, /* If set, this operand uses relative addressing based on the INDEX_MODE. */
+ SRC1_CHAN_mask = 0x03 << 23, /* Specify which channel of the source to use for this operand. POSSIBLE VALUES: 00 - SQ_CHAN_X: Use X component. 01 - SQ_CHAN_Y: Use Y component. 02 - SQ_CHAN_Z: Use Z component. 03 - SQ_CHAN_W: Use W component. */
SRC1_CHAN_shift = 23,
-/* SQ_CHAN_X = 0x00, */
-/* SQ_CHAN_Y = 0x01, */
-/* SQ_CHAN_Z = 0x02, */
-/* SQ_CHAN_W = 0x03, */
- SRC1_NEG_bit = 1 << 25,
- INDEX_MODE_mask = 0x07 << 26,
+/* SQ_CHAN_X = 0x00, */ /* SQ_CHAN_X: Use X component. */
+/* SQ_CHAN_Y = 0x01, */ /* SQ_CHAN_Y: Use Y component. */
+/* SQ_CHAN_Z = 0x02, */ /* SQ_CHAN_Z: Use Z component. */
+/* SQ_CHAN_W = 0x03, */ /* SQ_CHAN_W: Use W component. */
+ SRC1_NEG_bit = 1 << 25, /* If set, negate the input for this operand. Should only be set for floating point inputs. */
+ INDEX_MODE_mask = 0x07 << 26, /* Specify what relative addressing mode to use for operands that have the REL bit set. POSSIBLE VALUES: 00 - SQ_INDEX_AR_X: constants: add AR.X. registers: add GPR index. 01 - SQ_INDEX_AR_Y: constants: add AR.Y. registers: add GPR index. 02 - SQ_INDEX_AR_Z: constants: add AR.Z. registers: add GPR index. 03 - SQ_INDEX_AR_W: constants: add AR.W. registers: add GPR index. 04 - SQ_INDEX_LOOP: add current loop index value. */
INDEX_MODE_shift = 26,
- SQ_INDEX_AR_X = 0x00,
- SQ_INDEX_AR_Y = 0x01,
- SQ_INDEX_AR_Z = 0x02,
- SQ_INDEX_AR_W = 0x03,
- SQ_INDEX_LOOP = 0x04,
- PRED_SEL_mask = 0x03 << 29,
+ SQ_INDEX_AR_X = 0x00, /* SQ_INDEX_AR_X: constants: add AR.X. registers: add GPR index. */
+ SQ_INDEX_AR_Y = 0x01, /* SQ_INDEX_AR_Y: constants: add AR.Y. registers: add GPR index. */
+ SQ_INDEX_AR_Z = 0x02, /* SQ_INDEX_AR_Z: constants: add AR.Z. registers: add GPR index. */
+ SQ_INDEX_AR_W = 0x03, /* SQ_INDEX_AR_W: constants: add AR.W. registers: add GPR index. */
+ SQ_INDEX_LOOP = 0x04, /* SQ_INDEX_LOOP: add current loop index value. */
+ PRED_SEL_mask = 0x03 << 29, /* Predicate to apply to this instruction. POSSIBLE VALUES: 00 - SQ_PRED_SEL_OFF: execute all pixels. 01 - Reserved 02 - SQ_PRED_SEL_ZERO: execute when pred = 0. 03 - SQ_PRED_SEL_ONE: execute when pred = 1. */
PRED_SEL_shift = 29,
- SQ_PRED_SEL_OFF = 0x00,
- SQ_PRED_SEL_ZERO = 0x02,
- SQ_PRED_SEL_ONE = 0x03,
- LAST_bit = 1 << 31,
- SX_EXPORT_BUFFER_SIZES = 0x0000900c,
- COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ SQ_PRED_SEL_OFF = 0x00, /* SQ_PRED_SEL_OFF: execute all pixels. */
+ SQ_PRED_SEL_ZERO = 0x02, /* SQ_PRED_SEL_ZERO: execute when pred = 0. */
+ SQ_PRED_SEL_ONE = 0x03, /* SQ_PRED_SEL_ONE: execute when pred = 1. */
+ LAST_bit = 1 << 31, /* If set, this is the last 64-bit word for this instruction. */
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c, /* Register that defines export buffer ring sizes */
+ COLOR_BUFFER_SIZE_mask = 0xff << 0, /* Number of 4 line buffers -1 in color buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements. Minimum acceptable value of register field is 0xA. */
COLOR_BUFFER_SIZE_shift = 0,
- POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8, /* Number of 4 line buffers -1 in position buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements. Minimum acceptable value of register field is 0x12. */
POSITION_BUFFER_SIZE_shift = 8,
- SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_mask = 0xff << 16, /* Number of 4 line buffers -1 in smx buffer. Each memory buffer corresponds to 4 lines of 16*128 bits elements */
SMX_BUFFER_SIZE_shift = 16,
- SX_MEMORY_EXPORT_BASE = 0x00009010,
- SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SX_MEMORY_EXPORT_BASE = 0x00009010, /* Defines the base address of the memory export. Only available if chip supports GPU__GC__MEM_EXPORT_PRESENT */
+ /* ADDRESS: 256 byte aligned base address, SX will add 8'h0 at the bottom to get byte address */
+ SX_MEMORY_EXPORT_SIZE = 0x00009014, /* Defines the aperture of the memory export. Only available if chip supports GPU__GC__MEM_EXPORT_PRESENT */
+ /* If computed address minus base address is greater than size, SX will clamp to Size - 1 dword and disable the write. Read will happen at size - 1 dword */
SPI_CONFIG_CNTL = 0x00009100,
- GPR_WRITE_PRIORITY_mask = 0x1f << 0,
+ GPR_WRITE_PRIORITY_mask = 0x1f << 0, /* POSSIBLE VALUES: 00 - Priority order (high to low) = VS, GS, ES, PS 01 - Priority order = VS, GS, PS, ES 02 - Priority order = VS, ES, GS, PS 03 - Priority order = VS, ES, PS, GS 04 - Priority order = VS, PS, GS, ES 05 - Priority order = VS, PS, ES, GS 06 - Priority order = GS, VS, ES, PS 07 - Priority order = GS, VS, PS, ES 08 - Priority order = GS, ES, VS, PS 09 - Priority order = GS, ES, PS, VS 10 - Priority order = GS, PS, VS, ES 11 - Priority order = GS, PS, ES, VS 12 - Priority order = ES, VS, GS, PS 13 - Priority order = ES, VS, PS, GS 14 - Priority order = ES, GS, VS, PS 15 - Priority order = ES, GS, PS, VS 16 - Priority order = ES, PS, VS, GS 17 - Priority order = ES, PS, GS, VS 18 - Priority order = PS, VS, GS, ES 19 - Priority order = PS, VS, ES, GS 20 - Priority order = PS, GS, VS, ES 21 - Priority order = PS, GS, ES, VS 22 - Priority order = PS, ES, VS, GS 23 - Priority order = PS, ES, GS, VS */
GPR_WRITE_PRIORITY_shift = 0,
- X_PRIORITY_ORDER = 0x00,
- X_PRIORITY_ORDER_VS = 0x01,
+ X_PRIORITY_ORDER = 0x00, /* Priority order (high to low) = VS, GS, ES, PS */
+ X_PRIORITY_ORDER_VS = 0x01, /* Priority order = VS, GS, PS, ES */
DISABLE_INTERP_1_bit = 1 << 5,
- DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6,
+ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6, /* POSSIBLE VALUES: 00 - PS 01 - VS 02 - GS 03 - ES */
DEBUG_THREAD_TYPE_SEL_shift = 6,
DEBUG_GROUP_SEL_mask = 0x1f << 8,
DEBUG_GROUP_SEL_shift = 8,
@@ -1023,205 +1050,205 @@ enum {
SPI_CONFIG_CNTL_1 = 0x0000913c,
VTX_DONE_DELAY_mask = 0x0f << 0,
VTX_DONE_DELAY_shift = 0,
- X_DELAY_10_CLKS = 0x00,
- X_DELAY_11_CLKS = 0x01,
- X_DELAY_12_CLKS = 0x02,
- X_DELAY_13_CLKS = 0x03,
- X_DELAY_14_CLKS = 0x04,
- X_DELAY_15_CLKS = 0x05,
- X_DELAY_16_CLKS = 0x06,
- X_DELAY_17_CLKS = 0x07,
- X_DELAY_2_CLKS = 0x08,
- X_DELAY_3_CLKS = 0x09,
- X_DELAY_4_CLKS = 0x0a,
- X_DELAY_5_CLKS = 0x0b,
- X_DELAY_6_CLKS = 0x0c,
- X_DELAY_7_CLKS = 0x0d,
- X_DELAY_8_CLKS = 0x0e,
- X_DELAY_9_CLKS = 0x0f,
+ X_DELAY_10_CLKS = 0x00, /* delay 10 clks (default, min value needed for R600 config) */
+ X_DELAY_11_CLKS = 0x01, /* delay 11 clks */
+ X_DELAY_12_CLKS = 0x02, /* delay 12 clks */
+ X_DELAY_13_CLKS = 0x03, /* delay 13 clks */
+ X_DELAY_14_CLKS = 0x04, /* delay 14 clks */
+ X_DELAY_15_CLKS = 0x05, /* delay 15 clks */
+ X_DELAY_16_CLKS = 0x06, /* delay 16 clks */
+ X_DELAY_17_CLKS = 0x07, /* delay 17 clks */
+ X_DELAY_2_CLKS = 0x08, /* delay 2 clks */
+ X_DELAY_3_CLKS = 0x09, /* delay 3 clks */
+ X_DELAY_4_CLKS = 0x0a, /* delay 4 clks */
+ X_DELAY_5_CLKS = 0x0b, /* delay 5 clks */
+ X_DELAY_6_CLKS = 0x0c, /* delay 6 clks */
+ X_DELAY_7_CLKS = 0x0d, /* delay 7 clks */
+ X_DELAY_8_CLKS = 0x0e, /* delay 8 clks */
+ X_DELAY_9_CLKS = 0x0f, /* delay 9 clks */
INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
- TD_FILTER4 = 0x00009400,
- WEIGHT_1_mask = 0x7ff << 0,
+ TD_FILTER4 = 0x00009400, /* FILTER4 Write Weights */
+ WEIGHT_1_mask = 0x7ff << 0, /* Right (or Bottom) weight of pair: format s2.9 (range [-2, 2), with 9b of fraction). */
WEIGHT_1_shift = 0,
- WEIGHT_0_mask = 0x7ff << 11,
+ WEIGHT_0_mask = 0x7ff << 11, /* Left (or Top) weight of pair: format s2.9 (range [-2, 2), with 9b of fraction). */
WEIGHT_0_shift = 11,
- WEIGHT_PAIR_bit = 1 << 22,
- PHASE_mask = 0x0f << 23,
+ WEIGHT_PAIR_bit = 1 << 22, /* Indicates which pair of weights is loaded. 0: Left (or Top) pair 1: Right (or Bottom) pair */
+ PHASE_mask = 0x0f << 23, /* Indicates which of 9 phases is loaded. */
PHASE_shift = 23,
- DIRECTION_bit = 1 << 27,
+ DIRECTION_bit = 1 << 27, /* Indicates whether to load the horizontal (Left+Right) or vertical (Top+Bottom) weight pair. 0: Horizontal 1: Vertical */
TD_FILTER4_1 = 0x00009404,
TD_FILTER4_1_num = 35,
/* WEIGHT_1_mask = 0x7ff << 0, */
/* WEIGHT_1_shift = 0, */
/* WEIGHT_0_mask = 0x7ff << 11, */
/* WEIGHT_0_shift = 11, */
- TD_CNTL = 0x00009490,
+ TD_CNTL = 0x00009490, /* Texture Data Common Control */
SYNC_PHASE_SH_mask = 0x03 << 0,
SYNC_PHASE_SH_shift = 0,
SYNC_PHASE_VC_SMX_mask = 0x03 << 4,
SYNC_PHASE_VC_SMX_shift = 4,
- TD0_CNTL = 0x00009494,
+ TD0_CNTL = 0x00009494, /* Texture Data 0 Control */
TD0_CNTL_num = 4,
- ID_OVERRIDE_mask = 0x03 << 28,
+ ID_OVERRIDE_mask = 0x03 << 28, /* Texture Data 0 ID Override */
ID_OVERRIDE_shift = 28,
- TD0_STATUS = 0x000094a4,
+ TD0_STATUS = 0x000094a4, /* Texture Data 0 Status */
TD0_STATUS_num = 4,
- BUSY_bit = 1 << 31,
- TA_CNTL_AUX = 0x00009508,
- DISABLE_CUBE_WRAP_bit = 1 << 0,
- SYNC_GRADIENT_bit = 1 << 24,
- SYNC_WALKER_bit = 1 << 25,
- SYNC_ALIGNER_bit = 1 << 26,
- BILINEAR_PRECISION_bit = 1 << 31,
- TA0_CNTL = 0x00009510,
-/* ID_OVERRIDE_mask = 0x03 << 28, */
+ BUSY_bit = 1 << 31, /* (Access: R) */
+ TA_CNTL_AUX = 0x00009508, /* Texture Addresser Common Control */
+ DISABLE_CUBE_WRAP_bit = 1 << 0, /* CubeMap Clamp Policy Override */
+ SYNC_GRADIENT_bit = 1 << 24, /* Gradient synchronization mode */
+ SYNC_WALKER_bit = 1 << 25, /* Walker synchronization mode */
+ SYNC_ALIGNER_bit = 1 << 26, /* Aligner synchronization mode */
+ BILINEAR_PRECISION_bit = 1 << 31, /* Bilinear precision setting */
+ TA0_CNTL = 0x00009510, /* Texture Addresser 0 Control */
+/* ID_OVERRIDE_mask = 0x03 << 28, */ /* Texture Addresser 0 ID Override */
/* ID_OVERRIDE_shift = 28, */
- TA1_CNTL = 0x00009514,
-/* ID_OVERRIDE_mask = 0x03 << 28, */
+ TA1_CNTL = 0x00009514, /* Texture Addresser 1 Control */
+/* ID_OVERRIDE_mask = 0x03 << 28, */ /* Texture Addresser 1 ID Override */
/* ID_OVERRIDE_shift = 28, */
- TA2_CNTL = 0x00009518,
-/* ID_OVERRIDE_mask = 0x03 << 28, */
+ TA2_CNTL = 0x00009518, /* Texture Addresser 2 Control */
+/* ID_OVERRIDE_mask = 0x03 << 28, */ /* Texture Addresser 2 ID Override */
/* ID_OVERRIDE_shift = 28, */
- TA3_CNTL = 0x0000951c,
-/* ID_OVERRIDE_mask = 0x03 << 28, */
+ TA3_CNTL = 0x0000951c, /* Texture Addresser 3 Control */
+/* ID_OVERRIDE_mask = 0x03 << 28, */ /* Texture Addresser 3 ID Override */
/* ID_OVERRIDE_shift = 28, */
- TA0_STATUS = 0x00009520,
- FG_PFIFO_EMPTYB_bit = 1 << 12,
- FG_LFIFO_EMPTYB_bit = 1 << 13,
- FG_SFIFO_EMPTYB_bit = 1 << 14,
- FL_PFIFO_EMPTYB_bit = 1 << 16,
- FL_LFIFO_EMPTYB_bit = 1 << 17,
- FL_SFIFO_EMPTYB_bit = 1 << 18,
- FA_PFIFO_EMPTYB_bit = 1 << 20,
- FA_LFIFO_EMPTYB_bit = 1 << 21,
- FA_SFIFO_EMPTYB_bit = 1 << 22,
- IN_BUSY_bit = 1 << 24,
- FG_BUSY_bit = 1 << 25,
- FL_BUSY_bit = 1 << 27,
- TA_BUSY_bit = 1 << 28,
- FA_BUSY_bit = 1 << 29,
- AL_BUSY_bit = 1 << 30,
-/* BUSY_bit = 1 << 31, */
- TA1_STATUS = 0x00009524,
-/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
-/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
-/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
-/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
-/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
-/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
-/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
-/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
-/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
-/* IN_BUSY_bit = 1 << 24, */
-/* FG_BUSY_bit = 1 << 25, */
-/* FL_BUSY_bit = 1 << 27, */
-/* TA_BUSY_bit = 1 << 28, */
-/* FA_BUSY_bit = 1 << 29, */
-/* AL_BUSY_bit = 1 << 30, */
-/* BUSY_bit = 1 << 31, */
- TA2_STATUS = 0x00009528,
-/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
-/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
-/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
-/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
-/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
-/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
-/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
-/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
-/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
-/* IN_BUSY_bit = 1 << 24, */
-/* FG_BUSY_bit = 1 << 25, */
-/* FL_BUSY_bit = 1 << 27, */
-/* TA_BUSY_bit = 1 << 28, */
-/* FA_BUSY_bit = 1 << 29, */
-/* AL_BUSY_bit = 1 << 30, */
-/* BUSY_bit = 1 << 31, */
- TA3_STATUS = 0x0000952c,
-/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
-/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
-/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
-/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
-/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
-/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
-/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
-/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
-/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
-/* IN_BUSY_bit = 1 << 24, */
-/* FG_BUSY_bit = 1 << 25, */
-/* FL_BUSY_bit = 1 << 27, */
-/* TA_BUSY_bit = 1 << 28, */
-/* FA_BUSY_bit = 1 << 29, */
-/* AL_BUSY_bit = 1 << 30, */
-/* BUSY_bit = 1 << 31, */
- TC_STATUS = 0x00009600,
- TC_BUSY_bit = 1 << 0,
- TC_INVALIDATE = 0x00009604,
- START_bit = 1 << 0,
- TC_CNTL = 0x00009608,
+ TA0_STATUS = 0x00009520, /* Texture Addresser 0 Status */
+ FG_PFIFO_EMPTYB_bit = 1 << 12, /* (Access: R) Gradient FIFO state, pipeline fifo not empty */
+ FG_LFIFO_EMPTYB_bit = 1 << 13, /* (Access: R) Gradient FIFO state, latency fifo not empty */
+ FG_SFIFO_EMPTYB_bit = 1 << 14, /* (Access: R) Gradient FIFO state, state fifo not empty */
+ FL_PFIFO_EMPTYB_bit = 1 << 16, /* (Access: R) LOD FIFO state, pipeline fifo not empty */
+ FL_LFIFO_EMPTYB_bit = 1 << 17, /* (Access: R) LOD FIFO state, latency fifo not empty */
+ FL_SFIFO_EMPTYB_bit = 1 << 18, /* (Access: R) LOD FIFO state, state fifo not empty */
+ FA_PFIFO_EMPTYB_bit = 1 << 20, /* (Access: R) Addresser FIFO state, pipeline fifo not empty */
+ FA_LFIFO_EMPTYB_bit = 1 << 21, /* (Access: R) Addresser FIFO state, latency fifo not empty */
+ FA_SFIFO_EMPTYB_bit = 1 << 22, /* (Access: R) Addresser FIFO state, state fifo not empty */
+ IN_BUSY_bit = 1 << 24, /* (Access: R) Input/LOD(Deriv) busy */
+ FG_BUSY_bit = 1 << 25, /* (Access: R) Gradient FIFO busy */
+ FL_BUSY_bit = 1 << 27, /* (Access: R) LOD FIFO busy */
+ TA_BUSY_bit = 1 << 28, /* (Access: R) Addresser busy */
+ FA_BUSY_bit = 1 << 29, /* (Access: R) Addresser FIFO busy */
+ AL_BUSY_bit = 1 << 30, /* (Access: R) Aligner busy */
+/* BUSY_bit = 1 << 31, */ /* (Access: R) Global TA0 busy */
+ TA1_STATUS = 0x00009524, /* Texture Addresser 1 Status */
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ /* (Access: R) Gradient FIFO state, pipeline fifo not empty */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ /* (Access: R) Gradient FIFO state, latency fifo not empty */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ /* (Access: R) Gradient FIFO state, state fifo not empty */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ /* (Access: R) LOD FIFO state, pipeline fifo not empty */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ /* (Access: R) LOD FIFO state, latency fifo not empty */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ /* (Access: R) LOD FIFO state, state fifo not empty */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ /* (Access: R) Addresser FIFO state, pipeline fifo not empty */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ /* (Access: R) Addresser FIFO state, latency fifo not empty */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ /* (Access: R) Addresser FIFO state, state fifo not empty */
+/* IN_BUSY_bit = 1 << 24, */ /* (Access: R) Input/LOD(Deriv) busy */
+/* FG_BUSY_bit = 1 << 25, */ /* (Access: R) Gradient FIFO busy */
+/* FL_BUSY_bit = 1 << 27, */ /* (Access: R) LOD FIFO busy */
+/* TA_BUSY_bit = 1 << 28, */ /* (Access: R) Addresser busy */
+/* FA_BUSY_bit = 1 << 29, */ /* (Access: R) Addresser FIFO busy */
+/* AL_BUSY_bit = 1 << 30, */ /* (Access: R) Aligner busy */
+/* BUSY_bit = 1 << 31, */ /* (Access: R) Global TA1 busy */
+ TA2_STATUS = 0x00009528, /* Texture Addresser 2 Status */
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ /* (Access: R) Gradient FIFO state, pipeline fifo not empty */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ /* (Access: R) Gradient FIFO state, latency fifo not empty */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ /* (Access: R) Gradient FIFO state, state fifo not empty */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ /* (Access: R) LOD FIFO state, pipeline fifo not empty */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ /* (Access: R) LOD FIFO state, latency fifo not empty */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ /* (Access: R) LOD FIFO state, state fifo not empty */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ /* (Access: R) Addresser FIFO state, pipeline fifo not empty */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ /* (Access: R) Addresser FIFO state, latency fifo not empty */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ /* (Access: R) Addresser FIFO state, state fifo not empty */
+/* IN_BUSY_bit = 1 << 24, */ /* (Access: R) Input/LOD(Deriv) busy */
+/* FG_BUSY_bit = 1 << 25, */ /* (Access: R) Gradient FIFO busy */
+/* FL_BUSY_bit = 1 << 27, */ /* (Access: R) LOD FIFO busy */
+/* TA_BUSY_bit = 1 << 28, */ /* (Access: R) Addresser busy */
+/* FA_BUSY_bit = 1 << 29, */ /* (Access: R) Addresser FIFO busy */
+/* AL_BUSY_bit = 1 << 30, */ /* (Access: R) Aligner busy */
+/* BUSY_bit = 1 << 31, */ /* (Access: R) Global TA2 busy */
+ TA3_STATUS = 0x0000952c, /* Texture Addresser 3 Status */
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ /* (Access: R) Gradient FIFO state, pipeline fifo not empty */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ /* (Access: R) Gradient FIFO state, latency fifo not empty */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ /* (Access: R) Gradient FIFO state, state fifo not empty */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ /* (Access: R) LOD FIFO state, pipeline fifo not empty */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ /* (Access: R) LOD FIFO state, latency fifo not empty */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ /* (Access: R) LOD FIFO state, state fifo not empty */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ /* (Access: R) Addresser FIFO state, pipeline fifo not empty */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ /* (Access: R) Addresser FIFO state, latency fifo not empty */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ /* (Access: R) Addresser FIFO state, state fifo not empty */
+/* IN_BUSY_bit = 1 << 24, */ /* (Access: R) Input/LOD(Deriv) busy */
+/* FG_BUSY_bit = 1 << 25, */ /* (Access: R) Gradient FIFO busy */
+/* FL_BUSY_bit = 1 << 27, */ /* (Access: R) LOD FIFO busy */
+/* TA_BUSY_bit = 1 << 28, */ /* (Access: R) Addresser busy */
+/* FA_BUSY_bit = 1 << 29, */ /* (Access: R) Addresser FIFO busy */
+/* AL_BUSY_bit = 1 << 30, */ /* (Access: R) Aligner busy */
+/* BUSY_bit = 1 << 31, */ /* (Access: R) Global TA3 busy */
+ TC_STATUS = 0x00009600, /* Texture Cache Status */
+ TC_BUSY_bit = 1 << 0, /* (Access: R) Texture Cache busy */
+ TC_INVALIDATE = 0x00009604, /* Texture Cache Invalidate - When used, TC must be idle or rendering artifacts can occur */
+ START_bit = 1 << 0, /* (Access: W) Invalidate L1 and L2 caches */
+ TC_CNTL = 0x00009608, /* Texture Cache Control - When used, TC must be idle or rendering artifacts can occur */
FORCE_HIT_bit = 1 << 0,
FORCE_MISS_bit = 1 << 1,
- L2_SIZE_mask = 0x0f << 5,
+ L2_SIZE_mask = 0x0f << 5, /* L2 cache size, can be used to disable L2 completely. RV630 default=128K ; RV610 default=0 POSSIBLE VALUES: 00 - 256K 01 - 224K 02 - 192K 03 - 160K 04 - 128K 05 - 96K 06 - 64K 07 - 32K 08 - 0 */
L2_SIZE_shift = 5,
- _256K = 0x00,
- _224K = 0x01,
- _192K = 0x02,
- _160K = 0x03,
- _128K = 0x04,
- _96K = 0x05,
- _64K = 0x06,
- _32K = 0x07,
+ _256K = 0x00, /* 256K */
+ _224K = 0x01, /* 224K */
+ _192K = 0x02, /* 192K */
+ _160K = 0x03, /* 160K */
+ _128K = 0x04, /* 128K */
+ _96K = 0x05, /* 96K */
+ _64K = 0x06, /* 64K */
+ _32K = 0x07, /* 32K */
L2_DISABLE_LATE_HIT_bit = 1 << 9,
DISABLE_VERT_PERF_bit = 1 << 10,
DISABLE_INVAL_BUSY_bit = 1 << 11,
DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12,
- PARTITION_MODE_mask = 0x03 << 13,
+ PARTITION_MODE_mask = 0x03 << 13, /* Default is no partitioning POSSIBLE VALUES: 00 - Vertex: Full Cache ; Texture: Full Cache 01 - Vertex: 1/2 Cache ; Texture: 1/2 Cache 02 - Vertex: 1/4 Cache ; Texture: 3/4 Cache */
PARTITION_MODE_shift = 13,
- X_VERTEX = 0x00,
+ X_VERTEX = 0x00, /* Vertex: Full Cache ; Texture: Full Cache */
MISS_ARB_MODE_bit = 1 << 15,
HIT_ARB_MODE_bit = 1 << 16,
DISABLE_WRITE_DELAY_bit = 1 << 17,
HIT_FIFO_DEPTH_bit = 1 << 18,
- VC_CNTL_STATUS = 0x00009704,
- RP_BUSY_bit = 1 << 0,
- RG_BUSY_bit = 1 << 1,
- VC_BUSY_bit = 1 << 2,
+ VC_CNTL_STATUS = 0x00009704, /* Vertex Cache Status */
+ RP_BUSY_bit = 1 << 0, /* Vertex Cache Request Processor is Busy */
+ RG_BUSY_bit = 1 << 1, /* Vertex Cache Request Generator is Busy */
+ VC_BUSY_bit = 1 << 2, /* Vertex Cache is Busy */
CLAMP_DETECT_bit = 1 << 3,
- SMX_DC_CTL0 = 0x0000a020,
- WR_GATHER_STREAM0_bit = 1 << 0,
- WR_GATHER_STREAM1_bit = 1 << 1,
- WR_GATHER_STREAM2_bit = 1 << 2,
- WR_GATHER_STREAM3_bit = 1 << 3,
- WR_GATHER_SCRATCH_bit = 1 << 4,
- WR_GATHER_REDUC_BUF_bit = 1 << 5,
- WR_GATHER_RING_BUF_bit = 1 << 6,
- WR_GATHER_F_BUF_bit = 1 << 7,
- DISABLE_CACHES_bit = 1 << 8,
- AUTO_FLUSH_INVAL_EN_bit = 1 << 10,
- AUTO_FLUSH_EN_bit = 1 << 11,
- AUTO_FLUSH_CNT_mask = 0xffff << 12,
+ SMX_DC_CTL0 = 0x0000a020, /* Control settings for all Data Caches. These settings should only be changed when the SMX is idle. */
+ WR_GATHER_STREAM0_bit = 1 << 0, /* For Stream0 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_STREAM1_bit = 1 << 1, /* For Stream1 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_STREAM2_bit = 1 << 2, /* For Stream2 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_STREAM3_bit = 1 << 3, /* For Stream3 traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_SCRATCH_bit = 1 << 4, /* For Scratch traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_REDUC_BUF_bit = 1 << 5, /* For Reduction Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_RING_BUF_bit = 1 << 6, /* For Ring Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ WR_GATHER_F_BUF_bit = 1 << 7, /* For F Buffer traffic, use write gather on a write miss. To be used in the case when there will be many writes to memory. This allows SMX to send writes directly to the memory without having to first fetch the cache line from memory to the data caches. */
+ DISABLE_CACHES_bit = 1 << 8, /* Disables all Data Caches and turns on the bypass path. WARNING: Only write requests can be handled while caches are disabled. Read requests will still go to memory but read returns will be dropped by the SMX. */
+ AUTO_FLUSH_INVAL_EN_bit = 1 << 10, /* Valid only if AUTO_FLUSH_EN is set. Will cause auto-invalidate as well as auto-flush */
+ AUTO_FLUSH_EN_bit = 1 << 11, /* Turn on Auto Flush of caches. All caches will automatically flush after AUTO_FLUSH_CNT idle cycles. */
+ AUTO_FLUSH_CNT_mask = 0xffff << 12, /* Nr of idle cycles after which all caches will automatically flush. */
AUTO_FLUSH_CNT_shift = 12,
- MC_RD_STALL_FACTOR_mask = 0x03 << 28,
+ MC_RD_STALL_FACTOR_mask = 0x03 << 28, /* How easily SMX will assert MC`s read info stall bit. 2`d3 = only if cache_ctl_op_fifo_stalled, 2`d2 = as in 2 and if any VFA is full, 2`d1 = as in 1 and if IB full, 2`d0 = SMX will never assert read info stall bit. */
MC_RD_STALL_FACTOR_shift = 28,
- MC_WR_STALL_FACTOR_mask = 0x03 << 30,
+ MC_WR_STALL_FACTOR_mask = 0x03 << 30, /* How easily SMX will assert MC`s write info stall bit. 2`d3 = only if MU`s L2 victim cache or wr req fifo stalled, 2`d2 = as in 2 and if any VFA is full, 2`d1 = as in 1 and if IB full, 2`d0 = SMX will never assert write info stall bit. */
MC_WR_STALL_FACTOR_shift = 30,
- SMX_DC_CTL1 = 0x0000a024,
- OP_FIFO_SKID_mask = 0x7f << 0,
+ SMX_DC_CTL1 = 0x0000a024, /* Control settings for all Data Caches. These settings should only be changed when the SMX is idle. */
+ OP_FIFO_SKID_mask = 0x7f << 0, /* Skid for Cache Operation Fifo. Must be at least 1. */
OP_FIFO_SKID_shift = 0,
- CACHE_LINE_SIZE_bit = 1 << 8,
- MULTI_FLUSH_MODE_bit = 1 << 9,
- MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10,
+ CACHE_LINE_SIZE_bit = 1 << 8, /* Selects between 32-byte (CL32) or 64-byte (CL64) size cache lines. Note that CL64 has double the cache line width but half the number of cache lines as CL32. Since the SMX MC write and read interfaces are only 32 bytes wide, a 64 byte cache line transfer takes 2 consecutive cycles over the MC interface, this makes more efficient use of MC bandwidth. */
+ MULTI_FLUSH_MODE_bit = 1 << 9, /* Allows multiple outstanding flushes to be in flight without stalling the pipeline. Only for ES/GS Flush and Flush and/or invalidate all events. Multi-Flush mode does not exist in RV630. */
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10, /* Skid for Multi-Flush Engine`s Flush Request Abort Index Fifo. Must be at least 1. */
MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10,
- DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16,
- DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17,
- DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18,
- DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19,
- SMX_DC_CTL2 = 0x0000a028,
- INVALIDATE_CACHES_bit = 1 << 0,
- CACHES_INVALID_bit = 1 << 1,
- CACHES_DIRTY_bit = 1 << 2,
- FLUSH_ALL_bit = 1 << 4,
- FLUSH_GS_THREADS_bit = 1 << 8,
- FLUSH_ES_THREADS_bit = 1 << 9,
+ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16, /* A Read hit of a write-gathering cacheline forces it to first evict to memory then read back to ensure coherency. Setting this bit allows you to read the line without evicting it first, but coherency (of cache vs memory) is not guaranteed. */
+ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17, /* In a write-gathering cacheline, a read tag check also checks if the comp valid bits allow a read to be serviced from cache, else it is evicted and read back. Setting this bit disables the comp valid checking forcing any read hit to a write gathering cacheline to evict to memory and read back. */
+ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18, /* A Flush ES event also invalidates all ES lines in the caches. Disabling this will reduce cache`s ability to process incoming requests while flushing, reducing performance. */
+ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19, /* A Flush GS event also invalidates all GS lines in the caches. Disabling this will reduce cache`s ability to process incoming requests while flushing, reducing performance. */
+ SMX_DC_CTL2 = 0x0000a028, /* Operations on all Data Caches. These operations should only be done when the SMX is idle. The register fields can be polled to check for completion of the operation */
+ INVALIDATE_CACHES_bit = 1 << 0, /* Invalidates all lines in all Data Caches. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
+ CACHES_INVALID_bit = 1 << 1, /* READ-ONLY. All lines in all Data Caches are invalid, i.e., the caches are empty. */
+ CACHES_DIRTY_bit = 1 << 2, /* READ-ONLY. There are some dirty lines in the Data Caches. */
+ FLUSH_ALL_bit = 1 << 4, /* Flush all lines from all caches. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
+ FLUSH_GS_THREADS_bit = 1 << 8, /* Flush all lines from all caches which come from Geometry Shader threads. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
+ FLUSH_ES_THREADS_bit = 1 << 9, /* Flush all lines from all caches which come from Export Shader threads. This field will stay at 1 until the operation is complete, after which it will revert to 0. */
TD_PS_SAMPLER0_BORDER_RED = 0x0000a400,
TD_PS_SAMPLER0_BORDER_RED_num = 18,
TD_PS_SAMPLER0_BORDER_RED_offset = 16,
@@ -1265,205 +1292,215 @@ enum {
TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3,
TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3,
DB_DEPTH_SIZE = 0x00028000,
- PITCH_TILE_MAX_mask = 0x3ff << 0,
+ PITCH_TILE_MAX_mask = 0x3ff << 0, /* Width in 8x8 pixel tiles. (Pitch - 1) */
PITCH_TILE_MAX_shift = 0,
- SLICE_TILE_MAX_mask = 0xfffff << 10,
+ SLICE_TILE_MAX_mask = 0xfffff << 10, /* Number of 8x8 pixel tiles until the next slice plus some small number to be able to rotate the tile pattern. (Pitch - 1) */
SLICE_TILE_MAX_shift = 10,
- DB_DEPTH_VIEW = 0x00028004,
- SLICE_START_mask = 0x7ff << 0,
+ DB_DEPTH_VIEW = 0x00028004, /* Selects slice index range for render target 0. */
+ SLICE_START_mask = 0x7ff << 0, /* Specifies the starting slice number for this view. This field is added to the RenderTargetArrayIndex to compute the slice to render. */
SLICE_START_shift = 0,
- SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_mask = 0x7ff << 13, /* Specifies the maximum allowed Z slice index for this resource, which is one less than the total number of slices. */
SLICE_MAX_shift = 13,
DB_DEPTH_BASE = 0x0002800c,
+ /* BASE_256B: Location of the first byte of the Depth surface in Device Address Space, which must be 256 byte aligned. High 32-bits of 40-bit address. */
DB_DEPTH_INFO = 0x00028010,
- DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0,
+ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, /* Specifies the size of the depth and stencil components and whether depth is floating point. */
DB_DEPTH_INFO__FORMAT_shift = 0,
- DEPTH_INVALID = 0x00,
- DEPTH_16 = 0x01,
- DEPTH_X8_24 = 0x02,
- DEPTH_8_24 = 0x03,
- DEPTH_X8_24_FLOAT = 0x04,
- DEPTH_8_24_FLOAT = 0x05,
- DEPTH_32_FLOAT = 0x06,
- DEPTH_X24_8_32_FLOAT = 0x07,
- DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3,
- DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15,
+ DEPTH_INVALID = 0x00, /* DEPTH_INVALID: Depth and stencil surface are not valid. */
+ DEPTH_16 = 0x01, /* DEPTH_16: UNORM 16-bit depth. */
+ DEPTH_X8_24 = 0x02, /* DEPTH_X8_24: 24-bit UNORM depth and invalid stencil surface. */
+ DEPTH_8_24 = 0x03, /* DEPTH_8_24: 24-bit UNORM depth and int stencil. */
+ DEPTH_X8_24_FLOAT = 0x04, /* DEPTH_X8_24_FLOAT: 24-bit float depth and invalid stencil surface. */
+ DEPTH_8_24_FLOAT = 0x05, /* DEPTH_8_24_FLOAT: 24-bit float depth and int stencil. */
+ DEPTH_32_FLOAT = 0x06, /* DEPTH_32_FLOAT: 32-bit float depth. */
+ DEPTH_X24_8_32_FLOAT = 0x07, /* DEPTH_X24_8_32_FLOAT: 32-bit float depth and int stencil. */
+ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, /* Specifies the read size: larger reads are more efficient for AGP accesses, for example. */
+ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, /* Specifies the tiling format for this array. DB does not support values 0, 1, 3, 7, 11, 13, or 15. */
DB_DEPTH_INFO__ARRAY_MODE_shift = 15,
- ARRAY_2D_TILED_THIN1 = 0x04,
- TILE_SURFACE_ENABLE_bit = 1 << 25,
- TILE_COMPACT_bit = 1 << 26,
- ZRANGE_PRECISION_bit = 1 << 31,
+ ARRAY_2D_TILED_THIN1 = 0x04, /* ARRAY_2D_TILED_THIN1: Uses 8x8x1 macro-tiles */
+ TILE_SURFACE_ENABLE_bit = 1 << 25, /* Enables reading and writing of the htile data. If off HiZ+S is off. */
+ TILE_COMPACT_bit = 1 << 26, /* If true, this surface is compacted to eliminate storage that would be unused due to multi-chip supertiling. The supertiling mode is specified in PA_SC_MULTI_CHIP_CNTL. If this bit is set, then MULTI_CHIP_SUPERTILE_ENABLE must be set in PA_SC_MODE_CNTL. */
+ ZRANGE_PRECISION_bit = 1 << 31, /* 0 = ZMin is the base, generally set when doing a Z > test, 1 = ZMax is the base, set when generally using a Z < test. The value used as base has full 14 bit precision. By setting the base to Max culling has less error in a < test. Can only be changed after a full surface clear. */
DB_HTILE_DATA_BASE = 0x00028014,
+ /* BASE_256B: Location of the first byte of the HTileData surface in Device Address Space, which must be 256 byte aligned. High 32-bits of 40-bit address. This surface contains the HiZ data. */
DB_STENCIL_CLEAR = 0x00028028,
- DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, /* Stencil value when SMEM==0, which specifies that the tile is cleared to background stencil values. */
DB_STENCIL_CLEAR__CLEAR_shift = 0,
- MIN_mask = 0xff << 16,
+ MIN_mask = 0xff << 16, /* Compressed stencils store values from STENCIL_MIN to STENCIL_MIN+15. Cannot be changed without clearing or previously expanding the stencil buffer. */
MIN_shift = 16,
DB_DEPTH_CLEAR = 0x0002802c,
- PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
- PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ /* Depth value when ZMASK==0, which indicates that the tile has been cleared to the background depth. This register holds a 32bit float value. */
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030, /* Screen Scissor rectangle specification. This scissor is NOT affected by WINDOW_OFFSET. Negative numbers clamped to 0, so reads will mismatch on negative values. */
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0, /* Left hand edge of scissor rectangle. 15 bits signed. Valid range -16K to 8191. Inclusive for UPPER_LEFT. */
PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
- PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, /* Upper edge of scissor rectangle. 15 bits signed. Valid range -16K to 8191. Inclusive for UPPER_LEFT. */
PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
- PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
- PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034, /* Screen Scissor rectangle specification. This scissor is NOT affected by WINDOW_OFFSET. Negative numbers clamped to 0, so reads will mismatch on negative values. */
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0, /* Right hand edge of scissor rectangle. 15 bits signed. Valid range -16K to 8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
- PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, /* Lower edge of scissor rectangle. 15 bits signed. Valid range -16K to 8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
CB_COLOR0_BASE = 0x00028040,
CB_COLOR0_BASE_num = 8,
+ /* BASE_256B: For linear and 1D tiled surfaces, this value times 256 is the byte address of the start of the resource in device address space. In other words, this field is the high 32-bits of an up to 40-bit virtual device address. For 2D tiled surfaces, the bits corresponding to the bank and pipe number in the address actually specify the bank/pipe swizzle for the surface. 2D tiled surfaces are always aligned to a multiple of the group size times the number of banks times the number of pipes (memory channels). */
CB_COLOR0_SIZE = 0x00028060,
CB_COLOR0_SIZE_num = 8,
-/* PITCH_TILE_MAX_mask = 0x3ff << 0, */
+/* PITCH_TILE_MAX_mask = 0x3ff << 0, */ /* Define Pitch as the number of data elements per scanline. This field equals (Pitch/8) - 1, which equals the maximum 8x8 tile number allowed in the X dimension for the surface. Allowed values for Pitch depend on ARRAY_MODE, ElemBytes (the number of bytes per data element: 1 to 16), and Samples (the number of multi-samples per pixel: 1, 2, 4, or 8). It also depends on two configuration parameters that are the same for all resources: GroupBytes (the bytes per memory interleave group: 256 or 512), and Banks (the number of DDRAM banks: 4 or 8). Linear: Pitch*ElemBytes is a multiple of GroupBytes and Pitch is a multiple of 64 1D tiled: Pitch*8*ElemBytes*Samples is a multiple of GroupBytes and Pitch is a multiple of 8 2D/3D tiled: Pitch*8*ElemBytes*Samples/Banks is multiple of GroupBytes and Pitch is a multiple of 8*Banks/Factor where Factor is 1, 2, or 4 for THIN1/THIN2/THIN4 2B/3B tiled: also padded to bank swap boundaries, which are determined from GB_TILING_CONFIG fields In addition to these constraints */
/* PITCH_TILE_MAX_shift = 0, */
-/* SLICE_TILE_MAX_mask = 0xfffff << 10, */
+/* SLICE_TILE_MAX_mask = 0xfffff << 10, */ /* Define SliceTiles as (Pitch*Height/64). This field equals SliceTiles-1, and is the maximum allowed 8x8 or 64x1 tile number within an (x,y) slice of a 2D or 3D surface. The following constraints apply to allowable heights and z-depths for resources (see the ARRAY_MODE field): All cases: Height is in the range [1..8192]. 1D tiling: Height is a multiple of 8. 2D THIN1 tiling: Height is a multiple of 8*Pipes 2D THIN2 tiling: Height is a multiple of 16*Pipes (and pitch is a multiple of 4*Banks) 2D THIN4 tiling: Height is a multiple of 32*Pipes (and pitch is a multiple of 2*Banks) 2D THICK tiling: Height is a multiple of 8*Pipes and z-depth is a multiple of 4 Note: Pitch, height and Z-depth must be powers of 2 for mipmap chains (other than the base map). */
/* SLICE_TILE_MAX_shift = 10, */
- CB_COLOR0_VIEW = 0x00028080,
+ CB_COLOR0_VIEW = 0x00028080, /* Selects slice index range for render target 0. */
CB_COLOR0_VIEW_num = 8,
-/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_mask = 0x7ff << 0, */ /* For ARRAY_LINEAR_GENERAL the low 8-bits together with BASE_256B specify a 40-bit starting address (must be element-aligned). Else this specifies the starting slice number for this view: this field is added to the RenderTargetArrayIndex to compute the slice to render. */
/* SLICE_START_shift = 0, */
-/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_mask = 0x7ff << 13, */ /* Specifies the maximum allowed Z slice index for this resource, which is one less than the total number of slices. Clamp Z slice to SLICE_START if this value is exceeded (clamp to zero for ARRAY_LINEAR_GENERAL). */
/* SLICE_MAX_shift = 13, */
- CB_COLOR0_INFO = 0x000280a0,
+ CB_COLOR0_INFO = 0x000280a0, /* Information needed for render target 0 */
CB_COLOR0_INFO_num = 8,
- ENDIAN_mask = 0x03 << 0,
+ ENDIAN_mask = 0x03 << 0, /* Specifies what kind of byte swapping to perform, if any, for different endian modes. The byte swap is equivalent to computing dest[A] = src[A XOR N] for byte address A and the XOR values listed below. See the COMP_SWAP field for component swapping options. */
ENDIAN_shift = 0,
- ENDIAN_NONE = 0x00,
- ENDIAN_8IN16 = 0x01,
- ENDIAN_8IN32 = 0x02,
- ENDIAN_8IN64 = 0x03,
- CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ ENDIAN_NONE = 0x00, /* ENDIAN_NONE: No endian swapping (XOR by 0) */
+ ENDIAN_8IN16 = 0x01, /* ENDIAN_8IN16: 8 bit swap within 16 bit word (XOR by 1): 0xAABBCCDD -> 0xBBAADDCC */
+ ENDIAN_8IN32 = 0x02, /* ENDIAN_8IN32: 8 bit swap within 32 bit word (XOR by 3): 0xAABBCCDD -> 0xDDCCBBAA */
+ ENDIAN_8IN64 = 0x03, /* ENDIAN_8IN64: 8 bit swap in 64 bits (XOR by 7): 0xaabbccddeeffgghh -> 0xhhggffeeddccbbaa */
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, /* Specifies the size of the color components and in some cases the number format. See the COMP_SWAP field below for mappings of RGBA (XYZW) shader pipe results to color component positions in the pixel format. */
CB_COLOR0_INFO__FORMAT_shift = 2,
- COLOR_INVALID = 0x00,
- COLOR_8 = 0x01,
- COLOR_4_4 = 0x02,
- COLOR_3_3_2 = 0x03,
- COLOR_16 = 0x05,
- COLOR_16_FLOAT = 0x06,
- COLOR_8_8 = 0x07,
- COLOR_5_6_5 = 0x08,
- COLOR_6_5_5 = 0x09,
- COLOR_1_5_5_5 = 0x0a,
- COLOR_4_4_4_4 = 0x0b,
- COLOR_5_5_5_1 = 0x0c,
- COLOR_32 = 0x0d,
- COLOR_32_FLOAT = 0x0e,
- COLOR_16_16 = 0x0f,
- COLOR_16_16_FLOAT = 0x10,
- COLOR_8_24 = 0x11,
- COLOR_8_24_FLOAT = 0x12,
- COLOR_24_8 = 0x13,
- COLOR_24_8_FLOAT = 0x14,
- COLOR_10_11_11 = 0x15,
- COLOR_10_11_11_FLOAT = 0x16,
- COLOR_11_11_10 = 0x17,
- COLOR_11_11_10_FLOAT = 0x18,
- COLOR_2_10_10_10 = 0x19,
- COLOR_8_8_8_8 = 0x1a,
- COLOR_10_10_10_2 = 0x1b,
- COLOR_X24_8_32_FLOAT = 0x1c,
- COLOR_32_32 = 0x1d,
- COLOR_32_32_FLOAT = 0x1e,
- COLOR_16_16_16_16 = 0x1f,
- COLOR_16_16_16_16_FLOAT = 0x20,
- COLOR_32_32_32_32 = 0x22,
- COLOR_32_32_32_32_FLOAT = 0x23,
- CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ COLOR_INVALID = 0x00, /* COLOR_INVALID: this resource is disabled */
+ COLOR_8 = 0x01, /* COLOR_8: */
+ COLOR_4_4 = 0x02, /* COLOR_4_4: */
+ COLOR_3_3_2 = 0x03, /* COLOR_3_3_2: (*) */
+ COLOR_16 = 0x05, /* COLOR_16: */
+ COLOR_16_FLOAT = 0x06, /* COLOR_16_FLOAT: */
+ COLOR_8_8 = 0x07, /* COLOR_8_8: */
+ COLOR_5_6_5 = 0x08, /* COLOR_5_6_5: */
+ COLOR_6_5_5 = 0x09, /* COLOR_6_5_5: */
+ COLOR_1_5_5_5 = 0x0a, /* COLOR_1_5_5_5: 1-bit component is uint for uint/sint number type, else it is unorm */
+ COLOR_4_4_4_4 = 0x0b, /* COLOR_4_4_4_4: */
+ COLOR_5_5_5_1 = 0x0c, /* COLOR_5_5_5_1: 1-bit component is uint for uint/sint number type, else it is unorm */
+ COLOR_32 = 0x0d, /* COLOR_32: float/uint/sint only */
+ COLOR_32_FLOAT = 0x0e, /* COLOR_32_FLOAT: */
+ COLOR_16_16 = 0x0f, /* COLOR_16_16: */
+ COLOR_16_16_FLOAT = 0x10, /* COLOR_16_16_FLOAT: */
+ COLOR_8_24 = 0x11, /* COLOR_8_24: unorm depth, uint stencil */
+ COLOR_8_24_FLOAT = 0x12, /* COLOR_8_24_FLOAT: float depth, uint stencil */
+ COLOR_24_8 = 0x13, /* COLOR_24_8: unorm depth, uint stencil */
+ COLOR_24_8_FLOAT = 0x14, /* COLOR_24_8_FLOAT: float depth, uint stencil */
+ COLOR_10_11_11 = 0x15, /* COLOR_10_11_11: */
+ COLOR_10_11_11_FLOAT = 0x16, /* COLOR_10_11_11_FLOAT: */
+ COLOR_11_11_10 = 0x17, /* COLOR_11_11_10: */
+ COLOR_11_11_10_FLOAT = 0x18, /* COLOR_11_11_10_FLOAT: */
+ COLOR_2_10_10_10 = 0x19, /* COLOR_2_10_10_10: */
+ COLOR_8_8_8_8 = 0x1a, /* COLOR_8_8_8_8: srgb allowed */
+ COLOR_10_10_10_2 = 0x1b, /* COLOR_10_10_10_2: */
+ COLOR_X24_8_32_FLOAT = 0x1c, /* COLOR_X24_8_32_FLOAT: float depth, uint stencil */
+ COLOR_32_32 = 0x1d, /* COLOR_32_32: float/uint/sint only */
+ COLOR_32_32_FLOAT = 0x1e, /* COLOR_32_32_FLOAT: */
+ COLOR_16_16_16_16 = 0x1f, /* COLOR_16_16_16_16: */
+ COLOR_16_16_16_16_FLOAT = 0x20, /* COLOR_16_16_16_16_FLOAT: */
+ COLOR_32_32_32_32 = 0x22, /* COLOR_32_32_32_32: float/uint/sint only */
+ COLOR_32_32_32_32_FLOAT = 0x23, /* COLOR_32_32_32_32_FLOAT: */
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, /* Specifies the tiling format of this render target array. */
CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
- ARRAY_LINEAR_GENERAL = 0x00,
- ARRAY_LINEAR_ALIGNED = 0x01,
-/* ARRAY_2D_TILED_THIN1 = 0x04, */
- NUMBER_TYPE_mask = 0x07 << 12,
+ ARRAY_LINEAR_GENERAL = 0x00, /* ARRAY_LINEAR_GENERAL: Unaligned linear array */
+ ARRAY_LINEAR_ALIGNED = 0x01, /* ARRAY_LINEAR_ALIGNED: Aligned linear array */
+/* ARRAY_2D_TILED_THIN1 = 0x04, */ /* ARRAY_2D_TILED_THIN1: Uses 8x8x1 macro-tiles */
+ NUMBER_TYPE_mask = 0x07 << 12, /* Specifies the numeric type of the color components. This field is ignored if FORMAT specifies a number type (e.g. float or gamma). */
NUMBER_TYPE_shift = 12,
- NUMBER_UNORM = 0x00,
- NUMBER_SNORM = 0x01,
- NUMBER_USCALED = 0x02,
- NUMBER_SSCALED = 0x03,
- NUMBER_UINT = 0x04,
- NUMBER_SINT = 0x05,
- NUMBER_SRGB = 0x06,
- NUMBER_FLOAT = 0x07,
- CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15,
- COMP_SWAP_mask = 0x03 << 16,
+ NUMBER_UNORM = 0x00, /* NUMBER_UNORM: unsigned repeating fraction (urf): range [0..1], scale factor (2^n)-1 */
+ NUMBER_SNORM = 0x01, /* NUMBER_SNORM: Microsoft-style signed rf: range [-1..1], scale factor (2^(n-1))-1 */
+ NUMBER_USCALED = 0x02, /* NUMBER_USCALED: unsigned integer, converted to float in shader: range [0..(2^n)-1] */
+ NUMBER_SSCALED = 0x03, /* NUMBER_SSCALED: signed integer, converted to float in shader: range [-2^(n-1)..2^(n-1)-1] */
+ NUMBER_UINT = 0x04, /* NUMBER_UINT: zero-extended bit field, int in shader: not blendable or filterable */
+ NUMBER_SINT = 0x05, /* NUMBER_SINT: sign-extended bit field, int in shader: not blendable or filterable */
+ NUMBER_SRGB = 0x06, /* NUMBER_SRGB: gamma corrected, range [0..1] (only supported for 8-bit components) (always rounds color channels) */
+ NUMBER_FLOAT = 0x07, /* NUMBER_FLOAT: floating point, depends on component size: 32-bit: IEEE float, SE8M23, bias 127, range (- 2^129..2^129) 24-bit: Depth float, E4M20, bias 15, range [0..1] 16-bit: Short float SE5M10, bias 15, range (-2^17..2^17) 11-bit: Packed float, E5M6 bias 15, range [0..2^17) 10-bit: Packed float, E5M5 bias 15, range [0..2^17) all other component sizes are treated as UINT */
+ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15, /* Specifies the preferred read size: larger reads are more efficient for PCIE accesses, for example. */
+ COMP_SWAP_mask = 0x03 << 16, /* Specifies how to map the red, green, blue, and alpha components from the shader to the components in the frame buffer pixel format. There are four choices for each number of components. With one component, the four modes select any one component. With 2-4 components, SWAP_STD selects the low order shader components in little-endian order; SWAP_ALT selects an alternate order (for 4 components) or inclusion of alpha (for 2 or 3 components); and the other two reverse the component orders for use on big-endian machines. The following table specifies the exact component mappings: 1 comp std alt std_rev alt_rev ----------- ------- ------- ------- ------- comp 0: red green blue alpha 2 comps std alt std_rev alt_rev ----------- ------- ------- ------- ------- comp 0: red red green alpha comp 1: green alpha red red 3 comps std alt std_rev alt_rev ----------- ------- ------- ------- ------- comp 0: red red blue alpha comp 1: green green green green comp 2: blue alpha red red 4 comps std alt std_rev alt_rev ----------- ------- ------- ------- ------- comp 0: red blue alpha alpha comp 1: green green blue red comp 2: blue red green green comp 3: alpha alpha red blue */
COMP_SWAP_shift = 16,
- SWAP_STD = 0x00,
- SWAP_ALT = 0x01,
- SWAP_STD_REV = 0x02,
- SWAP_ALT_REV = 0x03,
- CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18,
+ SWAP_STD = 0x00, /* SWAP_STD: standard little-endian comp order */
+ SWAP_ALT = 0x01, /* SWAP_ALT: alternate components or order */
+ SWAP_STD_REV = 0x02, /* SWAP_STD_REV: reverses SWAP_STD order */
+ SWAP_ALT_REV = 0x03, /* SWAP_ALT_REV: reverses SWAP_ALT order */
+ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18, /* Selects how and whether to use per-tile CMASK and FMASK per-tile data with this surface. */
CB_COLOR0_INFO__TILE_MODE_shift = 18,
- TILE_DISABLE = 0x00,
- TILE_CLEAR_ENABLE = 0x01,
- TILE_FRAG_ENABLE = 0x02,
- BLEND_CLAMP_bit = 1 << 20,
- CLEAR_COLOR_bit = 1 << 21,
- BLEND_BYPASS_bit = 1 << 22,
- BLEND_FLOAT32_bit = 1 << 23,
- SIMPLE_FLOAT_bit = 1 << 24,
- CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25,
-/* TILE_COMPACT_bit = 1 << 26, */
- SOURCE_FORMAT_bit = 1 << 27,
+ TILE_DISABLE = 0x00, /* TILE_DISABLE: do not access any per-tile data */
+ TILE_CLEAR_ENABLE = 0x01, /* TILE_CLEAR_ENABLE: allow background clear only */
+ TILE_FRAG_ENABLE = 0x02, /* TILE_FRAG_ENABLE: allow background clear and multi-sample fragment masks */
+ BLEND_CLAMP_bit = 1 << 20, /* Specifies whether to clamp source data to the render target range prior to blending, in addition to the post- blend clamp. This bit must be zero for uscaled, sscaled and float number types and when blend_bypass is set. */
+ CLEAR_COLOR_bit = 1 << 21, /* If false, use RGB=0.0 and A=1.0 (0x3f800000) to expand fast-cleared tiles. If true, use the CB_CLEAR register values to expand fast-cleared tiles. */
+ BLEND_BYPASS_bit = 1 << 22, /* If false, blending occurs normally as specified in CB_BLEND#_CONTROL. If true, blending (but not fog) is disabled. This must be set for the 24_8 and 8_24 formats and when the number type is uint or sint. It should also be set for number types that are required to ignore the blend state in a specific application interface. */
+ BLEND_FLOAT32_bit = 1 << 23, /* If true, use 32-bit float precision for source colors, else truncate to 12-bit mantissa precision. This applies even if blending is disabled so that a null blend and blend disable produce the same result. This field is ignored for NUMBER_UINT and NUMBER_SINT. It must be one for floating point components larger than 16-bits or non- floating components larger than 12-bits, otherwise it must be 0. */
+ SIMPLE_FLOAT_bit = 1 << 24, /* If false, floating point processing follows full IEEE rules for INF, NaN, and -0. If true, 0*anything produces 0 and no operation produces -0. */
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25, /* This field selects between truncating (standard for floats) and rounding (standard for most other cases) to convert blender results to frame buffer components. The ROUND_BY_HALF setting can be overridden by the DITHER_ENABLE field in CB_COLOR_CONTROL. */
+/* TILE_COMPACT_bit = 1 << 26, */ /* If true, this surface is compacted to eliminate storage that would be unused due to multi-chip supertiling. The supertiling mode is specified in PA_SC_MULTI_CHIP_CNTL. If this bit is set, then MULTI_CHIP_SUPERTILE_ENABLE must be set in PA_SC_MODE_CNTL. */
+ SOURCE_FORMAT_bit = 1 << 27, /* This field indicates the allowed format for color data being exported from the pixel shader into the output merge block. This field may only be set to EXPORT_NORM if BLEND_CLAMP is enabled, BLEND_FLOAT32 is disabled, and the render target has only 11-bit or smaller UNORM or SNORM components. Selecting EXPORT_NORM flushes to zero values with exponent less than 0x70 (values less than 2^-15). */
CB_COLOR0_TILE = 0x000280c0,
CB_COLOR0_TILE_num = 8,
+ /* BASE_256B: This value times 256 is the byte address of the start of the CMASK per-tile data, if any, in device address space. In other words, this field is the high 32-bits of an up to 40-bit virtual device address. */
CB_COLOR0_FRAG = 0x000280e0,
CB_COLOR0_FRAG_num = 8,
+ /* BASE_256B: For linear and 1D tiled surfaces, this value times 256 is the byte address of the start of the FMASK per-tile data, if any, in device address space. In other words, this field is the high 32-bits of an up to 40-bit virtual device address. 2D tiled surfaces are the same except that the bits corresponding to the bank and pipe number in the address actually specify the bank/pipe swizzle for the surface. 2D tiled surfaces are always aligned to a multiple of the group size times the number of banks times the number of pipes (memory channels). */
CB_COLOR0_MASK = 0x00028100,
CB_COLOR0_MASK_num = 8,
- CMASK_BLOCK_MAX_mask = 0xfff << 0,
+ CMASK_BLOCK_MAX_mask = 0xfff << 0, /* This field equals one less than the number of 128x128 blocks of color mask data per 2D slice. For R600, 4-bit CMASK values are stored in macro-tiles that have pixel width and height determined by computing sqrt(Pipes*16K) and rounding up (for width) or down (for height) to the nearest power of two. The pitch for the Cmask array is derived from PITCH_TILE_MAX, padding to the nearest multiple of the macro tile width. */
CMASK_BLOCK_MAX_shift = 0,
- FMASK_TILE_MAX_mask = 0xfffff << 12,
+ FMASK_TILE_MAX_mask = 0xfffff << 12, /* This field equals one less than the number of 8x8 tiles of fragment mask data per 2D slice. For R600, FMASK values are stored in macro-tiles that have pixel width and height determined ... TBD. The pitch for the Fmask array is derived from PITCH_TILE_MAX, padding to the nearest multiple of the macro tile width. */
FMASK_TILE_MAX_shift = 12,
CB_CLEAR_RED = 0x00028120,
+ /* FP32 red component of background clear value. */
CB_CLEAR_GREEN = 0x00028124,
+ /* FP32 green component of background clear value. */
CB_CLEAR_BLUE = 0x00028128,
+ /* FP32 blue component of background clear value. */
CB_CLEAR_ALPHA = 0x0002812c,
- SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ /* FP32 alpha component of background clear value. */
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, /* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_PS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
- SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, /* Number of constant buffer elements */
SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
- SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, /* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_VS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
- SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, /* Number of constant buffer elements */
SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
- SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, /* (8-state). Number of elements in this constant buffer [0..4096], in units of 16 constants (cache lines). Associated with SQ_ALU_CONST_CACHE_GS_0. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
- SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, /* Number of constant buffer elements */
SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
- PA_SC_WINDOW_OFFSET = 0x00028200,
- WINDOW_X_OFFSET_mask = 0x7fff << 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200, /* Offset from screen coords to window coords. Vertices will be offset by these values if PA_SU_SC_MODE_CNTL.VTX_WINDOW_OFFSET_ENABLE is set. The WINDOW_SCISSOR will be offset by these values if the WINDOW_SCISSOR_TL.WINDOW_OFFSET_DISABLE is clear. If this value allows the window to extend beyond the Front Buffer (Surface) dimensions, it is expected that the SCREEN_SCISSOR is used to limit to FB surface. */
+ WINDOW_X_OFFSET_mask = 0x7fff << 0, /* Offset in x-direction from screen to window coords. 16- bit 2`s comp signed value. Valid Range +/- 16K. */
WINDOW_X_OFFSET_shift = 0,
- WINDOW_Y_OFFSET_mask = 0x7fff << 16,
+ WINDOW_Y_OFFSET_mask = 0x7fff << 16, /* Offset in y-direction from screen to window coords. 16- bit 2`s comp signed value. Valid Range +/- 16K. */
WINDOW_Y_OFFSET_shift = 16,
- PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
- PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204, /* Window Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0, /* Left hand edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
- PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, /* Upper edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
- WINDOW_OFFSET_DISABLE_bit = 1 << 31,
- PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
- PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31, /* If set, window scissor is not offset by the WINDOW_OFFSET register values. */
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208, /* Window Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0, /* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
- PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, /* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
- PA_SC_CLIPRECT_RULE = 0x0002820c,
- CLIP_RULE_mask = 0xffff << 0,
+ PA_SC_CLIPRECT_RULE = 0x0002820c, /* OpenGL Clip boolean function */
+ CLIP_RULE_mask = 0xffff << 0, /* OpenGL Clip boolean function. The `inside` flags for each of the four clip rectangles form a 4-bit binary number. The corresponding bit in this 16-bit number specifies whether the pixel is visible. */
CLIP_RULE_shift = 0,
- PA_SC_CLIPRECT_0_TL = 0x00028210,
+ PA_SC_CLIPRECT_0_TL = 0x00028210, /* Clip Rectangle Top-Left Specification */
PA_SC_CLIPRECT_0_TL_num = 4,
PA_SC_CLIPRECT_0_TL_offset = 8,
- PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, /* Left x value of clip rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT */
PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
- PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, /* Top y value of clip rectangle. 14 bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT */
PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
- PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR = 0x00028214, /* Clip Rectangle Bottom-Right Specification */
PA_SC_CLIPRECT_0_BR_num = 4,
PA_SC_CLIPRECT_0_BR_offset = 8,
- PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, /* Right x value of clip rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT */
PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
- PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, /* Bottom y value of clip rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT */
PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
- CB_TARGET_MASK = 0x00028238,
- TARGET0_ENABLE_mask = 0x0f << 0,
+ CB_TARGET_MASK = 0x00028238, /* Contains color component mask fields for writing the render targets. Red, green, blue, and alpha are components 0, 1, 2, and 3 in the pixel shader and are enabled by bits 0, 1, 2, and 3 in each field. Note that the components may be in a different order in the frame buffer, depending on the COMP_SWAP field. Targets 1-7 are defined equivalently to output 0. */
+ TARGET0_ENABLE_mask = 0x0f << 0, /* Enables writing to render target 0 components. The low order bit corresponds to the red channel. A zero bit disables writing to that channel and a one bit enables writing to that channel. If blending is enabled, all components are read from the frame buffer, regardless of this mask value. Any components that are missing due to the element format are replaced with their default value: 0.0 for color or 1.0 for alpha. */
TARGET0_ENABLE_shift = 0,
TARGET1_ENABLE_mask = 0x0f << 4,
TARGET1_ENABLE_shift = 4,
@@ -1479,8 +1516,8 @@ enum {
TARGET6_ENABLE_shift = 24,
TARGET7_ENABLE_mask = 0x0f << 28,
TARGET7_ENABLE_shift = 28,
- CB_SHADER_MASK = 0x0002823c,
- OUTPUT0_ENABLE_mask = 0x0f << 0,
+ CB_SHADER_MASK = 0x0002823c, /* Contains color component mask fields for the colors output by the shader. Outputs 1-7 are defined equivalently to output 0. */
+ OUTPUT0_ENABLE_mask = 0x0f << 0, /* If zero, this field disables writes to render target 0, else it specifies which components are enabled in the shader. The low order bit corresponds to the red channel. A one bit passes the shader output component value to the color block. A zero bit replaces the component with the default value: 0.0 for RGB or 1.0 for alpha. */
OUTPUT0_ENABLE_shift = 0,
OUTPUT1_ENABLE_mask = 0x0f << 4,
OUTPUT1_ENABLE_shift = 4,
@@ -1496,102 +1533,122 @@ enum {
OUTPUT6_ENABLE_shift = 24,
OUTPUT7_ENABLE_mask = 0x0f << 28,
OUTPUT7_ENABLE_shift = 28,
- PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
- PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240, /* Generic Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0, /* Left hand edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
- PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, /* Upper edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
-/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
- PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
- PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ /* If set, generic scissor is not offset by the WINDOW_OFFSET register values. */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244, /* Generic Scissor rectangle specification. Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0, /* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
- PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, /* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
- PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250, /* WGF ViewportId Scissor rectangle specification(0-15). Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
PA_SC_VPORT_SCISSOR_0_TL_num = 16,
PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
- PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0, /* Left hand edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
- PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16, /* Upper edge of scissor rectangle. 14-bits unsigned. Valid range 0-8191. Inclusive for UPPER_LEFT. */
PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
-/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
- PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ /* If set, viewportId scissor is not offset by the WINDOW_OFFSET register values. */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, /* WGF ViewportID Scissor rectangle specification(0-15). Scissor is conditionally (See WINDOW_OFFSET_ENABLE) offset by WINDOW_OFFSET. */
PA_SC_VPORT_SCISSOR_0_BR_num = 16,
PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
- PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0, /* Right hand edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
- PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16, /* Lower edge of scissor rectangle. 14 bits unsigned. Valid range 0-8192. Exclusive for BOTTOM_RIGHT. */
PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
- PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0, /* Viewport Transform Z Min Clamp - 0-15 For WGF ViewportId */
PA_SC_VPORT_ZMIN_0_num = 16,
PA_SC_VPORT_ZMIN_0_offset = 8,
- PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ /* VPORT_ZMIN: Minimum Z Value from Viewport Transform. Z values will be clamped by the DB to this value. */
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4, /* Viewport Transform Z Max Clamp - 0-15 For WGF ViewportId */
PA_SC_VPORT_ZMAX_0_num = 16,
PA_SC_VPORT_ZMAX_0_offset = 8,
+ /* VPORT_ZMAX: Maximum Z Value from Viewport Transform. Z values will be clamped by the DB to this value. */
SX_MISC = 0x00028350,
MULTIPASS_bit = 1 << 0,
- SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0 = 0x00028380, /* (8-state) Vertex Fetch Semantic Name. Used for semantic-based vertex fetches. 32 entries provided (8 states). The address in which the semantic occurs dictates which GPR the named element goes to in the vertex shader. Note that the hardware does not interpret this value, other than simply compare these 8 bits versus the 8-bit semantic in the vertex fetch instruction. These registers are write-only (not readable). */
SQ_VTX_SEMANTIC_0_num = 32,
-/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_mask = 0xff << 0, */ /* 8-bit semantic id */
/* SEMANTIC_ID_shift = 0, */
- VGT_MAX_VTX_INDX = 0x00028400,
- VGT_MIN_VTX_INDX = 0x00028404,
- VGT_INDX_OFFSET = 0x00028408,
- VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ VGT_MAX_VTX_INDX = 0x00028400, /* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the maximum clamp value. Clamping occurs after offsetting and prior to fix->flt conversion. */
+ /* MAX_INDX: maximum clamp value for index clamp, extend it to 32-bits */
+ VGT_MIN_VTX_INDX = 0x00028404, /* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the minimum clamp value. Clamping occurs after offsetting and prior to fix->flt conversion. */
+ /* MIN_INDX: minimum clamp value for index clamp, extend it to 32- bits */
+ VGT_INDX_OFFSET = 0x00028408, /* For components that are specified to be indices (see the VGT_GROUP_VECT_0_FMT_CNTL register), this register is the offset value. Offsetting occurs prior to clamping and fix->flt conversion. */
+ /* Index offset value (32-bit adder), extend it to 32-bits */
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, /* This register defines the index which resets primitive sets when MULTI_PRIM_IB is enabled. */
+ /* If this value matches an index in the IB, a new primitive set is started. */
SX_ALPHA_TEST_CONTROL = 0x00028410,
- ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_mask = 0x07 << 0, /* Specifies the function used to compare the fragment alpha value (produced by the shader pipe) to ALPHA_REF, the reference alpha value. The alpha test passes (keeping the pixel) if frag_alpha OP alpha_ref is true. */
ALPHA_FUNC_shift = 0,
- REF_NEVER = 0x00,
- REF_LESS = 0x01,
- REF_EQUAL = 0x02,
- REF_LEQUAL = 0x03,
- REF_GREATER = 0x04,
- REF_NOTEQUAL = 0x05,
- REF_GEQUAL = 0x06,
- REF_ALWAYS = 0x07,
- ALPHA_TEST_ENABLE_bit = 1 << 3,
- ALPHA_TEST_BYPASS_bit = 1 << 8,
+ REF_NEVER = 0x00, /* REF_NEVER: never pass */
+ REF_LESS = 0x01, /* REF_LESS: pass if left < right */
+ REF_EQUAL = 0x02, /* REF_EQUAL: pass if left = right */
+ REF_LEQUAL = 0x03, /* REF_LEQUAL: pass if left <= right */
+ REF_GREATER = 0x04, /* REF_GREATER: pass if left > right */
+ REF_NOTEQUAL = 0x05, /* REF_NOTEQUAL: pass if left != right */
+ REF_GEQUAL = 0x06, /* REF_GEQUAL: pass if left >= right */
+ REF_ALWAYS = 0x07, /* REF_ALWAYS: always pass */
+ ALPHA_TEST_ENABLE_bit = 1 << 3, /* If alpha test is enabled, then a failed ALPHA_FUNC comparison causes the pixel to be killed. */
+ ALPHA_TEST_BYPASS_bit = 1 << 8, /* Driver can set this bit to bypass the alpha test for surface types that don't support alpha testing. */
CB_BLEND_RED = 0x00028414,
+ /* FP32 red component of constant blend color. */
CB_BLEND_GREEN = 0x00028418,
+ /* FP32 green component of constant blend color. */
CB_BLEND_BLUE = 0x0002841c,
+ /* FP32 blue component of constant blend color. */
CB_BLEND_ALPHA = 0x00028420,
+ /* FP32 alpha component of constant blend color. */
CB_FOG_RED = 0x00028424,
+ /* Red component of fog color, specified in IEEE floating point. */
CB_FOG_GREEN = 0x00028428,
+ /* Green component of fog color, specified in IEEE floating point. */
CB_FOG_BLUE = 0x0002842c,
+ /* Blue component of fog color, specified in IEEE floating point. */
DB_STENCILREFMASK = 0x00028430,
- STENCILREF_mask = 0xff << 0,
+ STENCILREF_mask = 0xff << 0, /* Specifies the reference stencil value for front facing primitives. */
STENCILREF_shift = 0,
- STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_mask = 0xff << 8, /* This value is ANDed with both the reference and the current stencil value prior to the stencil test for front facing primitives. */
STENCILMASK_shift = 8,
- STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_mask = 0xff << 16, /* Specifies the write mask for the stencil planes for front facing primitives. */
STENCILWRITEMASK_shift = 16,
DB_STENCILREFMASK_BF = 0x00028434,
- STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_mask = 0xff << 0, /* Specifies the reference stencil value for back facing primitives. */
STENCILREF_BF_shift = 0,
- STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_mask = 0xff << 8, /* This value is ANDed with both the reference and the current stencil value prior to the stencil test for back facing primitives. */
STENCILMASK_BF_shift = 8,
- STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_mask = 0xff << 16, /* Specifies the write mask for the stencil planes for back facing primitives. */
STENCILWRITEMASK_BF_shift = 16,
SX_ALPHA_REF = 0x00028438,
- PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ /* Reference value for alpha test, which is specified in IEEE floating point. */
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c, /* Viewport Transform X Scale Factor - 1-15 For WGF ViewportId */
PA_CL_VPORT_XSCALE_0_num = 16,
PA_CL_VPORT_XSCALE_0_offset = 24,
- PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ /* VPORT_XSCALE: Viewport Scale Factor for X coordinates. An IEEE float. */
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440, /* Viewport Transform X Offset - 1-15 For WGF ViewportId */
PA_CL_VPORT_XOFFSET_0_num = 16,
PA_CL_VPORT_XOFFSET_0_offset = 24,
- PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ /* VPORT_XOFFSET: Viewport Offset for X coordinates. An IEEE float. */
+ PA_CL_VPORT_YSCALE_0 = 0x00028444, /* Viewport Transform Y Scale Factor - 1-15 For WGF ViewportId */
PA_CL_VPORT_YSCALE_0_num = 16,
PA_CL_VPORT_YSCALE_0_offset = 24,
- PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ /* VPORT_YSCALE: Viewport Scale Factor for Y coordinates. An IEEE float. */
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448, /* Viewport Transform Y Offset - 1-15 For WGF ViewportId */
PA_CL_VPORT_YOFFSET_0_num = 16,
PA_CL_VPORT_YOFFSET_0_offset = 24,
- PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ /* VPORT_YOFFSET: Viewport Offset for Y coordinates. An IEEE float. */
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c, /* Viewport Transform Z Scale Factor - 1-15 For WGF ViewportId */
PA_CL_VPORT_ZSCALE_0_num = 16,
PA_CL_VPORT_ZSCALE_0_offset = 24,
- PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ /* VPORT_ZSCALE: Viewport Scale Factor for Z coordinates. An IEEE float. */
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450, /* Viewport Transform Z Offset - 1-15 For WGF ViewportId */
PA_CL_VPORT_ZOFFSET_0_num = 16,
PA_CL_VPORT_ZOFFSET_0_offset = 24,
- SPI_VS_OUT_ID_0 = 0x00028614,
+ /* VPORT_ZOFFSET: Viewport Offset for Z coordinates. An IEEE float. */
+ SPI_VS_OUT_ID_0 = 0x00028614, /* VS output semantic mapping for 4 components/vectors */
SPI_VS_OUT_ID_0_num = 10,
SEMANTIC_0_mask = 0xff << 0,
SEMANTIC_0_shift = 0,
@@ -1599,112 +1656,114 @@ enum {
SEMANTIC_1_shift = 8,
SEMANTIC_2_mask = 0xff << 16,
SEMANTIC_2_shift = 16,
- SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_mask = 0xff << 24, /* 12. Shader Export Registers */
SEMANTIC_3_shift = 24,
- SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644, /* PS interpolator settings for parameter 0 */
SPI_PS_INPUT_CNTL_0_num = 32,
- SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_mask = 0xff << 0, /* PS input semantic mapping */
SEMANTIC_shift = 0,
- DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_mask = 0x03 << 8, /* Selects value to force into GPR if no semantic match found POSSIBLE VALUES: 00 - 0.0f, 0.0f, 0.0f, 0.0f 01 - 0.0f, 0.0f, 0.0f, 1.0f 02 - 1.0f, 1.0f, 1.0f, 0.0f 03 - 1.0f, 1.0f, 1.0f, 1.0f */
DEFAULT_VAL_shift = 8,
- X_0_0F = 0x00,
- FLAT_SHADE_bit = 1 << 10,
- SEL_CENTROID_bit = 1 << 11,
- SEL_LINEAR_bit = 1 << 12,
- CYL_WRAP_mask = 0x0f << 13,
+ X_0_0F = 0x00, /* 0.0f, 0.0f, 0.0f, 0.0f */
+ FLAT_SHADE_bit = 1 << 10, /* Flat shade select */
+ SEL_CENTROID_bit = 1 << 11, /* Use IJ data sampled at pixel centroid */
+ SEL_LINEAR_bit = 1 << 12, /* Use IJ data from linear gradients */
+ CYL_WRAP_mask = 0x0f << 13, /* 4-bit cylindrical wrap control (1 bit per component) */
CYL_WRAP_shift = 13,
- PT_SPRITE_TEX_bit = 1 << 17,
+ PT_SPRITE_TEX_bit = 1 << 17, /* Override this parameter with texture coordinates if global enable set and prim is a point */
SEL_SAMPLE_bit = 1 << 18,
- SPI_VS_OUT_CONFIG = 0x000286c4,
- VS_PER_COMPONENT_bit = 1 << 0,
- VS_EXPORT_COUNT_mask = 0x1f << 1,
+ SPI_VS_OUT_CONFIG = 0x000286c4, /* VS output configuration */
+ VS_PER_COMPONENT_bit = 1 << 0, /* When set, each entry in SPI_VS_OUT_ID_0-9 represents one component of a vector (not valid for DX10). Otherwise each entry represents an entire vector */
+ VS_EXPORT_COUNT_mask = 0x1f << 1, /* Number of vectors exported by the VS (value is minus 1) */
VS_EXPORT_COUNT_shift = 1,
- VS_EXPORTS_FOG_bit = 1 << 8,
- VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_EXPORTS_FOG_bit = 1 << 8, /* Set when VS exports fog */
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, /* Vector address where VS exported fog. Fog factor will always be in the X channel */
VS_OUT_FOG_VEC_ADDR_shift = 9,
- SPI_PS_IN_CONTROL_0 = 0x000286cc,
- NUM_INTERP_mask = 0x3f << 0,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc, /* Interpolator control settings */
+ NUM_INTERP_mask = 0x3f << 0, /* Number of parameters to interp (no minus 1). Does not include fog, param_gen, or gen_indx, but should include position and frontface */
NUM_INTERP_shift = 0,
- POSITION_ENA_bit = 1 << 8,
- POSITION_CENTROID_bit = 1 << 9,
- POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ENA_bit = 1 << 8, /* Load per-pixel position into the PS */
+ POSITION_CENTROID_bit = 1 << 9, /* Calculate per-pixel position at pixel centroid */
+ POSITION_ADDR_mask = 0x1f << 10, /* Relative GPR address where position is loaded (0->31) */
POSITION_ADDR_shift = 10,
- PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_mask = 0x0f << 15, /* Generate up to 4 sets of ST coordinates. Bit 0=persp/center, 1=persp/centroid, 2=linear/center, 3=linear/centroid */
PARAM_GEN_shift = 15,
- PARAM_GEN_ADDR_mask = 0x7f << 19,
+ PARAM_GEN_ADDR_mask = 0x7f << 19, /* First relative GPR address where param_gen values are loaded (0->(127-num_param_gen)) */
PARAM_GEN_ADDR_shift = 19,
BARYC_SAMPLE_CNTL_mask = 0x03 << 26,
BARYC_SAMPLE_CNTL_shift = 26,
- CENTROIDS_ONLY = 0x00,
- CENTERS_ONLY = 0x01,
- CENTROIDS_AND_CENTERS = 0x02,
- UNDEF = 0x03,
- PERSP_GRADIENT_ENA_bit = 1 << 28,
- LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ CENTROIDS_ONLY = 0x00, /* CENTROIDS_ONLY: CENTROIDS_ONLY */
+ CENTERS_ONLY = 0x01, /* CENTERS_ONLY: CENTERS_ONLY */
+ CENTROIDS_AND_CENTERS = 0x02, /* CENTROIDS_AND_CENTERS: CENTROIDS_AND_CENTERS */
+ UNDEF = 0x03, /* UNDEF: UNDEFINED */
+ PERSP_GRADIENT_ENA_bit = 1 << 28, /* Enable perspective gradients (if linear is set to 0, persp is always enabled) */
+ LINEAR_GRADIENT_ENA_bit = 1 << 29, /* Enable linear gradients */
POSITION_SAMPLE_bit = 1 << 30,
BARYC_AT_SAMPLE_ENA_bit = 1 << 31,
- SPI_PS_IN_CONTROL_1 = 0x000286d0,
- GEN_INDEX_PIX_bit = 1 << 0,
- GEN_INDEX_PIX_ADDR_mask = 0x7f << 1,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0, /* Interpolator control settings */
+ GEN_INDEX_PIX_bit = 1 << 0, /* Load incrementing value into each pixel to create a unique index for each */
+ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1, /* Relative GPR address where gen_index is loaded (0->126) */
GEN_INDEX_PIX_ADDR_shift = 1,
- FRONT_FACE_ENA_bit = 1 << 8,
- FRONT_FACE_CHAN_mask = 0x03 << 9,
+ FRONT_FACE_ENA_bit = 1 << 8, /* Override interpolator results with frontface information */
+ FRONT_FACE_CHAN_mask = 0x03 << 9, /* Select channel to override */
FRONT_FACE_CHAN_shift = 9,
FRONT_FACE_ALL_BITS_bit = 1 << 11,
- FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_mask = 0x1f << 12, /* Relative GPR address to load (0->31) */
FRONT_FACE_ADDR_shift = 12,
- FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_mask = 0x7f << 17, /* Relative GPR address to load (0->126) */
FOG_ADDR_shift = 17,
FIXED_PT_POSITION_ENA_bit = 1 << 24,
FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
FIXED_PT_POSITION_ADDR_shift = 25,
- SPI_INTERP_CONTROL_0 = 0x000286d4,
- FLAT_SHADE_ENA_bit = 1 << 0,
- PNT_SPRITE_ENA_bit = 1 << 1,
+ SPI_INTERP_CONTROL_0 = 0x000286d4, /* Interpolator control settings */
+ FLAT_SHADE_ENA_bit = 1 << 0, /* Global flat shade enable used in conjunction with per-parameter flat shade control */
+ PNT_SPRITE_ENA_bit = 1 << 1, /* Enable PT_SPRITE_TEX override for point primitives */
PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
PNT_SPRITE_OVRD_X_shift = 2,
- SPI_PNT_SPRITE_SEL_0 = 0x00,
- SPI_PNT_SPRITE_SEL_1 = 0x01,
- SPI_PNT_SPRITE_SEL_S = 0x02,
- SPI_PNT_SPRITE_SEL_T = 0x03,
- SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ SPI_PNT_SPRITE_SEL_0 = 0x00, /* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
+ SPI_PNT_SPRITE_SEL_1 = 0x01, /* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
+ SPI_PNT_SPRITE_SEL_S = 0x02, /* SPI_PNT_SPRITE_SEL_S: Override component with S value */
+ SPI_PNT_SPRITE_SEL_T = 0x03, /* SPI_PNT_SPRITE_SEL_T: Override component with T value */
+ SPI_PNT_SPRITE_SEL_NONE = 0x04, /* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
PNT_SPRITE_OVRD_Y_shift = 5,
-/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
-/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
-/* SPI_PNT_SPRITE_SEL_S = 0x02, */
-/* SPI_PNT_SPRITE_SEL_T = 0x03, */
-/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ /* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ /* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ /* SPI_PNT_SPRITE_SEL_S: Override component with S value */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ /* SPI_PNT_SPRITE_SEL_T: Override component with T value */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ /* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
PNT_SPRITE_OVRD_Z_shift = 8,
-/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
-/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
-/* SPI_PNT_SPRITE_SEL_S = 0x02, */
-/* SPI_PNT_SPRITE_SEL_T = 0x03, */
-/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ /* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ /* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ /* SPI_PNT_SPRITE_SEL_S: Override component with S value */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ /* SPI_PNT_SPRITE_SEL_T: Override component with T value */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ /* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
PNT_SPRITE_OVRD_W_shift = 11,
-/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
-/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
-/* SPI_PNT_SPRITE_SEL_S = 0x02, */
-/* SPI_PNT_SPRITE_SEL_T = 0x03, */
-/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ /* SPI_PNT_SPRITE_SEL_0: Override component with 0.0f */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ /* SPI_PNT_SPRITE_SEL_1: Override component with 1.0f */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */ /* SPI_PNT_SPRITE_SEL_S: Override component with S value */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */ /* SPI_PNT_SPRITE_SEL_T: Override component with T value */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ /* SPI_PNT_SPRITE_SEL_NONE: Keep interpolated result */
PNT_SPRITE_TOP_1_bit = 1 << 14,
SPI_INPUT_Z = 0x000286d8,
PROVIDE_Z_TO_SPI_bit = 1 << 0,
- SPI_FOG_CNTL = 0x000286dc,
- PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc, /* Fog interpolation control */
+ PASS_FOG_THROUGH_PS_bit = 1 << 0, /* Enable fog processing */
PIXEL_FOG_FUNC_mask = 0x03 << 1,
PIXEL_FOG_FUNC_shift = 1,
- SPI_FOG_NONE = 0x00,
- SPI_FOG_EXP = 0x01,
- SPI_FOG_EXP2 = 0x02,
- SPI_FOG_LINEAR = 0x03,
+ SPI_FOG_NONE = 0x00, /* SPI_FOG_NONE: SPI_FOG_NONE */
+ SPI_FOG_EXP = 0x01, /* SPI_FOG_EXP: SPI_FOG_EXP */
+ SPI_FOG_EXP2 = 0x02, /* SPI_FOG_EXP2: SPI_FOG_EXP2 */
+ SPI_FOG_LINEAR = 0x03, /* SPI_FOG_LINEAR: SPI_FOG_LINEAR */
PIXEL_FOG_SRC_SEL_bit = 1 << 3,
VS_FOG_CLAMP_DISABLE_bit = 1 << 4,
SPI_FOG_FUNC_SCALE = 0x000286e0,
+ /* VALUE: */
SPI_FOG_FUNC_BIAS = 0x000286e4,
- CB_BLEND0_CONTROL = 0x00028780,
+ /* VALUE: */
+ CB_BLEND0_CONTROL = 0x00028780, /* Per-MRT blend control for render target 0, used if PER_MRT_BLEND is true. See CB_BLEND_CONTROL for field descriptions. */
CB_BLEND0_CONTROL_num = 8,
COLOR_SRCBLEND_mask = 0x1f << 0,
COLOR_SRCBLEND_shift = 0,
@@ -1720,348 +1779,350 @@ enum {
ALPHA_DESTBLEND_mask = 0x1f << 24,
ALPHA_DESTBLEND_shift = 24,
SEPARATE_ALPHA_BLEND_bit = 1 << 29,
- VGT_DMA_BASE_HI = 0x000287e4,
- VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI = 0x000287e4, /* VGT DMA Base Address : upper 8-bits of 40 bit address */
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, /* This specifies the upper 8 bits of the 40-bit DMA address */
VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
- VGT_DMA_BASE = 0x000287e8,
- VGT_DRAW_INITIATOR = 0x000287f0,
- SOURCE_SELECT_mask = 0x03 << 0,
+ VGT_DMA_BASE = 0x000287e8, /* VGT DMA Base Address */
+ /* BASE_ADDR: VGT DMA Base Address This address must be naturally aligned to a 16-bit word. Therefore, bit 0 of this register must be 0 */
+ VGT_DRAW_INITIATOR = 0x000287f0, /* Draw Initiator */
+ SOURCE_SELECT_mask = 0x03 << 0, /* Input Source Select. If the Source Select field is set to `Auto-increment Index` mode and the Primitive Type is set to `Tri List w/Flags`, then the draw initiator is processed as just a regular `Tri List`. */
SOURCE_SELECT_shift = 0,
- DI_SRC_SEL_DMA = 0x00,
- DI_SRC_SEL_IMMEDIATE = 0x01,
- DI_SRC_SEL_AUTO_INDEX = 0x02,
- DI_SRC_SEL_RESERVED = 0x03,
- MAJOR_MODE_mask = 0x03 << 2,
+ DI_SRC_SEL_DMA = 0x00, /* DI_SRC_SEL_DMA: VGT DMA Data */
+ DI_SRC_SEL_IMMEDIATE = 0x01, /* DI_SRC_SEL_IMMEDIATE: Immediate Data */
+ DI_SRC_SEL_AUTO_INDEX = 0x02, /* DI_SRC_SEL_AUTO_INDEX: Auto-increment Index */
+ DI_SRC_SEL_RESERVED = 0x03, /* DI_SRC_SEL_RESERVED: Reserved - unused */
+ MAJOR_MODE_mask = 0x03 << 2, /* Major Mode */
MAJOR_MODE_shift = 2,
- DI_MAJOR_MODE_0 = 0x00,
- DI_MAJOR_MODE_1 = 0x01,
- SPRITE_EN_bit = 1 << 4,
- NOT_EOP_bit = 1 << 5,
- USE_OPAQUE_bit = 1 << 6,
- VGT_IMMED_DATA = 0x000287f4,
- VGT_EVENT_ADDRESS_REG = 0x000287f8,
- ADDRESS_LOW_mask = 0xfffffff << 0,
+ DI_MAJOR_MODE_0 = 0x00, /* DI_MAJOR_MODE_0: DI_MAJOR_MODE_0 Normal (Implicit) Mode -- applies only to prim types 0-21. Some VGT state registers are ignored (their values implied) in this mode. */
+ DI_MAJOR_MODE_1 = 0x01, /* DI_MAJOR_MODE_1: DI_MAJOR_MODE_1 Explicit Mode -- Configuration completely specified by state registers. */
+ SPRITE_EN_bit = 1 << 4, /* sprite enable */
+ NOT_EOP_bit = 1 << 5, /* This bit indicates that this draw initiator should not generate an end-of-packet signal because it will be followed by one or more chained draw initiators. Care must be taken so that this draw initiator is immediately followed, at the hardware interface, by a chained draw initiator. (In other words, chained draw initiators cannot be separated over driver buffer boundaries that can be interrupted. This bit is primarily intended to be set by the CP to improve the processing parallelism of small 2D blits.) */
+ USE_OPAQUE_bit = 1 << 6, /* This bit indicates that this draw call is an opaque draw call */
+ VGT_IMMED_DATA = 0x000287f4, /* VGT Immediate Data */
+ /* Data written to this address is written into the VGT Immediate Data FIFO. */
+ VGT_EVENT_ADDRESS_REG = 0x000287f8, /* Event address */
+ ADDRESS_LOW_mask = 0xfffffff << 0, /* address bit 31:4 for zpass event */
ADDRESS_LOW_shift = 0,
- DB_DEPTH_CONTROL = 0x00028800,
- STENCIL_ENABLE_bit = 1 << 0,
- Z_ENABLE_bit = 1 << 1,
- Z_WRITE_ENABLE_bit = 1 << 2,
- ZFUNC_mask = 0x07 << 4,
+ DB_DEPTH_CONTROL = 0x00028800, /* This register controls depth and stencil tests. */
+ STENCIL_ENABLE_bit = 1 << 0, /* Enables stencil testing. If disabled, all pixels pass the stencil test. If there is no stencil buffer this is treated as disabled. */
+ Z_ENABLE_bit = 1 << 1, /* Enables depth testing. If disabled, all pixels pass the depth test. If there is no depth buffer this is treated as disabled. */
+ Z_WRITE_ENABLE_bit = 1 << 2, /* Enables writing to the depth buffer if the depth test passes. */
+ ZFUNC_mask = 0x07 << 4, /* Specifies the function that compares the depth at each sample in the fragment to the destination depth at the corresponding sample point. */
ZFUNC_shift = 4,
- FRAG_NEVER = 0x00,
- FRAG_LESS = 0x01,
- FRAG_EQUAL = 0x02,
- FRAG_LEQUAL = 0x03,
- FRAG_GREATER = 0x04,
- FRAG_NOTEQUAL = 0x05,
- FRAG_GEQUAL = 0x06,
- FRAG_ALWAYS = 0x07,
- BACKFACE_ENABLE_bit = 1 << 7,
- STENCILFUNC_mask = 0x07 << 8,
+ FRAG_NEVER = 0x00, /* FRAG_NEVER: never pass */
+ FRAG_LESS = 0x01, /* FRAG_LESS: pass if fragment < dest */
+ FRAG_EQUAL = 0x02, /* FRAG_EQUAL: pass if fragment = dest */
+ FRAG_LEQUAL = 0x03, /* FRAG_LEQUAL: pass if fragment <= dest */
+ FRAG_GREATER = 0x04, /* FRAG_GREATER: pass if fragment > dest */
+ FRAG_NOTEQUAL = 0x05, /* FRAG_NOTEQUAL: pass if fragment != dest */
+ FRAG_GEQUAL = 0x06, /* FRAG_GEQUAL: pass if fragment >= dest */
+ FRAG_ALWAYS = 0x07, /* FRAG_ALWAYS: always pass */
+ BACKFACE_ENABLE_bit = 1 << 7, /* If false, forces all quads to be stencil tested as frontface quads. */
+ STENCILFUNC_mask = 0x07 << 8, /* Specifies the function that compares STENCILREF to the destination stencil value for frontface quads. The stencil test passes if ref OP dest is true. */
STENCILFUNC_shift = 8,
-/* REF_NEVER = 0x00, */
-/* REF_LESS = 0x01, */
-/* REF_EQUAL = 0x02, */
-/* REF_LEQUAL = 0x03, */
-/* REF_GREATER = 0x04, */
-/* REF_NOTEQUAL = 0x05, */
-/* REF_GEQUAL = 0x06, */
-/* REF_ALWAYS = 0x07, */
- STENCILFAIL_mask = 0x07 << 11,
+/* REF_NEVER = 0x00, */ /* REF_NEVER: never pass */
+/* REF_LESS = 0x01, */ /* REF_LESS: pass if left < right */
+/* REF_EQUAL = 0x02, */ /* REF_EQUAL: pass if left = right */
+/* REF_LEQUAL = 0x03, */ /* REF_LEQUAL: pass if left <= right */
+/* REF_GREATER = 0x04, */ /* REF_GREATER: pass if left > right */
+/* REF_NOTEQUAL = 0x05, */ /* REF_NOTEQUAL: pass if left != right */
+/* REF_GEQUAL = 0x06, */ /* REF_GEQUAL: pass if left >= right */
+/* REF_ALWAYS = 0x07, */ /* REF_ALWAYS: always pass */
+ STENCILFAIL_mask = 0x07 << 11, /* Specifies the stencil operation for frontface quads if the stencil function fails. */
STENCILFAIL_shift = 11,
- STENCIL_KEEP = 0x00,
- STENCIL_ZERO = 0x01,
- STENCIL_REPLACE = 0x02,
- STENCIL_INCR_CLAMP = 0x03,
- STENCIL_DECR_CLAMP = 0x04,
- STENCIL_INVERT = 0x05,
- STENCIL_INCR_WRAP = 0x06,
- STENCIL_DECR_WRAP = 0x07,
- STENCILZPASS_mask = 0x07 << 14,
+ STENCIL_KEEP = 0x00, /* STENCIL_KEEP: New value = Old Value */
+ STENCIL_ZERO = 0x01, /* STENCIL_ZERO: New value = 0 */
+ STENCIL_REPLACE = 0x02, /* STENCIL_REPLACE: New value = STENCILREF */
+ STENCIL_INCR_CLAMP = 0x03, /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+ STENCIL_DECR_CLAMP = 0x04, /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+ STENCIL_INVERT = 0x05, /* STENCIL_INVERT: New value=~Old value */
+ STENCIL_INCR_WRAP = 0x06, /* STENCIL_INCR_WRAP: New value++ (wrap) */
+ STENCIL_DECR_WRAP = 0x07, /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ STENCILZPASS_mask = 0x07 << 14, /* Specifies the stencil operation for frontface quads if the stencil and depth functions both pass. */
STENCILZPASS_shift = 14,
-/* STENCIL_KEEP = 0x00, */
-/* STENCIL_ZERO = 0x01, */
-/* STENCIL_REPLACE = 0x02, */
-/* STENCIL_INCR_CLAMP = 0x03, */
-/* STENCIL_DECR_CLAMP = 0x04, */
-/* STENCIL_INVERT = 0x05, */
-/* STENCIL_INCR_WRAP = 0x06, */
-/* STENCIL_DECR_WRAP = 0x07, */
- STENCILZFAIL_mask = 0x07 << 17,
+/* STENCIL_KEEP = 0x00, */ /* STENCIL_KEEP: New value = Old Value */
+/* STENCIL_ZERO = 0x01, */ /* STENCIL_ZERO: New value = 0 */
+/* STENCIL_REPLACE = 0x02, */ /* STENCIL_REPLACE: New value = STENCILREF */
+/* STENCIL_INCR_CLAMP = 0x03, */ /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+/* STENCIL_DECR_CLAMP = 0x04, */ /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+/* STENCIL_INVERT = 0x05, */ /* STENCIL_INVERT: New value=~Old value */
+/* STENCIL_INCR_WRAP = 0x06, */ /* STENCIL_INCR_WRAP: New value++ (wrap) */
+/* STENCIL_DECR_WRAP = 0x07, */ /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ STENCILZFAIL_mask = 0x07 << 17, /* Specifies the stencil operation for frontface quads if the stencil function passes and the depth function fails. */
STENCILZFAIL_shift = 17,
-/* STENCIL_KEEP = 0x00, */
-/* STENCIL_ZERO = 0x01, */
-/* STENCIL_REPLACE = 0x02, */
-/* STENCIL_INCR_CLAMP = 0x03, */
-/* STENCIL_DECR_CLAMP = 0x04, */
-/* STENCIL_INVERT = 0x05, */
-/* STENCIL_INCR_WRAP = 0x06, */
-/* STENCIL_DECR_WRAP = 0x07, */
- STENCILFUNC_BF_mask = 0x07 << 20,
+/* STENCIL_KEEP = 0x00, */ /* STENCIL_KEEP: New value = Old Value */
+/* STENCIL_ZERO = 0x01, */ /* STENCIL_ZERO: New value = 0 */
+/* STENCIL_REPLACE = 0x02, */ /* STENCIL_REPLACE: New value = STENCILREF */
+/* STENCIL_INCR_CLAMP = 0x03, */ /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+/* STENCIL_DECR_CLAMP = 0x04, */ /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+/* STENCIL_INVERT = 0x05, */ /* STENCIL_INVERT: New value=~Old value */
+/* STENCIL_INCR_WRAP = 0x06, */ /* STENCIL_INCR_WRAP: New value++ (wrap) */
+/* STENCIL_DECR_WRAP = 0x07, */ /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ STENCILFUNC_BF_mask = 0x07 << 20, /* Specifies the function that compares STENCILREF_BF to the destination stencil for backface quads. The stencil test passes if ref OP dest is true. */
STENCILFUNC_BF_shift = 20,
-/* REF_NEVER = 0x00, */
-/* REF_LESS = 0x01, */
-/* REF_EQUAL = 0x02, */
-/* REF_LEQUAL = 0x03, */
-/* REF_GREATER = 0x04, */
-/* REF_NOTEQUAL = 0x05, */
-/* REF_GEQUAL = 0x06, */
-/* REF_ALWAYS = 0x07, */
- STENCILFAIL_BF_mask = 0x07 << 23,
+/* REF_NEVER = 0x00, */ /* REF_NEVER: never pass */
+/* REF_LESS = 0x01, */ /* REF_LESS: pass if left < right */
+/* REF_EQUAL = 0x02, */ /* REF_EQUAL: pass if left = right */
+/* REF_LEQUAL = 0x03, */ /* REF_LEQUAL: pass if left <= right */
+/* REF_GREATER = 0x04, */ /* REF_GREATER: pass if left > right */
+/* REF_NOTEQUAL = 0x05, */ /* REF_NOTEQUAL: pass if left != right */
+/* REF_GEQUAL = 0x06, */ /* REF_GEQUAL: pass if left >= right */
+/* REF_ALWAYS = 0x07, */ /* REF_ALWAYS: always pass */
+ STENCILFAIL_BF_mask = 0x07 << 23, /* Specifies the stencil operation for backface quads if the stencil function fails. */
STENCILFAIL_BF_shift = 23,
-/* STENCIL_KEEP = 0x00, */
-/* STENCIL_ZERO = 0x01, */
-/* STENCIL_REPLACE = 0x02, */
-/* STENCIL_INCR_CLAMP = 0x03, */
-/* STENCIL_DECR_CLAMP = 0x04, */
-/* STENCIL_INVERT = 0x05, */
-/* STENCIL_INCR_WRAP = 0x06, */
-/* STENCIL_DECR_WRAP = 0x07, */
- STENCILZPASS_BF_mask = 0x07 << 26,
+/* STENCIL_KEEP = 0x00, */ /* STENCIL_KEEP: New value = Old Value */
+/* STENCIL_ZERO = 0x01, */ /* STENCIL_ZERO: New value = 0 */
+/* STENCIL_REPLACE = 0x02, */ /* STENCIL_REPLACE: New value = STENCILREF */
+/* STENCIL_INCR_CLAMP = 0x03, */ /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+/* STENCIL_DECR_CLAMP = 0x04, */ /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+/* STENCIL_INVERT = 0x05, */ /* STENCIL_INVERT: New value=~Old value */
+/* STENCIL_INCR_WRAP = 0x06, */ /* STENCIL_INCR_WRAP: New value++ (wrap) */
+/* STENCIL_DECR_WRAP = 0x07, */ /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ STENCILZPASS_BF_mask = 0x07 << 26, /* Specifies the stencil operation for backface quads if the stencil and depth functions both pass. */
STENCILZPASS_BF_shift = 26,
-/* STENCIL_KEEP = 0x00, */
-/* STENCIL_ZERO = 0x01, */
-/* STENCIL_REPLACE = 0x02, */
-/* STENCIL_INCR_CLAMP = 0x03, */
-/* STENCIL_DECR_CLAMP = 0x04, */
-/* STENCIL_INVERT = 0x05, */
-/* STENCIL_INCR_WRAP = 0x06, */
-/* STENCIL_DECR_WRAP = 0x07, */
- STENCILZFAIL_BF_mask = 0x07 << 29,
+/* STENCIL_KEEP = 0x00, */ /* STENCIL_KEEP: New value = Old Value */
+/* STENCIL_ZERO = 0x01, */ /* STENCIL_ZERO: New value = 0 */
+/* STENCIL_REPLACE = 0x02, */ /* STENCIL_REPLACE: New value = STENCILREF */
+/* STENCIL_INCR_CLAMP = 0x03, */ /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+/* STENCIL_DECR_CLAMP = 0x04, */ /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+/* STENCIL_INVERT = 0x05, */ /* STENCIL_INVERT: New value=~Old value */
+/* STENCIL_INCR_WRAP = 0x06, */ /* STENCIL_INCR_WRAP: New value++ (wrap) */
+/* STENCIL_DECR_WRAP = 0x07, */ /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ STENCILZFAIL_BF_mask = 0x07 << 29, /* Specifies the stencil operation for backface quads if the stencil function passes and the depth function fails. */
STENCILZFAIL_BF_shift = 29,
-/* STENCIL_KEEP = 0x00, */
-/* STENCIL_ZERO = 0x01, */
-/* STENCIL_REPLACE = 0x02, */
-/* STENCIL_INCR_CLAMP = 0x03, */
-/* STENCIL_DECR_CLAMP = 0x04, */
-/* STENCIL_INVERT = 0x05, */
-/* STENCIL_INCR_WRAP = 0x06, */
-/* STENCIL_DECR_WRAP = 0x07, */
- CB_BLEND_CONTROL = 0x00028804,
-/* COLOR_SRCBLEND_mask = 0x1f << 0, */
+/* STENCIL_KEEP = 0x00, */ /* STENCIL_KEEP: New value = Old Value */
+/* STENCIL_ZERO = 0x01, */ /* STENCIL_ZERO: New value = 0 */
+/* STENCIL_REPLACE = 0x02, */ /* STENCIL_REPLACE: New value = STENCILREF */
+/* STENCIL_INCR_CLAMP = 0x03, */ /* STENCIL_INCR_CLAMP: New value++ (clamp) */
+/* STENCIL_DECR_CLAMP = 0x04, */ /* STENCIL_DECR_CLAMP: New value-- (clamp) */
+/* STENCIL_INVERT = 0x05, */ /* STENCIL_INVERT: New value=~Old value */
+/* STENCIL_INCR_WRAP = 0x06, */ /* STENCIL_INCR_WRAP: New value++ (wrap) */
+/* STENCIL_DECR_WRAP = 0x07, */ /* STENCIL_DECR_WRAP: New value-- (wrap) */
+ CB_BLEND_CONTROL = 0x00028804, /* Blend function used for all render targets if PER_MRT_BLEND is false. */
+/* COLOR_SRCBLEND_mask = 0x1f << 0, */ /* Source blend function for RGB components. BLEND_X name corresponds to GL_X blend function. */
/* COLOR_SRCBLEND_shift = 0, */
- BLEND_ZERO = 0x00,
- BLEND_ONE = 0x01,
- BLEND_SRC_COLOR = 0x02,
- BLEND_ONE_MINUS_SRC_COLOR = 0x03,
- BLEND_SRC_ALPHA = 0x04,
- BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
- BLEND_DST_ALPHA = 0x06,
- BLEND_ONE_MINUS_DST_ALPHA = 0x07,
- BLEND_DST_COLOR = 0x08,
- BLEND_ONE_MINUS_DST_COLOR = 0x09,
- BLEND_SRC_ALPHA_SATURATE = 0x0a,
- BLEND_BOTH_SRC_ALPHA = 0x0b,
- BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
- BLEND_CONSTANT_COLOR = 0x0d,
- BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
- BLEND_SRC1_COLOR = 0x0f,
- BLEND_INV_SRC1_COLOR = 0x10,
- BLEND_SRC1_ALPHA = 0x11,
- BLEND_INV_SRC1_ALPHA = 0x12,
- BLEND_CONSTANT_ALPHA = 0x13,
- BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
-/* COLOR_COMB_FCN_mask = 0x07 << 5, */
+ BLEND_ZERO = 0x00, /* BLEND_ZERO: (d3d_zero) */
+ BLEND_ONE = 0x01, /* BLEND_ONE: (d3d_one) */
+ BLEND_SRC_COLOR = 0x02, /* BLEND_SRC_COLOR: (d3d_srccolor) */
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03, /* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
+ BLEND_SRC_ALPHA = 0x04, /* BLEND_SRC_ALPHA: (d3d_srcalpha) */
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05, /* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
+ BLEND_DST_ALPHA = 0x06, /* BLEND_DST_ALPHA: (d3d_destalpha) */
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07, /* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
+ BLEND_DST_COLOR = 0x08, /* BLEND_DST_COLOR: (d3d_destcolor) */
+ BLEND_ONE_MINUS_DST_COLOR = 0x09, /* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
+ BLEND_SRC_ALPHA_SATURATE = 0x0a, /* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
+ BLEND_BOTH_SRC_ALPHA = 0x0b, /* BLEND_BOTH_SRC_ALPHA: dx9 mode */
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c, /* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
+ BLEND_CONSTANT_COLOR = 0x0d, /* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, /* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
+ BLEND_SRC1_COLOR = 0x0f, /* BLEND_SRC1_COLOR: wgf dual-source mode */
+ BLEND_INV_SRC1_COLOR = 0x10, /* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
+ BLEND_SRC1_ALPHA = 0x11, /* BLEND_SRC1_ALPHA: wgf dual-source mode */
+ BLEND_INV_SRC1_ALPHA = 0x12, /* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
+ BLEND_CONSTANT_ALPHA = 0x13, /* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, /* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
+/* COLOR_COMB_FCN_mask = 0x07 << 5, */ /* Source/dest combination function for RGB components. Result is clamped to the representable range. */
/* COLOR_COMB_FCN_shift = 5, */
- COMB_DST_PLUS_SRC = 0x00,
- COMB_SRC_MINUS_DST = 0x01,
- COMB_MIN_DST_SRC = 0x02,
- COMB_MAX_DST_SRC = 0x03,
- COMB_DST_MINUS_SRC = 0x04,
-/* COLOR_DESTBLEND_mask = 0x1f << 8, */
+ COMB_DST_PLUS_SRC = 0x00, /* COMB_DST_PLUS_SRC: (ADD): Source*SRCBLEND + Dest*DSTBLEND */
+ COMB_SRC_MINUS_DST = 0x01, /* COMB_SRC_MINUS_DST: (SUBTRACT): Source*SRCBLEND - Dest*DSTBLEND */
+ COMB_MIN_DST_SRC = 0x02, /* COMB_MIN_DST_SRC: (MIN): min(Source, Dest) */
+ COMB_MAX_DST_SRC = 0x03, /* COMB_MAX_DST_SRC: (MAX): max(Source, Dest) */
+ COMB_DST_MINUS_SRC = 0x04, /* COMB_DST_MINUS_SRC: (REVSUBTRACT): Dest*DSTBLEND - Source*SRCBLEND */
+/* COLOR_DESTBLEND_mask = 0x1f << 8, */ /* Destination blend function for RGB components. BLEND_X name corresponds to GL_X blend function. */
/* COLOR_DESTBLEND_shift = 8, */
-/* BLEND_ZERO = 0x00, */
-/* BLEND_ONE = 0x01, */
-/* BLEND_SRC_COLOR = 0x02, */
-/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
-/* BLEND_SRC_ALPHA = 0x04, */
-/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
-/* BLEND_DST_ALPHA = 0x06, */
-/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
-/* BLEND_DST_COLOR = 0x08, */
-/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
-/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
-/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
-/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
-/* BLEND_CONSTANT_COLOR = 0x0d, */
-/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
-/* BLEND_SRC1_COLOR = 0x0f, */
-/* BLEND_INV_SRC1_COLOR = 0x10, */
-/* BLEND_SRC1_ALPHA = 0x11, */
-/* BLEND_INV_SRC1_ALPHA = 0x12, */
-/* BLEND_CONSTANT_ALPHA = 0x13, */
-/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
-/* OPACITY_WEIGHT_bit = 1 << 13, */
-/* ALPHA_SRCBLEND_mask = 0x1f << 16, */
+/* BLEND_ZERO = 0x00, */ /* BLEND_ZERO: (d3d_zero) */
+/* BLEND_ONE = 0x01, */ /* BLEND_ONE: (d3d_one) */
+/* BLEND_SRC_COLOR = 0x02, */ /* BLEND_SRC_COLOR: (d3d_srccolor) */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ /* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
+/* BLEND_SRC_ALPHA = 0x04, */ /* BLEND_SRC_ALPHA: (d3d_srcalpha) */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ /* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
+/* BLEND_DST_ALPHA = 0x06, */ /* BLEND_DST_ALPHA: (d3d_destalpha) */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ /* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
+/* BLEND_DST_COLOR = 0x08, */ /* BLEND_DST_COLOR: (d3d_destcolor) */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ /* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ /* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ /* BLEND_BOTH_SRC_ALPHA: dx9 mode */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ /* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
+/* BLEND_CONSTANT_COLOR = 0x0d, */ /* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ /* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
+/* BLEND_SRC1_COLOR = 0x0f, */ /* BLEND_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_INV_SRC1_COLOR = 0x10, */ /* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_SRC1_ALPHA = 0x11, */ /* BLEND_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */ /* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_CONSTANT_ALPHA = 0x13, */ /* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ /* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
+/* OPACITY_WEIGHT_bit = 1 << 13, */ /* If one, enables multiplying source alpha times source RGB before blending. This field must be zero if FOG_ENABLE is one. */
+/* ALPHA_SRCBLEND_mask = 0x1f << 16, */ /* Source blend function for alpha component. BLEND_X name corresponds to GL_X blend function. */
/* ALPHA_SRCBLEND_shift = 16, */
-/* BLEND_ZERO = 0x00, */
-/* BLEND_ONE = 0x01, */
-/* BLEND_SRC_COLOR = 0x02, */
-/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
-/* BLEND_SRC_ALPHA = 0x04, */
-/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
-/* BLEND_DST_ALPHA = 0x06, */
-/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
-/* BLEND_DST_COLOR = 0x08, */
-/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
-/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
-/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
-/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
-/* BLEND_CONSTANT_COLOR = 0x0d, */
-/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
-/* BLEND_SRC1_COLOR = 0x0f, */
-/* BLEND_INV_SRC1_COLOR = 0x10, */
-/* BLEND_SRC1_ALPHA = 0x11, */
-/* BLEND_INV_SRC1_ALPHA = 0x12, */
-/* BLEND_CONSTANT_ALPHA = 0x13, */
-/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
-/* ALPHA_COMB_FCN_mask = 0x07 << 21, */
+/* BLEND_ZERO = 0x00, */ /* BLEND_ZERO: (d3d_zero) */
+/* BLEND_ONE = 0x01, */ /* BLEND_ONE: (d3d_one) */
+/* BLEND_SRC_COLOR = 0x02, */ /* BLEND_SRC_COLOR: (d3d_srccolor) */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ /* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
+/* BLEND_SRC_ALPHA = 0x04, */ /* BLEND_SRC_ALPHA: (d3d_srcalpha) */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ /* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
+/* BLEND_DST_ALPHA = 0x06, */ /* BLEND_DST_ALPHA: (d3d_destalpha) */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ /* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
+/* BLEND_DST_COLOR = 0x08, */ /* BLEND_DST_COLOR: (d3d_destcolor) */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ /* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ /* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ /* BLEND_BOTH_SRC_ALPHA: dx9 mode */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ /* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
+/* BLEND_CONSTANT_COLOR = 0x0d, */ /* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ /* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
+/* BLEND_SRC1_COLOR = 0x0f, */ /* BLEND_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_INV_SRC1_COLOR = 0x10, */ /* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_SRC1_ALPHA = 0x11, */ /* BLEND_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */ /* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_CONSTANT_ALPHA = 0x13, */ /* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ /* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
+/* ALPHA_COMB_FCN_mask = 0x07 << 21, */ /* Source/dest combination function for alpha component. Result is clamped to the representable range. Note that Min and Max do not force src and dst blend functions to ONE. */
/* ALPHA_COMB_FCN_shift = 21, */
-/* COMB_DST_PLUS_SRC = 0x00, */
-/* COMB_SRC_MINUS_DST = 0x01, */
-/* COMB_MIN_DST_SRC = 0x02, */
-/* COMB_MAX_DST_SRC = 0x03, */
-/* COMB_DST_MINUS_SRC = 0x04, */
-/* ALPHA_DESTBLEND_mask = 0x1f << 24, */
+/* COMB_DST_PLUS_SRC = 0x00, */ /* COMB_DST_PLUS_SRC: (ADD): Source*SRCBLEND + Dest*DSTBLEND */
+/* COMB_SRC_MINUS_DST = 0x01, */ /* COMB_SRC_MINUS_DST: (SUBTRACT): Source*SRCBLEND - Dest*DSTBLEND */
+/* COMB_MIN_DST_SRC = 0x02, */ /* COMB_MIN_DST_SRC: (MIN): min(Source, Dest) */
+/* COMB_MAX_DST_SRC = 0x03, */ /* COMB_MAX_DST_SRC: (MAX): max(Source, Dest) */
+/* COMB_DST_MINUS_SRC = 0x04, */ /* COMB_DST_MINUS_SRC: (REVSUBTRACT): Dest*DSTBLEND - Source*SRCBLEND */
+/* ALPHA_DESTBLEND_mask = 0x1f << 24, */ /* Destination blend function for alpha component. BLEND_X name corresponds to GL_X blend function. */
/* ALPHA_DESTBLEND_shift = 24, */
-/* BLEND_ZERO = 0x00, */
-/* BLEND_ONE = 0x01, */
-/* BLEND_SRC_COLOR = 0x02, */
-/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
-/* BLEND_SRC_ALPHA = 0x04, */
-/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
-/* BLEND_DST_ALPHA = 0x06, */
-/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
-/* BLEND_DST_COLOR = 0x08, */
-/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
-/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
-/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
-/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
-/* BLEND_CONSTANT_COLOR = 0x0d, */
-/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
-/* BLEND_SRC1_COLOR = 0x0f, */
-/* BLEND_INV_SRC1_COLOR = 0x10, */
-/* BLEND_SRC1_ALPHA = 0x11, */
-/* BLEND_INV_SRC1_ALPHA = 0x12, */
-/* BLEND_CONSTANT_ALPHA = 0x13, */
-/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
-/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */
+/* BLEND_ZERO = 0x00, */ /* BLEND_ZERO: (d3d_zero) */
+/* BLEND_ONE = 0x01, */ /* BLEND_ONE: (d3d_one) */
+/* BLEND_SRC_COLOR = 0x02, */ /* BLEND_SRC_COLOR: (d3d_srccolor) */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ /* BLEND_ONE_MINUS_SRC_COLOR: (d3d_invsrccolor) */
+/* BLEND_SRC_ALPHA = 0x04, */ /* BLEND_SRC_ALPHA: (d3d_srcalpha) */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ /* BLEND_ONE_MINUS_SRC_ALPHA: (d3d_invsrcalpha) */
+/* BLEND_DST_ALPHA = 0x06, */ /* BLEND_DST_ALPHA: (d3d_destalpha) */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ /* BLEND_ONE_MINUS_DST_ALPHA: (d3d_invdestalpha) */
+/* BLEND_DST_COLOR = 0x08, */ /* BLEND_DST_COLOR: (d3d_destcolor) */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ /* BLEND_ONE_MINUS_DST_COLOR: (d3d_invdestcolor) */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ /* BLEND_SRC_ALPHA_SATURATE: (d3d_srcalphasat) */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ /* BLEND_BOTH_SRC_ALPHA: dx9 mode */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ /* BLEND_BOTH_INV_SRC_ALPHA: dx9 mode */
+/* BLEND_CONSTANT_COLOR = 0x0d, */ /* BLEND_CONSTANT_COLOR: (d3d_blendfactor, uses corresponding RB_BLEND component) */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ /* BLEND_ONE_MINUS_CONSTANT_COLOR: (d3d_invblendfactor) */
+/* BLEND_SRC1_COLOR = 0x0f, */ /* BLEND_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_INV_SRC1_COLOR = 0x10, */ /* BLEND_INV_SRC1_COLOR: wgf dual-source mode */
+/* BLEND_SRC1_ALPHA = 0x11, */ /* BLEND_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */ /* BLEND_INV_SRC1_ALPHA: wgf dual-source mode */
+/* BLEND_CONSTANT_ALPHA = 0x13, */ /* BLEND_CONSTANT_ALPHA: (uses RB_BLEND_ALPHA) */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ /* BLEND_ONE_MINUS_CONSTANT_ALPHA: */
+/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */ /* If false, use color blend modes for blending the alpha channel. If true, use the ALPHA_ fields to control blending to the alpha channel. */
CB_COLOR_CONTROL = 0x00028808,
- FOG_ENABLE_bit = 1 << 0,
- MULTIWRITE_ENABLE_bit = 1 << 1,
- DITHER_ENABLE_bit = 1 << 2,
- DEGAMMA_ENABLE_bit = 1 << 3,
- SPECIAL_OP_mask = 0x07 << 4,
+ FOG_ENABLE_bit = 1 << 0, /* If true, extract a fog factor from each exported color and performs fog blending prior to alpha blending, using FOG_RED etc. as the fog color. This bit must be zero if a dual-source (SRC1) blend operation is selected. */
+ MULTIWRITE_ENABLE_bit = 1 << 1, /* If true, replicate color output 0 to each color output, so that it is rendered to each enabled render target. This feature is used in OpenGL. SHADER_MASK.OUTPUTn_ENABLE masks the color components of color output 0 for render target n. */
+ DITHER_ENABLE_bit = 1 << 2, /* If true, then each component is dithered if it is no larger than 16-bits and its ROUND_MODE is set to ROUND_BY_HALF. This API state is present in OpenGL and DX9 but not DX10. */
+ DEGAMMA_ENABLE_bit = 1 << 3, /* If true, then each UNORM format COLOR_8_8_8_8 render target is treated as an SRGB format instead. This API state is present in DX9 but not WGF2. */
+ SPECIAL_OP_mask = 0x07 << 4, /* This field selects standard color processing or one of several special operation modes, which ignore the backend state except that the fast clear and expand modes use nonzero fields in CB_TARGET_WRITE field to select render targets. NOTE: for the SPECIAL_EXPAND modes, all enabled MRTs must have a cmask buffer. */
SPECIAL_OP_shift = 4,
- SPECIAL_NORMAL = 0x00,
- SPECIAL_DISABLE = 0x01,
- SPECIAL_FAST_CLEAR = 0x02,
- SPECIAL_FORCE_CLEAR = 0x03,
- SPECIAL_EXPAND_COLOR = 0x04,
- SPECIAL_EXPAND_TEXTURE = 0x05,
- SPECIAL_EXPAND_SAMPLES = 0x06,
- SPECIAL_RESOLVE_BOX = 0x07,
- PER_MRT_BLEND_bit = 1 << 7,
- TARGET_BLEND_ENABLE_mask = 0xff << 8,
+ SPECIAL_NORMAL = 0x00, /* SPECIAL_NORMAL: use state to render */
+ SPECIAL_DISABLE = 0x01, /* SPECIAL_DISABLE: do not write color results */
+ SPECIAL_FAST_CLEAR = 0x02, /* SPECIAL_FAST_CLEAR: set fully covered tiles to fast clear value, as selected by CLEAR_MODE field. */
+ SPECIAL_FORCE_CLEAR = 0x03, /* SPECIAL_FORCE_CLEAR: use for full surface fast clear (removes knowledge of prior clear color). */
+ SPECIAL_EXPAND_COLOR = 0x04, /* SPECIAL_EXPAND_COLOR: expand cleared tiles so that clear color is not used. Use this or force_clear when changing the clear color. */
+ SPECIAL_EXPAND_TEXTURE = 0x05, /* SPECIAL_EXPAND_TEXTURE: expand as needed before binding the surface as a texture. */
+ SPECIAL_EXPAND_SAMPLES = 0x06, /* SPECIAL_EXPAND_SAMPLES: expand to a separate color per sample. This is required before CPU access to the surface. */
+ SPECIAL_RESOLVE_BOX = 0x07, /* SPECIAL_RESOLVE_BOX: read from target 0, sum all covered samples, divide by the number of samples, and write to target 1, which is one-sample. This may be used to produce a linear array from a tiled array. NOTE: do EXPAND_COLOR before resolving surface. */
+ PER_MRT_BLEND_bit = 1 << 7, /* If false, use CB_BLEND_CONTROL for all blend functions. If true, use CB_BLEND#_CONTROL for the blend function for render target # (if blending is enabled). */
+ TARGET_BLEND_ENABLE_mask = 0xff << 8, /* Each bit enables blending for the corresponding render target if it is 1, else disables blending for that render target if it is 0. This field must be 0xcc (source) if BLEND_FLOAT32 is set for any enabled render target. */
TARGET_BLEND_ENABLE_shift = 8,
- ROP3_mask = 0xff << 16,
+ ROP3_mask = 0xff << 16, /* This field supports the 28 boolean ops that combine either source and dest or brush and dest, with brush provided by the shader in place of source. ROP3 codes that use both src and brush are emulated in software. Allowed ROP3 codes have either the form pqrspqrs (for source/dest ops) or pqpqrsrs (for brush/dest ops). The code 0xCC (11001100) copies the source to the destination, which disables the ROP function. */
ROP3_shift = 16,
DB_SHADER_CONTROL = 0x0002880c,
- Z_EXPORT_ENABLE_bit = 1 << 0,
- STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
- Z_ORDER_mask = 0x03 << 4,
+ Z_EXPORT_ENABLE_bit = 1 << 0, /* A shader is bound that exports Z as a float into Red. */
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, /* A shader is bound that exports a stencil ref value as an integer onto Green. */
+ Z_ORDER_mask = 0x03 << 4, /* Indicates Shader`s preference for which type of Z testing. The _THEN_ for early Z allows the shader to indicate a preference when EARLY_Z can`t be used. If RE_Z can`t be used then LATE_Z is. */
Z_ORDER_shift = 4,
- LATE_Z = 0x00,
- EARLY_Z_THEN_LATE_Z = 0x01,
- RE_Z = 0x02,
- EARLY_Z_THEN_RE_Z = 0x03,
- KILL_ENABLE_bit = 1 << 6,
- COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
- MASK_EXPORT_ENABLE_bit = 1 << 8,
- DUAL_EXPORT_ENABLE_bit = 1 << 9,
- EXEC_ON_HIER_FAIL_bit = 1 << 10,
- EXEC_ON_NOOP_bit = 1 << 11,
- PA_CL_CLIP_CNTL = 0x00028810,
- UCP_ENA_0_bit = 1 << 0,
- UCP_ENA_1_bit = 1 << 1,
- UCP_ENA_2_bit = 1 << 2,
- UCP_ENA_3_bit = 1 << 3,
- UCP_ENA_4_bit = 1 << 4,
- UCP_ENA_5_bit = 1 << 5,
+ LATE_Z = 0x00, /* LATE_Z */
+ EARLY_Z_THEN_LATE_Z = 0x01, /* EARLY_Z_THEN_LATE_Z */
+ RE_Z = 0x02, /* RE_Z */
+ EARLY_Z_THEN_RE_Z = 0x03, /* EARLY_Z_THEN_RE_Z */
+ KILL_ENABLE_bit = 1 << 6, /* Shader can kill pixels through texkill. */
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, /* Use Z (2nd) Export Alpha Channel to Generate Alpha to Mask. */
+ MASK_EXPORT_ENABLE_bit = 1 << 8, /* Use Z (2nd) Export Blue Channel as sample mask for pixel. */
+ DUAL_EXPORT_ENABLE_bit = 1 << 9, /* Allows the shader export block to pack two quads into each export to the backend. This only occurs if there is no depth export, the active render targets permit (see CB_COLOR0_INFO SOURCE_FORMAT field) and CB_COLOR_CONTROL FOG_ENABLE and MULTIWRITE_ENABLE are both zero. */
+ EXEC_ON_HIER_FAIL_bit = 1 << 10, /* Will execute the shader even if Hierarchical Z or Stencil would kill the quad. Enable if the pixel shader has a desired side effect not covered by the above flags for Z or Stencil failed pixels. EarlyZ and ReZ kills will still stop the shader from running. */
+ EXEC_ON_NOOP_bit = 1 << 11, /* Will execute the shader even if nothing uses the shader`s color or depth exports. Enable if the pixel shader has a desired side effect not caused by the above flags for passing pixels. */
+ PA_CL_CLIP_CNTL = 0x00028810, /* Clipper Control Bits */
+ UCP_ENA_0_bit = 1 << 0, /* Enable User-Clip Plane 0 */
+ UCP_ENA_1_bit = 1 << 1, /* Enable User-Clip Plane 1 */
+ UCP_ENA_2_bit = 1 << 2, /* Enable User-Clip Plane 2 */
+ UCP_ENA_3_bit = 1 << 3, /* Enable User-Clip Plane 3 */
+ UCP_ENA_4_bit = 1 << 4, /* Enable User-Clip Plane 4 */
+ UCP_ENA_5_bit = 1 << 5, /* Enable User-Clip Plane 5 */
PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
- PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_mask = 0x03 << 14, /* 0 = Cull using distance from center of point 1 = Cull using radius-based distance from center of point 2 = Cull using radius-based distance from center of point, Expand and Clip on intersection 3 = Always expand and clip as trifan */
PS_UCP_MODE_shift = 14,
- CLIP_DISABLE_bit = 1 << 16,
- UCP_CULL_ONLY_ENA_bit = 1 << 17,
- BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
- DX_CLIP_SPACE_DEF_bit = 1 << 19,
- DIS_CLIP_ERR_DETECT_bit = 1 << 20,
- VTX_KILL_OR_bit = 1 << 21,
+ CLIP_DISABLE_bit = 1 << 16, /* Disables clip code generation and clipping process for TCL */
+ UCP_CULL_ONLY_ENA_bit = 1 << 17, /* Cull Primitives against UCPS, but don`t clip */
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, /* Currently unused: Pending Delete. Left as placeholder for now. */
+ DX_CLIP_SPACE_DEF_bit = 1 << 19, /* Clip space is defined as: 0: -W < X < W, -W < Y < W, -W < Z < W (OpenGL Definition) 1: -W < X < W, -W < Y < W, 0 < Z < W (DirectX Definition) */
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20, /* Disables culling of primitives for which the clipper detects an error. Default is 0 */
+ VTX_KILL_OR_bit = 1 << 21, /* Used if Vertex Kill flags are exported from Vertex Shader. If clear, ALL vertices for current primitive must be set to kill the primitive ( AND MODE). If set, if ANY vertices for current primitive are set, the primitive will be killed ( OR MODE). */
DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
ZCLIP_NEAR_DISABLE_bit = 1 << 26,
ZCLIP_FAR_DISABLE_bit = 1 << 27,
- PA_SU_SC_MODE_CNTL = 0x00028814,
- CULL_FRONT_bit = 1 << 0,
- CULL_BACK_bit = 1 << 1,
- FACE_bit = 1 << 2,
- POLY_MODE_mask = 0x03 << 3,
+ PA_SU_SC_MODE_CNTL = 0x00028814, /* SU/SC Controls for Facedness Culling, Polymode, Polygon Offset, and various Enables */
+ CULL_FRONT_bit = 1 << 0, /* Enable for front-face culling. */
+ CULL_BACK_bit = 1 << 1, /* Enable for back-face culling. */
+ FACE_bit = 1 << 2, /* X-Ored with cross product sign to determine positive facing */
+ POLY_MODE_mask = 0x03 << 3, /* Polygon mode enable. */
POLY_MODE_shift = 3,
- X_DISABLE_POLY_MODE = 0x00,
- X_DUAL_MODE = 0x01,
- POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ X_DISABLE_POLY_MODE = 0x00, /* Disable poly mode (render triangles). */
+ X_DUAL_MODE = 0x01, /* Dual mode (send 2 sets of 3 polys with specified poly type). */
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, /* Specifies how to render front-facing polygons. */
POLYMODE_FRONT_PTYPE_shift = 5,
- X_DRAW_POINTS = 0x00,
- X_DRAW_LINES = 0x01,
- X_DRAW_TRIANGLES = 0x02,
- POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ X_DRAW_POINTS = 0x00, /* Draw points. */
+ X_DRAW_LINES = 0x01, /* Draw lines. */
+ X_DRAW_TRIANGLES = 0x02, /* Draw triangles. */
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8, /* Specifies how to render back-facing polygons. */
POLYMODE_BACK_PTYPE_shift = 8,
-/* X_DRAW_POINTS = 0x00, */
-/* X_DRAW_LINES = 0x01, */
-/* X_DRAW_TRIANGLES = 0x02, */
- POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
- POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
- POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
- VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
- PROVOKING_VTX_LAST_bit = 1 << 19,
- PERSP_CORR_DIS_bit = 1 << 20,
- MULTI_PRIM_IB_ENA_bit = 1 << 21,
- PA_CL_VTE_CNTL = 0x00028818,
- VPORT_X_SCALE_ENA_bit = 1 << 0,
- VPORT_X_OFFSET_ENA_bit = 1 << 1,
- VPORT_Y_SCALE_ENA_bit = 1 << 2,
- VPORT_Y_OFFSET_ENA_bit = 1 << 3,
- VPORT_Z_SCALE_ENA_bit = 1 << 4,
- VPORT_Z_OFFSET_ENA_bit = 1 << 5,
- VTX_XY_FMT_bit = 1 << 8,
- VTX_Z_FMT_bit = 1 << 9,
- VTX_W0_FMT_bit = 1 << 10,
- PERFCOUNTER_REF_bit = 1 << 11,
- PA_CL_VS_OUT_CNTL = 0x0002881c,
- CLIP_DIST_ENA_0_bit = 1 << 0,
- CLIP_DIST_ENA_1_bit = 1 << 1,
- CLIP_DIST_ENA_2_bit = 1 << 2,
- CLIP_DIST_ENA_3_bit = 1 << 3,
- CLIP_DIST_ENA_4_bit = 1 << 4,
- CLIP_DIST_ENA_5_bit = 1 << 5,
- CLIP_DIST_ENA_6_bit = 1 << 6,
- CLIP_DIST_ENA_7_bit = 1 << 7,
- CULL_DIST_ENA_0_bit = 1 << 8,
- CULL_DIST_ENA_1_bit = 1 << 9,
- CULL_DIST_ENA_2_bit = 1 << 10,
- CULL_DIST_ENA_3_bit = 1 << 11,
- CULL_DIST_ENA_4_bit = 1 << 12,
- CULL_DIST_ENA_5_bit = 1 << 13,
- CULL_DIST_ENA_6_bit = 1 << 14,
- CULL_DIST_ENA_7_bit = 1 << 15,
- USE_VTX_POINT_SIZE_bit = 1 << 16,
- USE_VTX_EDGE_FLAG_bit = 1 << 17,
- USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
- USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
- USE_VTX_KILL_FLAG_bit = 1 << 20,
- VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
- VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
- VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+/* X_DRAW_POINTS = 0x00, */ /* Draw points. */
+/* X_DRAW_LINES = 0x01, */ /* Draw lines. */
+/* X_DRAW_TRIANGLES = 0x02, */ /* Draw triangles. */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, /* Enables front facing polygon`s offset. */
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, /* Enables back facing polygon`s offset. */
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, /* Enables polygon offset for non-triangle primitives. */
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, /* Enables addition of PA_SC_WINDOW_OFFSET values to vertex data. */
+ PROVOKING_VTX_LAST_bit = 1 << 19, /* Defines which vertex of a primitive is used for attribute components when flat shading is enabled */
+ PERSP_CORR_DIS_bit = 1 << 20, /* Disables perspective correction for all attributes */
+ MULTI_PRIM_IB_ENA_bit = 1 << 21, /* Enables multiple primitive sets to be placed in a single index buffer, separated by RESET_INDX indices */
+ PA_CL_VTE_CNTL = 0x00028818, /* Viewport Transform Engine Control */
+ VPORT_X_SCALE_ENA_bit = 1 << 0, /* Viewport Transform Scale Enable for X component */
+ VPORT_X_OFFSET_ENA_bit = 1 << 1, /* Viewport Transform Offset Enable for X component */
+ VPORT_Y_SCALE_ENA_bit = 1 << 2, /* Viewport Transform Scale Enable for Y component */
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3, /* Viewport Transform Offset Enable for Y component */
+ VPORT_Z_SCALE_ENA_bit = 1 << 4, /* Viewport Transform Scale Enable for Z component */
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5, /* Viewport Transform Offset Enable for Z component */
+ VTX_XY_FMT_bit = 1 << 8, /* Indicates that the incoming X, Y have already been multiplied by 1/W0. If OFF, the Setup Engine will multiply the X, Y coordinates by 1/W0. */
+ VTX_Z_FMT_bit = 1 << 9, /* Indicates that the incoming Z has already been multiplied by 1/W0. If OFF, the Setup Engine will multiply the Z coordinate by 1/W0. */
+ VTX_W0_FMT_bit = 1 << 10, /* Indicates that the incoming W0 is not 1/W0. If ON, the Setup Engine will perform the reciprocal to get 1/W0. */
+ PERFCOUNTER_REF_bit = 1 << 11, /* Indicates perf counters should increment for this context. */
+ PA_CL_VS_OUT_CNTL = 0x0002881c, /* Vertex Shader Output Control */
+ CLIP_DIST_ENA_0_bit = 1 << 0, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_1_bit = 1 << 1, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_2_bit = 1 << 2, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_3_bit = 1 << 3, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_4_bit = 1 << 4, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_5_bit = 1 << 5, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_6_bit = 1 << 6, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CLIP_DIST_ENA_7_bit = 1 << 7, /* Enable ClipDistance# to be used for user-defined clipping. Requires VS_OUT_CCDIST#_ENA to be set. */
+ CULL_DIST_ENA_0_bit = 1 << 8, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_1_bit = 1 << 9, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_2_bit = 1 << 10, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_3_bit = 1 << 11, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_4_bit = 1 << 12, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_5_bit = 1 << 13, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_6_bit = 1 << 14, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ CULL_DIST_ENA_7_bit = 1 << 15, /* Enable CullDistance# to be used for user-defined clip discard. Requires VS_OUT_CCDIST#_ENA to be set. If all verts of a primitive are outside (culldist<0), then primitive is discarded, else just let through (i.e. NOT clipped). */
+ USE_VTX_POINT_SIZE_bit = 1 << 16, /* Use the PointSize output from the VS (in the x channel of VS_OUT_MISC_VEC). */
+ USE_VTX_EDGE_FLAG_bit = 1 << 17, /* Use the EdgeFlag output from the VS (in the y channel of VS_OUT_MISC_VEC). */
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, /* Use the RenderTargetArrayIndx output from the VS (in the z channel of VS_OUT_MISC_VEC). Only valid for WGF Geometry Shader */
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19, /* Use the ViewportArrayIndx output from the VS (in the w channel of VS_OUT_MISC_VEC). Only valid for WGF Geometry Shader */
+ USE_VTX_KILL_FLAG_bit = 1 << 20, /* Use the KillFlag output from the VS (in the z channel of VS_OUT_MISC_VEC). Mutually exclusive from RTarrayindx */
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21, /* Output the VS output misc vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, /* Output the VS output ccdist0 vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, /* Output the VS output ccdist1 vector from the VS (SX) to the PA (primitive assembler). Should be set if any of the fields are to be used */
PA_CL_NANINF_CNTL = 0x00028820,
VTE_XY_INF_DISCARD_bit = 1 << 0,
VTE_Z_INF_DISCARD_bit = 1 << 1,
@@ -2079,187 +2140,198 @@ enum {
VS_W_INF_RETAIN_bit = 1 << 13,
VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
- SQ_PGM_START_PS = 0x00028840,
- SQ_PGM_RESOURCES_PS = 0x00028850,
- NUM_GPRS_mask = 0xff << 0,
+ SQ_PGM_START_PS = 0x00028840, /* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the pixel shader (PS) */
+ /* PGM_START: Format is [39:8] */
+ SQ_PGM_RESOURCES_PS = 0x00028850, /* (8-state). Resource requirements to run the PS program. Can only read most recent version, not all 8 states. */
+ NUM_GPRS_mask = 0xff << 0, /* number of GPRs required to run this program [0..127] */
NUM_GPRS_shift = 0,
- STACK_SIZE_mask = 0xff << 8,
+ STACK_SIZE_mask = 0xff << 8, /* number of stack entries needed [0..255] */
STACK_SIZE_shift = 8,
- SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21,
- FETCH_CACHE_LINES_mask = 0x07 << 24,
+ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, /* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
+ FETCH_CACHE_LINES_mask = 0x07 << 24, /* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
FETCH_CACHE_LINES_shift = 24,
- UNCACHED_FIRST_INST_bit = 1 << 28,
- CLAMP_CONSTS_bit = 1 << 31,
- SQ_PGM_EXPORTS_PS = 0x00028854,
- EXPORT_MODE_mask = 0x1f << 0,
+ UNCACHED_FIRST_INST_bit = 1 << 28, /* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. On R600 only: this bit MUST be set due to a bug that is fixed in derivative parts. */
+ CLAMP_CONSTS_bit = 1 << 31, /* Clamp ALU constants to [-1.0, 1.0]. Used for shader versions below PS2.0. Applies only to Constant-file constants (not literals) and only to const-file entries 0..7. Other entries are never clamped. */
+ SQ_PGM_EXPORTS_PS = 0x00028854, /* (8-state). Defines the exports from the Pixel Shader Program. */
+ EXPORT_MODE_mask = 0x1f << 0, /* Pixel Shader export mode. bbbbz where bbbb is how many color we export (0-8) and z is export z or not. It is illegal to program this to all zeros. */
EXPORT_MODE_shift = 0,
- SQ_PGM_START_VS = 0x00028858,
- SQ_PGM_RESOURCES_VS = 0x00028868,
-/* NUM_GPRS_mask = 0xff << 0, */
+ SQ_PGM_START_VS = 0x00028858, /* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the vertex shader (VS) */
+ /* PGM_START: Format is [39:8] 11. Shader Interpolator Registers */
+ SQ_PGM_RESOURCES_VS = 0x00028868, /* (8-state). Resource requirements to run the VS program. Can only read most recent version, not all 8 states. */
+/* NUM_GPRS_mask = 0xff << 0, */ /* number of GPRs required to run this program [0..127] */
/* NUM_GPRS_shift = 0, */
-/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_mask = 0xff << 8, */ /* number of stack entries needed [0..255] */
/* STACK_SIZE_shift = 8, */
- SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21,
-/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21, /* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ /* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* FETCH_CACHE_LINES_shift = 24, */
-/* UNCACHED_FIRST_INST_bit = 1 << 28, */
- SQ_PGM_START_GS = 0x0002886c,
- SQ_PGM_RESOURCES_GS = 0x0002887c,
-/* NUM_GPRS_mask = 0xff << 0, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ /* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
+ SQ_PGM_START_GS = 0x0002886c, /* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the geometry shader (GS) */
+ /* PGM_START: Format is [39:8] */
+ SQ_PGM_RESOURCES_GS = 0x0002887c, /* (8-state). Resource requirements to run the GS program. Can only read most recent version, not all 8 states. */
+/* NUM_GPRS_mask = 0xff << 0, */ /* number of GPRs required to run this program [0..127] */
/* NUM_GPRS_shift = 0, */
-/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_mask = 0xff << 8, */ /* number of stack entries needed [0..255] */
/* STACK_SIZE_shift = 8, */
- SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21,
-/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21, /* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ /* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* FETCH_CACHE_LINES_shift = 24, */
-/* UNCACHED_FIRST_INST_bit = 1 << 28, */
- SQ_PGM_START_ES = 0x00028880,
- SQ_PGM_RESOURCES_ES = 0x00028890,
-/* NUM_GPRS_mask = 0xff << 0, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ /* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
+ SQ_PGM_START_ES = 0x00028880, /* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the export shader (ES) */
+ /* PGM_START: Format is [39:8] */
+ SQ_PGM_RESOURCES_ES = 0x00028890, /* (8-state). Resource requirements to run the ES program. Can only read most recent version, not all 8 states. */
+/* NUM_GPRS_mask = 0xff << 0, */ /* number of GPRs required to run this program [0..127] */
/* NUM_GPRS_shift = 0, */
-/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_mask = 0xff << 8, */ /* number of stack entries needed [0..255] */
/* STACK_SIZE_shift = 8, */
- SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21,
-/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21, /* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ /* number of program cache lines to fetch on a cache miss, up to the size of the program segment [1..8]. */
/* FETCH_CACHE_LINES_shift = 24, */
-/* UNCACHED_FIRST_INST_bit = 1 << 28, */
- SQ_PGM_START_FS = 0x00028894,
- SQ_PGM_RESOURCES_FS = 0x000288a4,
-/* NUM_GPRS_mask = 0xff << 0, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */ /* Ensure that the first instruction is not read from the first instruction cache. Should only be used for debugging if there is a problem with the cache. */
+ SQ_PGM_START_FS = 0x00028894, /* (8-state) Memory address of the (256-byte aligned) first CF instruction of the shader code for the fetch shader (FS) */
+ /* PGM_START: Format is [39:8] */
+ SQ_PGM_RESOURCES_FS = 0x000288a4, /* (8-state). Resource requirements to run the FS program. The FS shares with either the VS (gs-off) or ES (gs-on) and performs a single allocation equal to the VS+FS or ES+FS resource requirements. The SPI allocates stack space as (VS/ES + FS_stack_size) in the same manner as GPRs. Max_call_depth and fetch_cache_lines will be inherited from the parent shader (VS or ES). Can only read most recent version, not all 8 states. */
+/* NUM_GPRS_mask = 0xff << 0, */ /* number of GPRs required to run this program [0..127] */
/* NUM_GPRS_shift = 0, */
-/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_mask = 0xff << 8, */ /* number of stack entries needed [0..255] */
/* STACK_SIZE_shift = 8, */
- SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21,
- SQ_ESGS_RING_ITEMSIZE = 0x000288a8,
- ITEMSIZE_mask = 0x7fff << 0,
+ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21, /* DX10 clamp mode. (1 = dx10 mode, 0 = dx9 mode). This applies to all shaders. This affects how the SP output clamp treats NaN. See SP doc for details. */
+ SQ_ESGS_RING_ITEMSIZE = 0x000288a8, /* (8-state) Space allocated to a single pixel/vertex in the ES->GS ring buffer (in DWORDs). Itemsize is the true count, not count-1 and represents [0..32767] dwords. */
+ ITEMSIZE_mask = 0x7fff << 0, /* Format is [16:2] */
ITEMSIZE_shift = 0,
- SQ_GSVS_RING_ITEMSIZE = 0x000288ac,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_GSVS_RING_ITEMSIZE = 0x000288ac, /* (8-state) Space allocated to a single pixel/vertex in the GS->VS ring buffer (in DWORDs). This defines the max number of dwords a single invocation of the GS can output to the ring buffer. */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_ESTMP_RING_ITEMSIZE = 0x000288b0,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0, /* (8-state) Space allocated to a single pixel/vertex in the ES Temp buffer (in DWORDs). */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_GSTMP_RING_ITEMSIZE = 0x000288b4,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4, /* (8-state) Space allocated to a single pixel/vertex in the GS Temp buffer (in DWORDs). */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_VSTMP_RING_ITEMSIZE = 0x000288b8,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8, /* (8-state) Space allocated to a single pixel/vertex in the VS Temp buffer (in DWORDs) */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_PSTMP_RING_ITEMSIZE = 0x000288bc,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc, /* (8-state) Space allocated to a single pixel/vertex in the PS Temp buffer (in DWORDs) */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_FBUF_RING_ITEMSIZE = 0x000288c0,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_FBUF_RING_ITEMSIZE = 0x000288c0, /* (8-state) Space allocated to a single pixel/vertex in the FBUFFER */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_REDUC_RING_ITEMSIZE = 0x000288c4,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_REDUC_RING_ITEMSIZE = 0x000288c4, /* (8-state) Space allocated to a single pixel/vertex in the Reduction Buffer */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_GS_VERT_ITEMSIZE = 0x000288c8,
-/* ITEMSIZE_mask = 0x7fff << 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x000288c8, /* (8-state) Space allocated to a single GS output vertex in GS Temp Buffer. This defines the size of a single vertex output by the GS. Multiple vertices can be output so long as the total output size does not exceed SQ_GSVS_RING_ITEMSIZE. */
+/* ITEMSIZE_mask = 0x7fff << 0, */ /* Format is [16:2] */
/* ITEMSIZE_shift = 0, */
- SQ_PGM_CF_OFFSET_PS = 0x000288cc,
- PGM_CF_OFFSET_mask = 0xfffff << 0,
+ SQ_PGM_CF_OFFSET_PS = 0x000288cc, /* (8-state) Memory offset from the program start (SQ_PGM_START_PS) of the (8-byte aligned) entry point for the pixel shader (PS) program. This is the first CF instruction that each thread will execute. */
+ PGM_CF_OFFSET_mask = 0xfffff << 0, /* Format is [22:3] */
PGM_CF_OFFSET_shift = 0,
- SQ_PGM_CF_OFFSET_VS = 0x000288d0,
-/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+ SQ_PGM_CF_OFFSET_VS = 0x000288d0, /* (8-state) Memory offset from the program start (SQ_PGM_START_VS) of the (8-byte aligned) entry point for the vertex shader (VS) program. This is the first CF instruction that each thread will execute. */
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ /* Format is [22:3] */
/* PGM_CF_OFFSET_shift = 0, */
- SQ_PGM_CF_OFFSET_GS = 0x000288d4,
-/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+ SQ_PGM_CF_OFFSET_GS = 0x000288d4, /* (8-state) Memory offset from the program start (SQ_PGM_START_GS) of the (8-byte aligned) entry point for the geometry shader (GS) program. This is the first CF instruction that each thread will execute. */
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ /* Format is [22:3] */
/* PGM_CF_OFFSET_shift = 0, */
- SQ_PGM_CF_OFFSET_ES = 0x000288d8,
-/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+ SQ_PGM_CF_OFFSET_ES = 0x000288d8, /* (8-state) Memory offset from the program start (SQ_PGM_START_ES) of the (8-byte aligned) entry point for the export shader (ES) program. This is the first CF instruction that each thread will execute. */
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ /* Format is [22:3] */
/* PGM_CF_OFFSET_shift = 0, */
- SQ_PGM_CF_OFFSET_FS = 0x000288dc,
-/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+ SQ_PGM_CF_OFFSET_FS = 0x000288dc, /* (8-state) Memory offset from the program start (SQ_PGM_START_FS) of the (8-byte aligned) entry point for the fetch shader (FS) program. This is the first CF instruction that each thread will execute. */
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ /* Format is [22:3] */
/* PGM_CF_OFFSET_shift = 0, */
- SQ_VTX_SEMANTIC_CLEAR = 0x000288e0,
- SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0, /* (8-state) This register is used to clear the contents of the vertex semantic table. Entries can be cleared independently -- each has one bit in this register to clear or leave alone. This register is write-only (not readable). */
+ /* clear or preserve table entry */
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, /* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_CACHE_PS_0_num = 16,
- SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ /* DATA: TBD */
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, /* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. Used by both VS and ES shaders. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_CACHE_VS_0_num = 16,
- SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ /* DATA: TBD 10. Shader Program Setup Registers */
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, /* (8-state) Base address of constant-buffer #0 used by the constant cache, 256B aligned address [39:8]. You must always write both CONST_BUFFER_SIZE and CONST_CACHE, unless size=0 in which case you may write only size. */
SQ_ALU_CONST_CACHE_GS_0_num = 16,
- PA_SU_POINT_SIZE = 0x00028a00,
- PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0,
+ /* DATA: TBD */
+ PA_SU_POINT_SIZE = 0x00028a00, /* Dimensions for Points */
+ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0, /* 1/2 Height (Vertical Radius) of point; fixed (12.4), 12 bits integer, 4 bits fractional pixels. */
PA_SU_POINT_SIZE__HEIGHT_shift = 0,
- PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, /* 1/2 Width (Horizontal Radius) of point; fixed (12.4), 12 bits integer, 4 bits fractional pixels. */
PA_SU_POINT_SIZE__WIDTH_shift = 16,
- PA_SU_POINT_MINMAX = 0x00028a04,
- MIN_SIZE_mask = 0xffff << 0,
+ PA_SU_POINT_MINMAX = 0x00028a04, /* Specifies maximum and minimum point & sprite sizes for per vertex size specification. */
+ MIN_SIZE_mask = 0xffff << 0, /* Minimum point & sprite radius size to allow. fixed point (12.4), 12 bits integer, 4 bits fractional pixels */
MIN_SIZE_shift = 0,
- MAX_SIZE_mask = 0xffff << 16,
+ MAX_SIZE_mask = 0xffff << 16, /* Maximum point & sprite radius size to allow. fixed point (12.4), 12 bits integer, 4 bits fractional pixels */
MAX_SIZE_shift = 16,
- PA_SU_LINE_CNTL = 0x00028a08,
- PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL = 0x00028a08, /* Line control */
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, /* 1/2 width of line, in subpixels; (16.0) fixed format. */
PA_SU_LINE_CNTL__WIDTH_shift = 0,
- PA_SC_LINE_STIPPLE = 0x00028a0c,
- LINE_PATTERN_mask = 0xffff << 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c, /* Line Stipple Control */
+ LINE_PATTERN_mask = 0xffff << 0, /* 16-bit pattern */
LINE_PATTERN_shift = 0,
- REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_mask = 0xff << 16, /* Pattern bit repeat count (minus 1). Field has a valid range of 0-255 which maps to OGL api values of 1-256. */
REPEAT_COUNT_shift = 16,
- PATTERN_BIT_ORDER_bit = 1 << 28,
- AUTO_RESET_CNTL_mask = 0x03 << 29,
+ PATTERN_BIT_ORDER_bit = 1 << 28, /* Bit Ordering of Pattern Bits: 0 = Little Bit Order, 1 = Big Bit Order */
+ AUTO_RESET_CNTL_mask = 0x03 << 29, /* Auto reset control of current pattern count/pointer. 0 = Never reset current pattern count/pointer. 1 = Reset current pattern count/pointer at each primitive (line list). 2 = Reset current pattern count/pointer at each packet (line strip). */
AUTO_RESET_CNTL_shift = 29,
- VGT_OUTPUT_PATH_CNTL = 0x00028a10,
- PATH_SELECT_mask = 0x03 << 0,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register selects which backend path will be used by the VGT block. */
+ PATH_SELECT_mask = 0x03 << 0, /* This field indicates the VGT back-end path to be used. */
PATH_SELECT_shift = 0,
- VGT_OUTPATH_VTX_REUSE = 0x00,
- VGT_OUTPATH_TESS_EN = 0x01,
- VGT_OUTPATH_PASSTHRU = 0x02,
- VGT_OUTPATH_GS_BLOCK = 0x03,
- VGT_HOS_CNTL = 0x00028a14,
- TESS_MODE_mask = 0x03 << 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00, /* VGT_OUTPATH_VTX_REUSE: VGT_OUTPATH_VTX_REUSE */
+ VGT_OUTPATH_TESS_EN = 0x01, /* VGT_OUTPATH_TESS_EN: VGT_OUTPATH_TESS_EN */
+ VGT_OUTPATH_PASSTHRU = 0x02, /* VGT_OUTPATH_PASSTHRU: VGT_OUTPATH_PASSTHRU */
+ VGT_OUTPATH_GS_BLOCK = 0x03, /* VGT_OUTPATH_GS_BLOCK: VGT_OUTPATH_GS_BLOCK */
+ VGT_HOS_CNTL = 0x00028a14, /* This register controls the behavior of the Tessellation Engine block at the backend of the VGT. This register is relevant only if the VGT_OUTPUT_PATH_CNTL register specifies the Tessellation Engine block for the VGT backend path. Note that the tessellation engine is enabled by selecting the tessellation engine path in the VGT_OUTPUT_PATH_CNTL register as opposed to the single enable bit that was used in previous architectures. */
+ TESS_MODE_mask = 0x03 << 0, /* Tessellation Mode 0 : Discrete 1 : Continuous 2 : Adaptive */
TESS_MODE_shift = 0,
- VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
- VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, /* For continuous and discrete tessellation modes, this register contains the tessellation level. For adaptive tessellation, this register contains the maximum tessellation level. The adaptive tessellation levels will be clamped less-than or equal to this level by the tessellation engine. In all cases, the format of this register is 32-bit IEEE floating point. This register is relevant only when the VGT_OUT_CNTL register specifies `Tessellation Engine` in the Path Select field. */
+ /* MAX_TESS: For adaptive tessellation mode, this is the maximum tessellation clamp value. For continuous and discrete tessellation modes, this is the tessellation level. For discrete modes, values in the range (1.0, 14.0) are legal. For non-discrete modes, values in the range (1.0, 15.0) are legal. MAX_TESS must be greater than or equal to MIN_TESS. */
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, /* For continuous and discrete tessellation modes, this register is not applicable. For adaptive tessellation, this register contains the minimum tessellation level. The adaptive tessellation levels will be clamped greater-than or equal to this level by the tessellation engine. The format of this register is 32-bit IEEE floating point. This register is relevant only when the VGT_OUT_CNTL register specifies `Tessellation Engine` in the Path Select field and the VGT_HOS_CNTL register specifies adaptive tessellation mode. */
+ /* MIN_TESS: For adaptive tessellation mode, this is the minimum tessellation clamp value. For continuous and discrete tessellation modes, this register is not applicable. For discrete modes values in the range (1.0, 14.0) are legal. For non-discrete modes, values in the range (1.0, 15.0) are legal. MIN_TESS must be less than or equal to MAX_TESS. */
VGT_HOS_REUSE_DEPTH = 0x00028a20,
REUSE_DEPTH_mask = 0xff << 0,
REUSE_DEPTH_shift = 0,
- VGT_GROUP_PRIM_TYPE = 0x00028a24,
- VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the prim type output by the grouper stage of the VGT */
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, /* Prim type output by grouper stage of the VGT. */
VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
- VGT_GRP_3D_POINT = 0x00,
- VGT_GRP_3D_LINE = 0x01,
- VGT_GRP_3D_TRI = 0x02,
- VGT_GRP_3D_RECT = 0x03,
- VGT_GRP_3D_QUAD = 0x04,
- VGT_GRP_2D_COPY_RECT_V0 = 0x05,
- VGT_GRP_2D_COPY_RECT_V1 = 0x06,
- VGT_GRP_2D_COPY_RECT_V2 = 0x07,
- VGT_GRP_2D_COPY_RECT_V3 = 0x08,
- VGT_GRP_2D_FILL_RECT = 0x09,
- VGT_GRP_2D_LINE = 0x0a,
- VGT_GRP_2D_TRI = 0x0b,
- VGT_GRP_PRIM_INDEX_LINE = 0x0c,
- VGT_GRP_PRIM_INDEX_TRI = 0x0d,
- VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
- VGT_GRP_3D_LINE_ADJ = 0x0f,
- VGT_GRP_3D_TRI_ADJ = 0x10,
- RETAIN_ORDER_bit = 1 << 14,
- RETAIN_QUADS_bit = 1 << 15,
- PRIM_ORDER_mask = 0x07 << 16,
+ VGT_GRP_3D_POINT = 0x00, /* VGT_GRP_3D_POINT: VGT_GRP_3D_POINT */
+ VGT_GRP_3D_LINE = 0x01, /* VGT_GRP_3D_LINE: VGT_GRP_3D_LINE */
+ VGT_GRP_3D_TRI = 0x02, /* VGT_GRP_3D_TRI: VGT_GRP_3D_TRI */
+ VGT_GRP_3D_RECT = 0x03, /* VGT_GRP_3D_RECT: VGT_GRP_3D_RECT */
+ VGT_GRP_3D_QUAD = 0x04, /* VGT_GRP_3D_QUAD: VGT_GRP_3D_QUAD */
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05, /* VGT_GRP_2D_COPY_RECT_V0: VGT_GRP_2D_COPY_RECT_V0 */
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06, /* VGT_GRP_2D_COPY_RECT_V1: VGT_GRP_2D_COPY_RECT_V1 */
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07, /* VGT_GRP_2D_COPY_RECT_V2: VGT_GRP_2D_COPY_RECT_V2 */
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08, /* VGT_GRP_2D_COPY_RECT_V3: VGT_GRP_2D_COPY_RECT_V3 */
+ VGT_GRP_2D_FILL_RECT = 0x09, /* VGT_GRP_2D_FILL_RECT: VGT_GRP_2D_FILL_RECT */
+ VGT_GRP_2D_LINE = 0x0a, /* VGT_GRP_2D_LINE: VGT_GRP_2D_LINE */
+ VGT_GRP_2D_TRI = 0x0b, /* VGT_GRP_2D_TRI: VGT_GRP_2D_TRI */
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c, /* VGT_GRP_PRIM_INDEX_LINE: VGT_GRP_PRIM_INDEX_LINE */
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d, /* VGT_GRP_PRIM_INDEX_TRI: VGT_GRP_PRIM_INDEX_TRI */
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e, /* VGT_GRP_PRIM_INDEX_QUAD: VGT_GRP_PRIM_INDEX_QUAD */
+ VGT_GRP_3D_LINE_ADJ = 0x0f, /* VGT_GRP_3D_LINE_ADJ: VGT_GRP_3D_LINE_ADJ */
+ VGT_GRP_3D_TRI_ADJ = 0x10, /* VGT_GRP_3D_TRI_ADJ: VGT_GRP_3D_TRI_ADJ */
+ RETAIN_ORDER_bit = 1 << 14, /* Resetting this bit to zero causes the Grouper within the VGT to convert strips, fans, loops, and polygons into regular lists in the vgt_grouper block. It also causes the primitive indices to be re-ordered to have the provoking vertex in the correct position. This bit should be set to zero if the VGT_OUTPUT_PATH_CNTL register specifies VGT_OUTPATH_VTX_REUSE or VGT_OUTPATH_TESS_EN and the VGT_DRAW_INITIATOR prim type is between 0 and 15, inclusive, (tri list, tri strip, tri fan, etc...). This bit is implied to be zero for VGT_DRAW_INITIATOR prim types 0 thru 15 if the Major Mode of the VGT_DRAW_INITIATOR is 0. If this bit is set for prim types 0 thru 15, then the primitive index order from the grouper will be retained and the indices will be incorrect for loops, fans, and polygons. Note that if the VGT_DRAW_INITIATOR.MAJOR_MODE is set to MAJOR_MODE_1 and VGT_OUTPUT_PATH_CNTL is set to VGT_OUTPATH_PASSTHRU and the VGT_GROUP_PRIM_TYPE.PRIM_TYPE is set to VGT_GRP_3D_TRI or VGT_GRP_2D_TRI and VGT_GROUP_PRIM_TYPE.PRIM_ORDER is set to VGT_GRP_STRIP, then the passthru block will perform DX/OpenGL index re-ordering for tri-strips. */
+ RETAIN_QUADS_bit = 1 << 15, /* This bit can only be legally set if the VGT_OUTPUT_PATH_CNTL register specifies the Tessellation Engine and the Major Mode of the VGT_DRAW_INITIATOR is 1. The RETAIN_QUADS bit indicates that quads should be passed intact to the tessellation engine. If this bit is not set, then the quads will be decomposed into triangles. */
+ PRIM_ORDER_mask = 0x07 << 16, /* Prim order output by grouper stage of the VGT. */
PRIM_ORDER_shift = 16,
- VGT_GRP_LIST = 0x00,
- VGT_GRP_STRIP = 0x01,
- VGT_GRP_FAN = 0x02,
- VGT_GRP_LOOP = 0x03,
- VGT_GRP_POLYGON = 0x04,
- VGT_GROUP_FIRST_DECR = 0x00028a28,
- FIRST_DECR_mask = 0x0f << 0,
+ VGT_GRP_LIST = 0x00, /* VGT_GRP_LIST: VGT_GRP_LIST */
+ VGT_GRP_STRIP = 0x01, /* VGT_GRP_STRIP: VGT_GRP_STRIP */
+ VGT_GRP_FAN = 0x02, /* VGT_GRP_FAN: VGT_GRP_FAN */
+ VGT_GRP_LOOP = 0x03, /* VGT_GRP_LOOP: VGT_GRP_LOOP */
+ VGT_GRP_POLYGON = 0x04, /* VGT_GRP_POLYGON: VGT_GRP_POLYGON */
+ VGT_GROUP_FIRST_DECR = 0x00028a28, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the amount by which the draw initiator index count is decremented for the first group taken from the input stream. */
+ FIRST_DECR_mask = 0x0f << 0, /* Decrement amount for the first group */
FIRST_DECR_shift = 0,
- VGT_GROUP_DECR = 0x00028a2c,
- DECR_mask = 0x0f << 0,
+ VGT_GROUP_DECR = 0x00028a2c, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register contains the amount by which the draw initiator index count is decremented for all groups taken from the input stream except for the first group. */
+ DECR_mask = 0x0f << 0, /* Decrement amount for groups except the first */
DECR_shift = 0,
- VGT_GROUP_VECT_0_CNTL = 0x00028a30,
- COMP_X_EN_bit = 1 << 0,
- COMP_Y_EN_bit = 1 << 1,
- COMP_Z_EN_bit = 1 << 2,
- COMP_W_EN_bit = 1 << 3,
- VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register indicates, with bits flags, which components are relevant for vector 0 of a group. At least one component of vector 0 must be indicated. This register also contains the stride of vector 0 (in 16-bit words) in the input stream and the amount to shift the input stream (in 16-bit words) after extracting the vector. */
+ COMP_X_EN_bit = 1 << 0, /* Indicates that component X will be output from the grouper for vector 0 */
+ COMP_Y_EN_bit = 1 << 1, /* Indicates that component Y will be output from the grouper for vector 0 */
+ COMP_Z_EN_bit = 1 << 2, /* Indicates that component Z will be output from the grouper for vector 0 */
+ COMP_W_EN_bit = 1 << 3, /* Indicates that component W will be output from the grouper for vector 0 */
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, /* The stride of vector 0 data in the input stream (in 16-bit words). Zero is NOT a legal value for an active vector. See the programming guidelines for the situation in which a vector uses no data from the shifter. */
VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
- SHIFT_mask = 0xff << 16,
+ SHIFT_mask = 0xff << 16, /* The amount to shift the input stream after extracting vector 0 (in 16-bit words). This field must be less than or equal to the STRIDE field for proper shifter operation. */
SHIFT_shift = 16,
- VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register is identical to VGT_GROUP_VECT_0_CNTL except that it applies to vector 1 of the group instead of vector 0. Also, vector 0 is required to have at least one component set; however, vector 1 may have none set. */
/* COMP_X_EN_bit = 1 << 0, */
/* COMP_Y_EN_bit = 1 << 1, */
/* COMP_Z_EN_bit = 1 << 2, */
@@ -2268,496 +2340,544 @@ enum {
VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
/* SHIFT_mask = 0xff << 16, */
/* SHIFT_shift = 16, */
- VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
- X_CONV_mask = 0x0f << 0,
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register indicates how the value of each component of vector 0 will be determined. If the VGT_GROUP_VECT_0_CNTL register indicates that a particular component is not selected for output from the grouper, then that component's format control fields are ignored. */
+ X_CONV_mask = 0x0f << 0, /* X Component Determination. */
X_CONV_shift = 0,
- VGT_GRP_INDEX_16 = 0x00,
- VGT_GRP_INDEX_32 = 0x01,
- VGT_GRP_UINT_16 = 0x02,
- VGT_GRP_UINT_32 = 0x03,
- VGT_GRP_SINT_16 = 0x04,
- VGT_GRP_SINT_32 = 0x05,
- VGT_GRP_FLOAT_32 = 0x06,
- VGT_GRP_AUTO_PRIM = 0x07,
- VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
- X_OFFSET_mask = 0x0f << 4,
+ VGT_GRP_INDEX_16 = 0x00, /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+ VGT_GRP_INDEX_32 = 0x01, /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+ VGT_GRP_UINT_16 = 0x02, /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+ VGT_GRP_UINT_32 = 0x03, /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+ VGT_GRP_SINT_16 = 0x04, /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+ VGT_GRP_SINT_32 = 0x05, /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+ VGT_GRP_FLOAT_32 = 0x06, /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+ VGT_GRP_AUTO_PRIM = 0x07, /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
+ X_OFFSET_mask = 0x0f << 4, /* X Component Offset. This field is the offset, in 16-bit words, of the X component in the input cycle. */
X_OFFSET_shift = 4,
- Y_CONV_mask = 0x0f << 8,
+ Y_CONV_mask = 0x0f << 8, /* Y Component Determination. See the X component determination field for description. */
Y_CONV_shift = 8,
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
- Y_OFFSET_mask = 0x0f << 12,
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
+ Y_OFFSET_mask = 0x0f << 12, /* Y Component Offset. This field is the offset, in 16-bit words, of the Y component in the input cycle. */
Y_OFFSET_shift = 12,
- Z_CONV_mask = 0x0f << 16,
+ Z_CONV_mask = 0x0f << 16, /* Z Component Determination. See the X component determination field for description. */
Z_CONV_shift = 16,
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
- Z_OFFSET_mask = 0x0f << 20,
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
+ Z_OFFSET_mask = 0x0f << 20, /* Z Component Offset. This field is the offset, in 16-bit words, of the Z component in the input cycle. */
Z_OFFSET_shift = 20,
- W_CONV_mask = 0x0f << 24,
+ W_CONV_mask = 0x0f << 24, /* W Component Determination. See the X component determination field for description. */
W_CONV_shift = 24,
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
- W_OFFSET_mask = 0x0f << 28,
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
+ W_OFFSET_mask = 0x0f << 28, /* W Component Offset. This field is the offset, in 16-bit words, of the W component in the input cycle. */
W_OFFSET_shift = 28,
- VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, /* THIS REGISTER IS IGNORED IN MAJOR MODE 0 FOR PRIM TYPES 0 THRU 21 !! This register is identical to VGT_GROUP_VECT_0_FMT_CNTL except that it controls the formatting of output vector 1 instead of output vector 0. */
/* X_CONV_mask = 0x0f << 0, */
/* X_CONV_shift = 0, */
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* X_OFFSET_mask = 0x0f << 4, */
/* X_OFFSET_shift = 4, */
/* Y_CONV_mask = 0x0f << 8, */
/* Y_CONV_shift = 8, */
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* Y_OFFSET_mask = 0x0f << 12, */
/* Y_OFFSET_shift = 12, */
/* Z_CONV_mask = 0x0f << 16, */
/* Z_CONV_shift = 16, */
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* Z_OFFSET_mask = 0x0f << 20, */
/* Z_OFFSET_shift = 20, */
/* W_CONV_mask = 0x0f << 24, */
/* W_CONV_shift = 24, */
-/* VGT_GRP_INDEX_16 = 0x00, */
-/* VGT_GRP_INDEX_32 = 0x01, */
-/* VGT_GRP_UINT_16 = 0x02, */
-/* VGT_GRP_UINT_32 = 0x03, */
-/* VGT_GRP_SINT_16 = 0x04, */
-/* VGT_GRP_SINT_32 = 0x05, */
-/* VGT_GRP_FLOAT_32 = 0x06, */
-/* VGT_GRP_AUTO_PRIM = 0x07, */
-/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* VGT_GRP_INDEX_16 = 0x00, */ /* VGT_GRP_INDEX_16: VGT_GRP_INDEX_16 16 bits from stream with index offset and clamp */
+/* VGT_GRP_INDEX_32 = 0x01, */ /* VGT_GRP_INDEX_32: VGT_GRP_INDEX_32 32 bits from stream with index offset and clamp */
+/* VGT_GRP_UINT_16 = 0x02, */ /* VGT_GRP_UINT_16: VGT_GRP_UINT_16 16 bits from stream as unsigned int */
+/* VGT_GRP_UINT_32 = 0x03, */ /* VGT_GRP_UINT_32: VGT_GRP_UINT_32 32 bits from stream as unsigned int */
+/* VGT_GRP_SINT_16 = 0x04, */ /* VGT_GRP_SINT_16: VGT_GRP_SINT_16 16 bits from stream as signed int */
+/* VGT_GRP_SINT_32 = 0x05, */ /* VGT_GRP_SINT_32: VGT_GRP_SINT_32 32 bits from stream as signed int */
+/* VGT_GRP_FLOAT_32 = 0x06, */ /* VGT_GRP_FLOAT_32: VGT_GRP_FLOAT_32 32 bits from stream as float */
+/* VGT_GRP_AUTO_PRIM = 0x07, */ /* VGT_GRP_AUTO_PRIM: VGT_GRP_AUTO_PRIM 24 bits from auto primitive counter */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ /* VGT_GRP_FIX_1_23_TO_FLOAT: VGT_GRP_FIX_1_23_TO_FLOAT 24 bit barycentric value from tessellation engine */
/* W_OFFSET_mask = 0x0f << 28, */
/* W_OFFSET_shift = 28, */
- VGT_GS_MODE = 0x00028a40,
- MODE_mask = 0x03 << 0,
+ VGT_GS_MODE = 0x00028a40, /* VGT GS Enable Mode */
+ MODE_mask = 0x03 << 0, /* Indicates which GS scenario is enabled */
MODE_shift = 0,
- GS_OFF = 0x00,
- GS_SCENARIO_A = 0x01,
- GS_SCENARIO_B = 0x02,
- GS_SCENARIO_G = 0x03,
- ES_PASSTHRU_bit = 1 << 2,
- CUT_MODE_mask = 0x03 << 3,
+ GS_OFF = 0x00, /* GS_OFF: GS_OFF */
+ GS_SCENARIO_A = 0x01, /* GS_SCENARIO_A: GS_SCENARIO_A */
+ GS_SCENARIO_B = 0x02, /* GS_SCENARIO_B: GS_SCENARIO_B */
+ GS_SCENARIO_G = 0x03, /* GS_SCENARIO_G: GS_SCENARIO_G */
+ ES_PASSTHRU_bit = 1 << 2, /* sets to one if VS shader is passthru when GS scenario G is used */
+ CUT_MODE_mask = 0x03 << 3, /* 00: 1024 max gs emit vertices, 01:512 max gs emit vertices, 10:256 max gs emit vertices, 11: 128 max gs emit vertices */
CUT_MODE_shift = 3,
- GS_CUT_1024 = 0x00,
- GS_CUT_512 = 0x01,
- GS_CUT_256 = 0x02,
- GS_CUT_128 = 0x03,
- PA_SC_MPASS_PS_CNTL = 0x00028a48,
- MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0,
+ GS_CUT_1024 = 0x00, /* GS_CUT_1024: GS_CUT_1024 */
+ GS_CUT_512 = 0x01, /* GS_CUT_512: GS_CUT_512 */
+ GS_CUT_256 = 0x02, /* GS_CUT_256: GS_CUT_256 */
+ GS_CUT_128 = 0x03, /* GS_CUT_128: GS_CUT_128 */
+ PA_SC_MPASS_PS_CNTL = 0x00028a48, /* Multi-Pass Pixel Shader Control Register */
+ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0, /* Specifies the number of pixel vectors to process for each pass. Should be based on the amount of memory available for pixel shader export to memory and size of each pixels output data. Note there are 64 pixels per pixel vector in R600. There will likely be 32 pixels /pixel vector and 16 in derivative parts */
MPASS_PIX_VEC_PER_PASS_shift = 0,
- MPASS_PS_ENA_bit = 1 << 31,
- PA_SC_MODE_CNTL = 0x00028a4c,
- MSAA_ENABLE_bit = 1 << 0,
- CLIPRECT_ENABLE_bit = 1 << 1,
- LINE_STIPPLE_ENABLE_bit = 1 << 2,
- MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3,
- WALK_ORDER_ENABLE_bit = 1 << 4,
- HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5,
- WALK_SIZE_bit = 1 << 6,
- WALK_ALIGNMENT_bit = 1 << 7,
- WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8,
- TILE_COVER_NO_SCISSOR_bit = 1 << 9,
- KILL_PIX_POST_HI_Z_bit = 1 << 10,
- KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11,
- MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12,
- TILE_COVER_DISABLE_bit = 1 << 13,
- FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14,
- FORCE_EOV_TILE_ENABLE_bit = 1 << 15,
- FORCE_EOV_REZ_ENABLE_bit = 1 << 16,
- PS_ITER_SAMPLE_bit = 1 << 17,
- VGT_ENHANCE = 0x00028a50,
- MI_TIMESTAMP_RES_mask = 0x03 << 0,
+ MPASS_PS_ENA_bit = 1 << 31, /* If set, enables multipass pixel shader operation. */
+ PA_SC_MODE_CNTL = 0x00028a4c, /* SC Mode Control Register for Various Enables */
+ MSAA_ENABLE_bit = 1 << 0, /* Enable MultiSample AA. If set, the MSAA_NUM_SAMPLES+1 used for MSAA will have unique subpixel locations as described below and MSAA_NUM_SAMPLES must not equal 0. If clear, all MSAA_NUM_SAMPLES+1 will be sampled using the pixel center. All sample locations are specified as an offset from pixel center. 2 SAMPLE: Sample 0: -4, 4; Sample 1: 4, -4. 4 SAMPLE: Sample 0: -2, -2; Sample 1: 2, 2; Sample 2: -6, 6; Sample 3: 6, -6. 8 SAMPLE: Sample 0: -2, -5; Sample 1: 4, -4; Sample 2: 1, 6; Sample 3: -6, -2; Sample 4: 6, 1; Sample 5: 0, 0; Sample 6: -5, 4; Sample 7: 7, -8. */
+ CLIPRECT_ENABLE_bit = 1 << 1, /* Enables 4 cliprects (same as setting CLIPRECT_RULE to 0xffff) */
+ LINE_STIPPLE_ENABLE_bit = 1 << 2, /* Enable line stipple processing */
+ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3, /* Enables primitives to be discarded based on */
+ WALK_ORDER_ENABLE_bit = 1 << 4, /* Enables fixed pattern for quad walk order. Must be disabled for overlapping blit rendering. */
+ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5, /* Enables the ability to halve the performance of the detail samplers in all MSAA modes. */
+ WALK_SIZE_bit = 1 << 6, /* Defines the size of the SC walk stamp. 0 : walk by supertiles (32 bits); 1 : walk by tiles (8 bits). */
+ WALK_ALIGNMENT_bit = 1 << 7, /* Defines the alignment value of the SC walker. 0 : align by supertiles (32 bits); 1 : align by tiles (8 bits). */
+ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8, /* When alignment value is set to supertiles (32 bits), enables the walker to align by tiles (8 bits) if primitive fits within one supertile. */
+ TILE_COVER_NO_SCISSOR_bit = 1 << 9, /* Disables the use of scissors when determining tile covered. */
+ KILL_PIX_POST_HI_Z_bit = 1 << 10, /* If set, all pixels are killed in the SC after the HI-Z test. Typically set for VizQuery geometry */
+ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11, /* If set, all pixels are killed in the SC after the detail mask. Can be used for performance info */
+ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12, /* Enables Multi-Chip supertile mode with the configuration defined in PA_SC_MULTI_CHIP_CNTL. */
+ TILE_COVER_DISABLE_bit = 1 << 13, /* Disables tile covered (Hi-Z optimization) that is sent to the DBs. */
+ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14, /* Enables forcing out pixel vectors prematurely based on the cycle count programmed in PA_SC_ENHANCE::FORCE_EOV_MAX_CLK_CNT[11:0] */
+ FORCE_EOV_TILE_ENABLE_bit = 1 << 15, /* Enables forcing out pixel vectors prematurely based on the tile count programmed in PA_SC_ENHANCE::FORCE_EOV_MAX_TILE_CNT[11:0] */
+ FORCE_EOV_REZ_ENABLE_bit = 1 << 16, /* Enables forcing out pixel vectors prematurely based on the ReZ hang condition(ie. cache locked) detected in the DB */
+ PS_ITER_SAMPLE_bit = 1 << 17, /* Enables per-sample (i.e. unique shader-computed value per sample) pixel shader execution. */
+ VGT_ENHANCE = 0x00028a50, /* Used for Late Additions of Control Bits */
+ MI_TIMESTAMP_RES_mask = 0x03 << 0, /* POSSIBLE VALUES: 00 - 0 -> 992 Clocks latency range in steps of 32 01 - 0 -> 496 Clocks latency range in steps of 16 02 - 0 -> 248 Clocks latency range in steps of 8 03 - 0 -> 124 Clocks latency range in steps of 4 */
MI_TIMESTAMP_RES_shift = 0,
- X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00,
- X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01,
- X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02,
- X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03,
- MISC_mask = 0x3fffffff << 2,
+ X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00, /* 0 -> 992 Clocks latency range in steps of 32 */
+ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01, /* 0 -> 496 Clocks latency range in steps of 16 */
+ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02, /* 0 -> 248 Clocks latency range in steps of 8 */
+ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03, /* 0 -> 124 Clocks latency range in steps of 4 */
+ MISC_mask = 0x3fffffff << 2, /* Misc bit */
MISC_shift = 2,
- VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
- OUTPRIM_TYPE_mask = 0x3f << 0,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, /* VGT GS output primitive type */
+ OUTPRIM_TYPE_mask = 0x3f << 0, /* GS output primitive type */
OUTPRIM_TYPE_shift = 0,
- POINTLIST = 0x00,
- LINESTRIP = 0x01,
- TRISTRIP = 0x02,
- VGT_DMA_SIZE = 0x00028a74,
- VGT_DMA_INDEX_TYPE = 0x00028a7c,
-/* INDEX_TYPE_mask = 0x03 << 0, */
+ POINTLIST = 0x00, /* POINTLIST: POINTLIST */
+ LINESTRIP = 0x01, /* LINESTRIP: LINESTRIP */
+ TRISTRIP = 0x02, /* TRISTRIP: TRISTRIP */
+ VGT_DMA_SIZE = 0x00028a74, /* VGT DMA Size */
+ /* NUM_INDICES: VGT DMA Number of indices */
+ VGT_DMA_INDEX_TYPE = 0x00028a7c, /* VGT DMA Index Type and Mode */
+/* INDEX_TYPE_mask = 0x03 << 0, */ /* VGT DMA Index Type */
/* INDEX_TYPE_shift = 0, */
- VGT_INDEX_16 = 0x00,
- VGT_INDEX_32 = 0x01,
- SWAP_MODE_mask = 0x03 << 2,
+ VGT_INDEX_16 = 0x00, /* VGT_INDEX_16: VGT_INDEX_16 16-bit index */
+ VGT_INDEX_32 = 0x01, /* VGT_INDEX_32: VGT_INDEX_32 32-bit index */
+ SWAP_MODE_mask = 0x03 << 2, /* DMA Swap mode */
SWAP_MODE_shift = 2,
- VGT_DMA_SWAP_NONE = 0x00,
- VGT_DMA_SWAP_16_BIT = 0x01,
- VGT_DMA_SWAP_32_BIT = 0x02,
- VGT_DMA_SWAP_WORD = 0x03,
- VGT_PRIMITIVEID_EN = 0x00028a84,
- PRIMITIVEID_EN_bit = 1 << 0,
- VGT_DMA_NUM_INSTANCES = 0x00028a88,
- VGT_EVENT_INITIATOR = 0x00028a90,
- EVENT_TYPE_mask = 0x3f << 0,
+ VGT_DMA_SWAP_NONE = 0x00, /* VGT_DMA_SWAP_NONE: VGT_DMA_SWAP_NONE No swap */
+ VGT_DMA_SWAP_16_BIT = 0x01, /* VGT_DMA_SWAP_16_BIT: VGT_DMA_SWAP_16_BIT 16-bit swap 0xAABBCCDD -> 0xBBAADDCC */
+ VGT_DMA_SWAP_32_BIT = 0x02, /* VGT_DMA_SWAP_32_BIT: VGT_DMA_SWAP_32_BIT 32-bit swap 0xAABBCCDD -> 0xDDCCBBAA */
+ VGT_DMA_SWAP_WORD = 0x03, /* VGT_DMA_SWAP_WORD: VGT_DMA_SWAP_WORD word swap 0xAABBCCDD -> 0xCCDDAABB */
+ VGT_PRIMITIVEID_EN = 0x00028a84, /* VGT Primitive ID enable */
+ PRIMITIVEID_EN_bit = 1 << 0, /* PrimitiveID generation is enabled */
+ VGT_DMA_NUM_INSTANCES = 0x00028a88, /* VGT DMA Number of Instances */
+ /* VGT DMA Number of Instances, minimum value is 1 */
+ VGT_EVENT_INITIATOR = 0x00028a90, /* Event Initiator */
+ EVENT_TYPE_mask = 0x3f << 0, /* Event Type (also called Event ID) -- Currently, the hardware interface between the VGT and the PA supports only 6-bit event type. */
EVENT_TYPE_shift = 0,
- CACHE_FLUSH_TS = 0x04,
- CONTEXT_DONE = 0x05,
- CACHE_FLUSH = 0x06,
- VIZQUERY_START = 0x07,
- VIZQUERY_END = 0x08,
- SC_WAIT_WC = 0x09,
- MPASS_PS_CP_REFETCH = 0x0a,
- MPASS_PS_RST_START = 0x0b,
- MPASS_PS_INCR_START = 0x0c,
- RST_PIX_CNT = 0x0d,
- RST_VTX_CNT = 0x0e,
- VS_PARTIAL_FLUSH = 0x0f,
- PS_PARTIAL_FLUSH = 0x10,
- CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
- ZPASS_DONE = 0x15,
- CACHE_FLUSH_AND_INV_EVENT = 0x16,
- PERFCOUNTER_START = 0x17,
- PERFCOUNTER_STOP = 0x18,
- PIPELINESTAT_START = 0x19,
- PIPELINESTAT_STOP = 0x1a,
- PERFCOUNTER_SAMPLE = 0x1b,
- FLUSH_ES_OUTPUT = 0x1c,
- FLUSH_GS_OUTPUT = 0x1d,
- SAMPLE_PIPELINESTAT = 0x1e,
- SO_VGTSTREAMOUT_FLUSH = 0x1f,
- SAMPLE_STREAMOUTSTATS = 0x20,
- RESET_VTX_CNT = 0x21,
- BLOCK_CONTEXT_DONE = 0x22,
- CR_CONTEXT_DONE = 0x23,
- VGT_FLUSH = 0x24,
- CR_DONE_TS = 0x25,
- SQ_NON_EVENT = 0x26,
- SC_SEND_DB_VPZ = 0x27,
- BOTTOM_OF_PIPE_TS = 0x28,
- DB_CACHE_FLUSH_AND_INV = 0x2a,
- ADDRESS_HI_mask = 0xff << 19,
+ CACHE_FLUSH_TS = 0x04, /* CACHE_FLUSH_TS: Destination Cache Flush with Timestamp -- Inserted by the driver to request the CBs, DBs, and SMX to signal the CP when all prior rendering is flushed to memory. */
+ CONTEXT_DONE = 0x05, /* CONTEXT_DONE: GFXDEC Context Done -- Inserted by the CP on the first GFXDEC state update after a draw. */
+ CACHE_FLUSH = 0x06, /* CACHE_FLUSH: Destination Caches Flushed -- Inserted by the driver to request the CBs, DBs, and SMX to flush their caches to memory (No Timestamp is Generated). */
+ VIZQUERY_START = 0x07, /* VIZQUERY_START: No longer supported */
+ VIZQUERY_END = 0x08, /* VIZQUERY_END: No longer supported */
+ SC_WAIT_WC = 0x09, /* SC_WAIT_WC: SC Wait for WC from CP -- Inserted by the CP to inform the SC to wait for the write confirm signal (wire) from the CP before submitting future pixel vectors. This is used to synchronize 2D source surface (brush, a.k.a. texture) with user of that surface. */
+ MPASS_PS_CP_REFETCH = 0x0a, /* MPASS_PS_CP_REFETCH: Multi-Pass Pixel Shader CP Refetch -- Inserted by the driver to inform the SC it needs to report to CP to refetch buffer for multi- pass pixel shader or continue. */
+ MPASS_PS_RST_START = 0x0b, /* MPASS_PS_RST_START: Multi-Pass Pixel Shader Reset Start -- Inserted by the driver just before an INDIRECT_BUFFER_MP packet to instruct the SC to reset the multi-pass start pixel vector. */
+ MPASS_PS_INCR_START = 0x0c, /* MPASS_PS_INCR_START: Multi-Pass Pixel Shader Increment Start -- Inserted by the driver to instruct the SC to increment the multi-pass start vector by vectors_per_pass. */
+ RST_PIX_CNT = 0x0d, /* RST_PIX_CNT: Reset SQ`s auto Pixel Counter AND reset SC`s multi-pass pixel vector count -- Inserted by the driver. */
+ RST_VTX_CNT = 0x0e, /* RST_VTX_CNT: Reset SQ`s auto Vertex Counter -- Inserted by the driver. */
+ VS_PARTIAL_FLUSH = 0x0f, /* VS_PARTIAL_FLUSH: Used to flush all work between the CP and the ES, GS, VS shaders including the VGT. */
+ PS_PARTIAL_FLUSH = 0x10, /* PS_PARTIAL_FLUSH: Used to flush all work between the CP and the ES, GS, VS, PS shaders including scan conversion, primitive assembly, and VGT. */
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, /* CACHE_FLUSH_AND_INV_TS_EVENT: Same as CACHE_FLUSH_TS with an invalidate -- Inserted by the driver. */
+ ZPASS_DONE = 0x15, /* ZPASS_DONE: Write ZPASS counts to memory -- Inserted by the driver to instruct the DBs to write out the ZPASS counters to memory. Used to support DX10 occlusion queries. */
+ CACHE_FLUSH_AND_INV_EVENT = 0x16, /* CACHE_FLUSH_AND_INV_EVENT: Same as CACHE_FLUSH with an invalidate -- Inserted by the driver. */
+ PERFCOUNTER_START = 0x17, /* PERFCOUNTER_START: Start enabled event based Performance counters -- Inserted by the driver. */
+ PERFCOUNTER_STOP = 0x18, /* PERFCOUNTER_STOP: Stop enabled event based Performance counters that are event-enabled -- Inserted by the driver. */
+ PIPELINESTAT_START = 0x19, /* PIPELINESTAT_START: Start pipeline/strmout stat -- Inserted by the driver. */
+ PIPELINESTAT_STOP = 0x1a, /* PIPELINESTAT_STOP: Stop pipeline/strmout stat -- Inserted by the driver. */
+ PERFCOUNTER_SAMPLE = 0x1b, /* PERFCOUNTER_SAMPLE: Sample the performance counters of all blocks -- Inserted by the driver to read the performance counters. */
+ FLUSH_ES_OUTPUT = 0x1c, /* FLUSH_ES_OUTPUT: Flush Export Shader Output -- Inserted by the VGT to instruct the SMX to flush all the ES output to memory. */
+ FLUSH_GS_OUTPUT = 0x1d, /* FLUSH_GS_OUTPUT: Flush Geometry Shader Output -- Inserted by the VGT to instruct the SMX to flush all the GS output to memory. */
+ SAMPLE_PIPELINESTAT = 0x1e, /* SAMPLE_PIPELINESTAT: Sample Pipeline Statistics counters -- Inserted by the driver to request the GPU to sample counters associated with pipelinestats. The CP will subsequently write them to memory. */
+ SO_VGTSTREAMOUT_FLUSH = 0x1f, /* SO_VGTSTREAMOUT_FLUSH: VGT Streamout Flush -- This event will cause VGT to update the read only offsets registers and then send a VGT_CP_strmout_flushed to instruct the CP to read the offsets. */
+ SAMPLE_STREAMOUTSTATS = 0x20, /* SAMPLE_STREAMOUTSTATS: Sample Streamout Statistics counters -- Inserted by the driver to request the GPU to sample counters associated with streamout. The CP will subsequently write them to memory. */
+ RESET_VTX_CNT = 0x21, /* RESET_VTX_CNT: Reset Vertex Count -- Inserted by the driver to reset the auto index count for vertex count. There are two counters, one for gs and one for non-gs, and these should be reset separately. */
+ BLOCK_CONTEXT_DONE = 0x22, /* BLOCK_CONTEXT_DONE: Block Managed State (SQCONSDEC) Context Done - Inserted by the CP on the first SQCONSDEC constant update after a draw. */
+ CR_CONTEXT_DONE = 0x23, /* CR_CONTEXT_DONE: CR Context Done -- Inserted by the driver with an EVENT_WRITE packet, before the first CR state update after a draw (CR_CMD register write) */
+ VGT_FLUSH = 0x24, /* VGT_FLUSH: VGT Flush - Inserted by the driver to cause the VGT to be flushed. Used when GS ring buffer sizes are changed */
+ CR_DONE_TS = 0x25, /* CR_DONE_TS: CR Done Timestamp - Inserted by the driver to request a time stamp when the CR has completed previous work, flush of destination cache is assumed. */
+ SQ_NON_EVENT = 0x26, /* SQ_NON_EVENT: SQ Non-Event -- This event is reserved for SQ */
+ SC_SEND_DB_VPZ = 0x27, /* SC_SEND_DB_VPZ: SC Send Depth Block VPort Z -- Inserted by the driver to cause the SC to send the vport array Zmin and Zmax values to the DBs. */
+ BOTTOM_OF_PIPE_TS = 0x28, /* BOTTOM_OF_PIPE_TS: Bottom of the Pipe Timestamp -- Inserted by the driver to request a bottom of pipe timestamp be sent to memory, no flushing required. */
+ DB_CACHE_FLUSH_AND_INV = 0x2a, /* DB_CACHE_FLUSH_AND_INV: DB Flush and Invalidate - Inserted by the driver when the depth surface is paged out of memory. */
+ ADDRESS_HI_mask = 0xff << 19, /* address bit 39:32 for zpass event */
ADDRESS_HI_shift = 19,
- EXTENDED_EVENT_bit = 1 << 27,
- VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
- RESET_EN_bit = 1 << 0,
- VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
- VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
- VGT_STRMOUT_EN = 0x00028ab0,
- STREAMOUT_bit = 1 << 0,
- VGT_REUSE_OFF = 0x00028ab4,
- REUSE_OFF_bit = 1 << 0,
- VGT_VTX_CNT_EN = 0x00028ab8,
- VTX_CNT_EN_bit = 1 << 0,
- VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
- VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
- VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ EXTENDED_EVENT_bit = 1 << 27, /* 0 for single DW event, 1 for two DW event */
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, /* This register enables resetting of prim based on reset index */
+ RESET_EN_bit = 1 << 0, /* IF SET, THEN RESET INDEX IS USED FOR RESETTING A PRIM */
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, /* This register defines the first instance step rate */
+ /* STEP_RATE: Instance step rate */
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, /* This register defines the second instance step rate */
+ /* STEP_RATE: Instance step rate */
+ VGT_STRMOUT_EN = 0x00028ab0, /* This register enables streaming out */
+ STREAMOUT_bit = 1 << 0, /* If set, streaming output is enabled */
+ VGT_REUSE_OFF = 0x00028ab4, /* VGT reuse is off. This will expand strip primitives to list primitives */
+ REUSE_OFF_bit = 1 << 0, /* reuse is off (set to 1) */
+ VGT_VTX_CNT_EN = 0x00028ab8, /* Auto -index generation is on. */
+ VTX_CNT_EN_bit = 1 << 0, /* Set to one if auto index generation is enabled */
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, /* Stream-out size. */
+ /* SIZE: DWORD Buffer size for given stream out buffer. */
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, /* Stream out stride. */
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, /* DWORD stride between vertices in given stream-out buffer. From stream output declarations details of dx10 spec, the max stride 2048 bytes or 512 words defined to be the spacing between the beginning of each vertex. */
VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
- VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
- VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
- VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
- VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
- VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, /* Stream-out base. */
+ /* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 corresponds to 39:8 of memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, /* Stream out offset. */
+ /* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, /* Stream-out size. */
+ /* SIZE: DWORD Buffer size for given stream out buffer. */
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, /* Stream out stride. */
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, /* DWORD stride between vertices in given stream-out buffer. From stream output declarations details of dx10 spec, the max stride 2048 bytes or 512 words defined to be the spacing between the beginning of each vertex. */
VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
- VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
- VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
- VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
- VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
- VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, /* Stream-out base. */
+ /* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 corresponds to 39:8 of memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, /* Stream out offset. */
+ /* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, /* Stream-out size. */
+ /* SIZE: DWORD Buffer size for given stream out buffer. */
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, /* Stream out stride. */
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, /* DWORD stride between vertices in given stream-out buffer. From stream output declarations details of dx10 spec, the max stride 2048 bytes or 512 words defined to be the spacing between the beginning of each vertex. */
VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
- VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
- VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
- VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
- VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
- VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, /* Stream-out base. */
+ /* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 corresponds to 39:8 of memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, /* Stream out offset. */
+ /* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, /* Stream-out size. */
+ /* SIZE: DWORD Buffer size for given stream out buffer. */
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, /* Stream out stride. */
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, /* DWORD stride between vertices in given stream-out buffer. From stream output declarations details of dx10 spec, the max stride 2048 bytes or 512 words defined to be the spacing between the beginning of each vertex. */
VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
- VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
- VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
- VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
- VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
- VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
- VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
- VGT_STRMOUT_BUFFER_EN = 0x00028b20,
- BUFFER_0_EN_bit = 1 << 0,
- BUFFER_1_EN_bit = 1 << 1,
- BUFFER_2_EN_bit = 1 << 2,
- BUFFER_3_EN_bit = 1 << 3,
- VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
- VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
- VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
- VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
- VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, /* Stream-out base. */
+ /* BASE: DWORD Buffer base for given stream out buffer. Bits 31:0 corresponds to 39:8 of memory address. This data can be stored in the coherency registers. This register is snooped by CP. */
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, /* Stream out offset. */
+ /* OFFSET: DWORD offset for given stream out buffer. Writing this register will cause the VGT to load a Zero into BufDwordWritten[4] and SO_CurVertIndex. */
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, /* Stream out base_0 + offset_0. This register is snooped by SQ. */
+ /* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, /* Stream out base_1 + offset_1. This register is snooped by SQ. */
+ /* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, /* Stream out base_2 + offset_2. This register is snooped by SQ. */
+ /* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, /* Stream out base_3 + offset_3. This register is snooped by SQ. */
+ /* BASE_OFFSET: DWORD base+offset for given stream out buffer. Set by CP or driver. */
+ VGT_STRMOUT_BUFFER_EN = 0x00028b20, /* Stream out enable bits. CP will use for SO coherency register validness. */
+ BUFFER_0_EN_bit = 1 << 0, /* Enable buffer 0 stream out. */
+ BUFFER_1_EN_bit = 1 << 1, /* Enable buffer 1 stream out. */
+ BUFFER_2_EN_bit = 1 << 2, /* Enable buffer 2 stream out. */
+ BUFFER_3_EN_bit = 1 << 3, /* Enable buffer 3 stream out. */
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, /* Draw opaque offset. */
+ /* pOffsets from the IASetVertexBuffers binding of a stream out buffer that is to be used as src data. The retrieved BufferFilledSize minus this pOffsets value, if positive, will determine the amount of data from which primitives can be created. */
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, /* Draw opaque size. */
+ /* This will be loaded by the CP for a DrawOpaque call by fetching a memory address containing last bufferfilledsize associated with the previous stream out buffer bound to the IA. */
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, /* Draw opaque vertex stride. */
+ /* vertex stride used for draw opaque call */
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, /* Upper 6-bits of 40-bits Stream out base_0 + offset_0. This register is snooped by SQ. */
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, /* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
- VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
- VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, /* Upper 6-bits of 40-bits Stream out base_1 + offset_1. This register is snooped by SQ. */
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, /* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
- VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
- VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, /* Upper 6-bits of 40-bits Stream out base_2 + offset_2. This register is snooped by SQ. */
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, /* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
- VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
- VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50, /* Upper 6-bits of 40-bits Stream out base_3 + offset_3. This register is snooped by SQ. */
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, /* Upper 6-bits of 40-bits DWORD base+offset for given stream out buffer. Set by CP or driver. */
VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
- PA_SC_LINE_CNTL = 0x00028c00,
- BRES_CNTL_mask = 0xff << 0,
+ PA_SC_LINE_CNTL = 0x00028c00, /* Line Drawing Control */
+ BRES_CNTL_mask = 0xff << 0, /* This field indicates what the hardware should do on the minor axis of the line, when the line is exactly half way between two pixels (bresenham error = 0). This field is a LUT (BRES_CNTL[7:0] w/ 1-bit per entry, where if the bit BRES_CNTL[index] = `1` then that means to step the minor axis. The 3-bit index is calculated from the attributes of the line ((abs(Xend - Xstart) >= abs(Yend - Ystart)) << 2) | ((Xstart <= Xend) << 1) | (Ystart <= Yend) */
BRES_CNTL_shift = 0,
- USE_BRES_CNTL_bit = 1 << 8,
- EXPAND_LINE_WIDTH_bit = 1 << 9,
- LAST_PIXEL_bit = 1 << 10,
- PA_SC_AA_CONFIG = 0x00028c04,
- MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ USE_BRES_CNTL_bit = 1 << 8, /* If set, use the bresenham control field. Should be set for 2D lines, clear for 3D lines. */
+ EXPAND_LINE_WIDTH_bit = 1 << 9, /* If set, the line width will be expanded by the 1/cos(a) where a the minimum angle from horz or vertical. This bit most likely should be set whenever MSAA_ENABLE is set or Line Antialiasing is being done in pixel shader. */
+ LAST_PIXEL_bit = 1 << 10, /* If set the last pixel of a line will not be killed by the diamond exit rule. */
+ PA_SC_AA_CONFIG = 0x00028c04, /* Multisample Antialiasing Control */
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0, /* Specifies the number of samples to use for MSAA. Representative of size of surface allocated for Color and Depth. 0 = 1-sample, 1 = 2-sample, 2 = 4-sample, 3 = 8- sample. */
MSAA_NUM_SAMPLES_shift = 0,
- AA_MASK_CENTROID_DTMN_bit = 1 << 4,
- MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4, /* Specifies whether to apply the MSAA Mask before or after the centroid determination. 0 = before; 1 = after. */
+ MAX_SAMPLE_DIST_mask = 0x0f << 13, /* Specifies the maximum distance (in subpixels) between the pixel center and the outermost subpixel sample. This value is used to optimize coarse walk and quad identity. Should be set to 0 when not anti-aliasing. Max value for R600 should be 8(16ths). */
MAX_SAMPLE_DIST_shift = 13,
- PA_SU_VTX_CNTL = 0x00028c08,
- PIX_CENTER_bit = 1 << 0,
- PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL = 0x00028c08, /* Miscellaneous SU Control */
+ PIX_CENTER_bit = 1 << 0, /* Specifies where the pixel center of the incoming vertex is. The drawing engine itself has pixel centers @ 0.5, so if this bit is `0`, 0.5 will be added to the X,Y coordinates to move the incoming vertex onto our internal grid. */
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, /* Controls conversion of X,Y coordinates from IEEE to fixed-point */
PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
- X_TRUNCATE = 0x00,
- X_ROUND = 0x01,
- X_ROUND_TO_EVEN = 0x02,
- X_ROUND_TO_ODD = 0x03,
- QUANT_MODE_mask = 0x07 << 3,
+ X_TRUNCATE = 0x00, /* 0 = Truncate (OGL) */
+ X_ROUND = 0x01, /* 1 = Round */
+ X_ROUND_TO_EVEN = 0x02, /* 2 = Round to Even (D3D) */
+ X_ROUND_TO_ODD = 0x03, /* 3 = Round to Odd */
+ QUANT_MODE_mask = 0x07 << 3, /* Controls conversion of X,Y coordinates from IEEE to fixed-point */
QUANT_MODE_shift = 3,
- X_1_16TH = 0x00,
- X_1_8TH = 0x01,
- X_1_4TH = 0x02,
- X_1_2 = 0x03,
- X_1 = 0x04,
- X_1_256TH = 0x05,
- PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
- PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
- PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
- PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
- PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c,
-/* S0_X_mask = 0x0f << 0, */
+ X_1_16TH = 0x00, /* 0 = 1/16th */
+ X_1_8TH = 0x01, /* 1 = 1/8th */
+ X_1_4TH = 0x02, /* 2 = 1/4th */
+ X_1_2 = 0x03, /* 3 = 1/2 */
+ X_1 = 0x04, /* 4 = 1 */
+ X_1_256TH = 0x05, /* 5 = 1/256th */
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, /* Vertical Guard Band Clip Adjust Register */
+ /* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, /* Vertical Guard Band Discard Adjust Register */
+ /* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, /* Horizontal Guard Band Clip Adjust Register */
+ /* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18, /* Horizontal Guard Band Discard Adjust Register */
+ /* DATA_REGISTER: 32-bit floating point value. Should be set to 1.0 for no guard band. */
+ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c, /* Multi-Sample Programmable Sample Locations for 2-Sample, 4-Sample, 8-Sample First Word - Used by SC, SPI & CB`s */
+/* S0_X_mask = 0x0f << 0, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_X_shift = 0, */
-/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_mask = 0x0f << 4, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S0_Y_shift = 4, */
-/* S1_X_mask = 0x0f << 8, */
+/* S1_X_mask = 0x0f << 8, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_X_shift = 8, */
-/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_mask = 0x0f << 12, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S1_Y_shift = 12, */
-/* S2_X_mask = 0x0f << 16, */
+/* S2_X_mask = 0x0f << 16, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S2_X_shift = 16, */
-/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_mask = 0x0f << 20, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S2_Y_shift = 20, */
-/* S3_X_mask = 0x0f << 24, */
+/* S3_X_mask = 0x0f << 24, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S3_X_shift = 24, */
-/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_mask = 0x0f << 28, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S3_Y_shift = 28, */
- PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20,
-/* S4_X_mask = 0x0f << 0, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20, /* Multi-Sample Programmable Sample Locations for 8-Sample Second Word - Used by SC, SPI & CB`s */
+/* S4_X_mask = 0x0f << 0, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S4_X_shift = 0, */
-/* S4_Y_mask = 0x0f << 4, */
+/* S4_Y_mask = 0x0f << 4, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S4_Y_shift = 4, */
-/* S5_X_mask = 0x0f << 8, */
+/* S5_X_mask = 0x0f << 8, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S5_X_shift = 8, */
-/* S5_Y_mask = 0x0f << 12, */
+/* S5_Y_mask = 0x0f << 12, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S5_Y_shift = 12, */
-/* S6_X_mask = 0x0f << 16, */
+/* S6_X_mask = 0x0f << 16, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S6_X_shift = 16, */
-/* S6_Y_mask = 0x0f << 20, */
+/* S6_Y_mask = 0x0f << 20, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S6_Y_shift = 20, */
-/* S7_X_mask = 0x0f << 24, */
+/* S7_X_mask = 0x0f << 24, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S7_X_shift = 24, */
-/* S7_Y_mask = 0x0f << 28, */
+/* S7_Y_mask = 0x0f << 28, */ /* 4b signed offset from pixel center. Range -8/16 to 7/16. */
/* S7_Y_shift = 28, */
- CB_CLRCMP_CONTROL = 0x00028c30,
- CLRCMP_FCN_SRC_mask = 0x07 << 0,
+ CB_CLRCMP_CONTROL = 0x00028c30, /* This register controls color keying, which masks individual pixel writes based on comparing the source (pre-ROP) color and/or the dest (frame buffer) color to comparison values, after masking both by CLRCMP_MSK. Source color keying is a legacy operation that is not supported if any enabled render target has >32-bit pixels or sets the BLEND_FLOAT32 bit. */
+ CLRCMP_FCN_SRC_mask = 0x07 << 0, /* Color Compare Source Function, Specifies the function to perform on the source color compare. */
CLRCMP_FCN_SRC_shift = 0,
- CLRCMP_DRAW_ALWAYS = 0x00,
- CLRCMP_DRAW_NEVER = 0x01,
- CLRCMP_DRAW_ON_NEQ = 0x04,
- CLRCMP_DRAW_ON_EQ = 0x05,
- CLRCMP_FCN_DST_mask = 0x07 << 8,
+ CLRCMP_DRAW_ALWAYS = 0x00, /* CLRCMP_DRAW_ALWAYS: always draw */
+ CLRCMP_DRAW_NEVER = 0x01, /* CLRCMP_DRAW_NEVER: never draw */
+ CLRCMP_DRAW_ON_NEQ = 0x04, /* CLRCMP_DRAW_ON_NEQ: draw if xxx!=CLRCMP_XXX */
+ CLRCMP_DRAW_ON_EQ = 0x05, /* CLRCMP_DRAW_ON_EQ: draw if xxx==CLRCMP_XXX */
+ CLRCMP_FCN_DST_mask = 0x07 << 8, /* Color Compare Destination Function, Specifies the function to perform on the destination color compare. */
CLRCMP_FCN_DST_shift = 8,
-/* CLRCMP_DRAW_ALWAYS = 0x00, */
-/* CLRCMP_DRAW_NEVER = 0x01, */
-/* CLRCMP_DRAW_ON_NEQ = 0x04, */
-/* CLRCMP_DRAW_ON_EQ = 0x05, */
- CLRCMP_FCN_SEL_mask = 0x03 << 24,
+/* CLRCMP_DRAW_ALWAYS = 0x00, */ /* CLRCMP_DRAW_ALWAYS: always draw */
+/* CLRCMP_DRAW_NEVER = 0x01, */ /* CLRCMP_DRAW_NEVER: never draw */
+/* CLRCMP_DRAW_ON_NEQ = 0x04, */ /* CLRCMP_DRAW_ON_NEQ: draw if xxx!=CLRCMP_XXX */
+/* CLRCMP_DRAW_ON_EQ = 0x05, */ /* CLRCMP_DRAW_ON_EQ: draw if xxx==CLRCMP_XXX */
+ CLRCMP_FCN_SEL_mask = 0x03 << 24, /* Color Compare Function Select, Selects which color compare results to use in the final compare results. */
CLRCMP_FCN_SEL_shift = 24,
- CLRCMP_SEL_DST = 0x00,
- CLRCMP_SEL_SRC = 0x01,
- CLRCMP_SEL_AND = 0x02,
+ CLRCMP_SEL_DST = 0x00, /* CLRCMP_SEL_DST: use CLRCMP_FCN_DST */
+ CLRCMP_SEL_SRC = 0x01, /* CLRCMP_SEL_SRC: use CLRCMP_FCN_SRC */
+ CLRCMP_SEL_AND = 0x02, /* CLRCMP_SEL_AND: draw if allowed by both CLRCMP_FCN_SRC and CLRCMP_FCN_DST */
CB_CLRCMP_SRC = 0x00028c34,
+ /* Comparison color for source, in frame buffer format. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
CB_CLRCMP_DST = 0x00028c38,
+ /* Comparison color for destination, in frame buffer format. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
CB_CLRCMP_MSK = 0x00028c3c,
- PA_SC_AA_MASK = 0x00028c48,
- VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
- VTX_REUSE_DEPTH_mask = 0xff << 0,
+ /* Compare mask, which is ANDed with source and destination before the comparison. Ignored for pixels larger than 32-bits. Zero-fill high bits for pixels smaller than 32-bits. */
+ PA_SC_AA_MASK = 0x00028c48, /* Multisample AA Mask */
+ /* This mask is used for Multisample AA. It contains 4 8- bit masks. The 4 masks are applied to each 2x2 screen- aligned pixels as follows: ULC 7:0, URC 15:8, LLC 23:16, LRC 31:24, LSB is Sample0, MSB is Sample7. */
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, /* This register controls the behavior of the Vertex Reuse block at the backend of the VGT. This register is relevant only if the VGT_OUTPUT_PATH_CNTL register (or the prim type in Major Mode 0) specifies the Vertex Reuse Block for the VGT backend path. */
+ VTX_REUSE_DEPTH_mask = 0xff << 0, /* In general, for processing triangles, the vertex reuse depth should be programmed to ((num_enabled_pipes * 4) - 2) */
VTX_REUSE_DEPTH_shift = 0,
- VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
- DEALLOC_DIST_mask = 0x7f << 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c, /* This register controls, within a process vector, when the previous process vector is de-allocated. */
+ DEALLOC_DIST_mask = 0x7f << 0, /* Distance (in indices) which the vertex vector slot assignment leads the deallocation. This field should typically be set to (num_enabled_pipes * 4). */
DEALLOC_DIST_shift = 0,
DB_RENDER_CONTROL = 0x00028d0c,
- DEPTH_CLEAR_ENABLE_bit = 1 << 0,
- STENCIL_CLEAR_ENABLE_bit = 1 << 1,
- DEPTH_COPY_bit = 1 << 2,
- STENCIL_COPY_bit = 1 << 3,
- RESUMMARIZE_ENABLE_bit = 1 << 4,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0, /* Clears Z to the Clear Value. */
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1, /* Clears Stencil to the Clear Value */
+ DEPTH_COPY_bit = 1 << 2, /* Enables Z expansion to color render target 0. CB must be programmed to the desired destination format. */
+ STENCIL_COPY_bit = 1 << 3, /* Enables Stencil expansion to color render target 0. CB must be programmed to the desired destination format. */
+ RESUMMARIZE_ENABLE_bit = 1 << 4, /* If set, all tiles touched will update the HTILE surface info. */
STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
- COPY_CENTROID_bit = 1 << 7,
- COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_CENTROID_bit = 1 << 7, /* If set, copy the 1st lit sample in the pixel after the COPY_SAMPLE`th sample (wraps back to lower samples). */
+ COPY_SAMPLE_mask = 0x07 << 8, /* If COPY_CENTROID, copy 1st lit after this sample number. Else copy this sample whether lit or not. */
COPY_SAMPLE_shift = 8,
- ZPASS_INCREMENT_DISABLE_bit = 1 << 11,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 11, /* Disable incrementing the ZPass count for this context. */
DB_RENDER_OVERRIDE = 0x00028d10,
- FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0, /* Forces hierarchical depth culling to be enabled ignoring what is in DB_SHADER_CONTROL and all other render states. */
FORCE_HIZ_ENABLE_shift = 0,
- FORCE_OFF = 0x00,
- FORCE_ENABLE = 0x01,
- FORCE_DISABLE = 0x02,
- FORCE_RESERVED = 0x03,
- FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_OFF = 0x00, /* FORCE_OFF */
+ FORCE_ENABLE = 0x01, /* FORCE_ENABLE */
+ FORCE_DISABLE = 0x02, /* FORCE_DISABLE */
+ FORCE_RESERVED = 0x03, /* FORCE_RESERVED */
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2, /* Forces hierarchical stencil culling to be enabled for compare state 0, ignoring what is in DB_SHADER_CONTROL and all other render states. */
FORCE_HIS_ENABLE0_shift = 2,
-/* FORCE_OFF = 0x00, */
-/* FORCE_ENABLE = 0x01, */
-/* FORCE_DISABLE = 0x02, */
-/* FORCE_RESERVED = 0x03, */
- FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+/* FORCE_OFF = 0x00, */ /* FORCE_OFF */
+/* FORCE_ENABLE = 0x01, */ /* FORCE_ENABLE */
+/* FORCE_DISABLE = 0x02, */ /* FORCE_DISABLE */
+/* FORCE_RESERVED = 0x03, */ /* FORCE_RESERVED */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4, /* Forces hierarchical stencil culling to be enabled for compare state 1, ignoring what is in DB_SHADER_CONTROL and all other render states. */
FORCE_HIS_ENABLE1_shift = 4,
-/* FORCE_OFF = 0x00, */
-/* FORCE_ENABLE = 0x01, */
-/* FORCE_DISABLE = 0x02, */
-/* FORCE_RESERVED = 0x03, */
- FORCE_SHADER_Z_ORDER_bit = 1 << 6,
- FAST_Z_DISABLE_bit = 1 << 7,
- FAST_STENCIL_DISABLE_bit = 1 << 8,
- NOOP_CULL_DISABLE_bit = 1 << 9,
- FORCE_COLOR_KILL_bit = 1 << 10,
- FORCE_Z_READ_bit = 1 << 11,
- FORCE_STENCIL_READ_bit = 1 << 12,
- FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+/* FORCE_OFF = 0x00, */ /* FORCE_OFF */
+/* FORCE_ENABLE = 0x01, */ /* FORCE_ENABLE */
+/* FORCE_DISABLE = 0x02, */ /* FORCE_DISABLE */
+/* FORCE_RESERVED = 0x03, */ /* FORCE_RESERVED */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6, /* Forces the setting specified in DB_SHADER_CONTROL.Z_ORDER to be used for early/late/re Z+S test. If not set the shader preference is used unless precluded by other render states. */
+ FAST_Z_DISABLE_bit = 1 << 7, /* Do not accelerate Z clears or write operations. Prevents killing quads before detail rasterization if depth operations are needed. */
+ FAST_STENCIL_DISABLE_bit = 1 << 8, /* Do not accelerate stencil clears or write operations. Prevents killing quads before detail rasterization if stencil operations are needed. */
+ NOOP_CULL_DISABLE_bit = 1 << 9, /* Prevents hierarchically killing quads that will pass Z and Stencil, but do not write Z, Stencil or Color. This would be used to make sure ZPass counts are perfect. */
+ FORCE_COLOR_KILL_bit = 1 << 10, /* DB does any possible depth optimizations assuming the shader results are not needed and kills all samples before the color operation. */
+ FORCE_Z_READ_bit = 1 << 11, /* Read all Z data for a tile even if it is not needed. Used for resummarization blts. */
+ FORCE_STENCIL_READ_bit = 1 << 12, /* Read all stencil data for a tile even if it is not needed. Used for resummarization blts. */
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13, /* Forces hierarchical depth to treat each primitive as if its range is 0.0 -> 1.0f or not. If disabled, it is implicitly derived from DB_SHADER_CONTROL.Z_EXPORT_ENABLE and other enabling registers. Can be used to reset the Z range to 0-1 as well. */
FORCE_FULL_Z_RANGE_shift = 13,
-/* FORCE_OFF = 0x00, */
-/* FORCE_ENABLE = 0x01, */
-/* FORCE_DISABLE = 0x02, */
-/* FORCE_RESERVED = 0x03, */
- FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
- DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
- IGNORE_SC_ZRANGE_bit = 1 << 17,
+/* FORCE_OFF = 0x00, */ /* FORCE_OFF */
+/* FORCE_ENABLE = 0x01, */ /* FORCE_ENABLE */
+/* FORCE_DISABLE = 0x02, */ /* FORCE_DISABLE */
+/* FORCE_RESERVED = 0x03, */ /* FORCE_RESERVED */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, /* Forces Quad Coherency to mark a quad with a matching dtileid, x, and y as a conflict and stall it even if the sample mask doesn't overlap. */
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, /* Disables the viewport clamp, which allows Z data to go through untouched. */
+ IGNORE_SC_ZRANGE_bit = 1 << 17, /* Ignore the SC's vertex bounds on the minZ/maxZ for a tile during HiZ. */
DB_HTILE_SURFACE = 0x00028d24,
- HTILE_WIDTH_bit = 1 << 0,
- HTILE_HEIGHT_bit = 1 << 1,
- LINEAR_bit = 1 << 2,
- FULL_CACHE_bit = 1 << 3,
- HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
- PRELOAD_bit = 1 << 5,
- PREFETCH_WIDTH_mask = 0x3f << 6,
+ HTILE_WIDTH_bit = 1 << 0, /* How many pixels wide each entry in the htile buffer represents. 0 = 4, 1 = 8 */
+ HTILE_HEIGHT_bit = 1 << 1, /* How many pixels high each entry in the htile buffer represents. 0 = 4, 1 = 8 */
+ LINEAR_bit = 1 << 2, /* Surface is stored linearly in swaths of 8 htiles high until the surface is complete. */
+ FULL_CACHE_bit = 1 << 3, /* This htile buffer uses the entire htile cache. */
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4, /* If set, the htile surface dimensions will be that of the preload window; otherwise, it will be that of the depth buffer */
+ PRELOAD_bit = 1 << 5, /* Preload all data that fits as soon as room is available once the VGT_DRAW_INITIATOR is seen on a context. */
+ PREFETCH_WIDTH_mask = 0x3f << 6, /* The Prefetch window width. Prefetcher tries to keep this window around the last rasterized htile in cache at all times. */
PREFETCH_WIDTH_shift = 6,
- PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_mask = 0x3f << 12, /* The Prefetch window height. Prefetcher tries to keep this window around the last rasterized htile in cache at all times. */
PREFETCH_HEIGHT_shift = 12,
DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
- COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_mask = 0x07 << 0, /* Used to determine the meaning of the MayPass and MayFail smask bits during hierarchical stencil testing. NEVER or ALWAYS invalidates the SResults in the HTile Buffer */
COMPAREFUNC1_shift = 0,
-/* REF_NEVER = 0x00, */
-/* REF_LESS = 0x01, */
-/* REF_EQUAL = 0x02, */
-/* REF_LEQUAL = 0x03, */
-/* REF_GREATER = 0x04, */
-/* REF_NOTEQUAL = 0x05, */
-/* REF_GEQUAL = 0x06, */
-/* REF_ALWAYS = 0x07, */
- COMPAREVALUE1_mask = 0xff << 4,
+/* REF_NEVER = 0x00, */ /* REF_NEVER: never pass */
+/* REF_LESS = 0x01, */ /* REF_LESS: pass if left < right */
+/* REF_EQUAL = 0x02, */ /* REF_EQUAL: pass if left = right */
+/* REF_LEQUAL = 0x03, */ /* REF_LEQUAL: pass if left <= right */
+/* REF_GREATER = 0x04, */ /* REF_GREATER: pass if left > right */
+/* REF_NOTEQUAL = 0x05, */ /* REF_NOTEQUAL: pass if left != right */
+/* REF_GEQUAL = 0x06, */ /* REF_GEQUAL: pass if left >= right */
+/* REF_ALWAYS = 0x07, */ /* REF_ALWAYS: always pass */
+ COMPAREVALUE1_mask = 0xff << 4, /* Stencil value compared against the stencil reference value during hierarchical stencil testing. */
COMPAREVALUE1_shift = 4,
- COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_mask = 0xff << 12, /* This value is ANDed with the SResults compare value. A mask of 0 invalidates the SResults in the HTile Buffer */
COMPAREMASK1_shift = 12,
- ENABLE1_bit = 1 << 24,
+ ENABLE1_bit = 1 << 24, /* If set, use SResults in HiS test. Set when compare state is known and clear when doing a resummarize. */
DB_PRELOAD_CONTROL = 0x00028d30,
- START_X_mask = 0xff << 0,
+ START_X_mask = 0xff << 0, /* Starting X position of the preload window, in 32 pixel increments */
START_X_shift = 0,
- START_Y_mask = 0xff << 8,
+ START_Y_mask = 0xff << 8, /* Starting Y position of the preload window, in 32 pixel increments */
START_Y_shift = 8,
- MAX_X_mask = 0xff << 16,
+ MAX_X_mask = 0xff << 16, /* Ending X position of the preload window, in 32 pixel increments */
MAX_X_shift = 16,
- MAX_Y_mask = 0xff << 24,
+ MAX_Y_mask = 0xff << 24, /* Ending Y position of the preload window, in 32 pixel increments */
MAX_Y_shift = 24,
DB_PREFETCH_LIMIT = 0x00028d34,
- DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0,
+ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0, /* Height of the depth buffer in 8x8 pixels (height - 1) */
DEPTH_HEIGHT_TILE_MAX_shift = 0,
- PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8,
- POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8, /* Polygon Offset Depth Buffer Format Control */
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0, /* Specifies the number of bits in the depth buffer format. Specified as a negative value typically. For fixed point formats, should be number of bits (i.e. -16, -24), for float formats should be number of mantissa bits (i.e. - 23). This is a signed 8b value, range -128,127 */
POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
- POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
- PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc,
- PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00,
- PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04,
- PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08,
- PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c,
- PA_CL_POINT_X_RAD = 0x00028e10,
- PA_CL_POINT_Y_RAD = 0x00028e14,
- PA_CL_POINT_SIZE = 0x00028e18,
- PA_CL_POINT_CULL_RAD = 0x00028e1c,
- PA_CL_UCP_0_X = 0x00028e20,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, /* Specifies whether the depth buffer format is fixed or float. The NEG_NUM_DB_BITS is used differently (i.e. different POLY_OFFSET equation for fixed vs. float buffer formats). */
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc, /* Clamp Value for Polygon Offset */
+ /* Specifies the maximum (if clamp is positive) or minimum (if clamp is negative) value clamp for the polygon offset result. */
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00, /* Front-Facing Polygon Offset Scale */
+ /* Specifies polygon offset scale for front-facing polygons; 32-bit IEEE float format. */
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04, /* Front-Facing Polygon Offset Offset */
+ /* Specifies polygon offset offset for front-facing polygons; 32b IEEE fixed format. */
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08, /* Back-Facing Polygon Offset Scale */
+ /* Specifies polygon offset scale for back-facing polygons; 32-bit IEEE float format. */
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c, /* Back-Facing Polygon Offset Offset */
+ /* Specifies polygon offset offset for back-facing polygons; 32b IEEE fixed format. */
+ PA_CL_POINT_X_RAD = 0x00028e10, /* Point Sprite X Radius Expansion */
+ /* DATA_REGISTER: */
+ PA_CL_POINT_Y_RAD = 0x00028e14, /* Point Sprite Y Radius Expansion */
+ /* DATA_REGISTER: */
+ PA_CL_POINT_SIZE = 0x00028e18, /* Point Sprite Constant Size */
+ /* DATA_REGISTER: */
+ PA_CL_POINT_CULL_RAD = 0x00028e1c, /* Point Sprite Culling Radius Expansion SQRT(XRadExp^2 + YRadExp^2) */
+ /* DATA_REGISTER: */
+ PA_CL_UCP_0_X = 0x00028e20, /* User Clip Plane Data */
PA_CL_UCP_0_X_num = 6,
PA_CL_UCP_0_X_offset = 16,
- PA_CL_UCP_0_Y = 0x00028e24,
+ /* DATA_REGISTER: */
+ PA_CL_UCP_0_Y = 0x00028e24, /* User Clip Plane Data */
PA_CL_UCP_0_Y_num = 6,
PA_CL_UCP_0_Y_offset = 16,
- PA_CL_UCP_0_Z = 0x00028e28,
+ /* DATA_REGISTER: */
+ PA_CL_UCP_0_Z = 0x00028e28, /* User Clip Plane Data */
PA_CL_UCP_0_Z_num = 6,
PA_CL_UCP_0_Z_offset = 16,
- SQ_ALU_CONSTANT0_0 = 0x00030000,
+ /* DATA_REGISTER: */
+ SQ_ALU_CONSTANT0_0 = 0x00030000, /* (64-state) ALU Constant store data for use in DX9 mode (DX10 mode uses the constant-cache instead and this constant-file is not available). All four components of a constant must be written for that constant to be updated - the physical write to the constant store only occurs after the fourth component has been written. The first set of 256 constants (0-255) are reserved for the pixel shader (PS). The second set of 256 constants (256-511) are reserved for the vertex shader (VS). None are available to the GS or ES. */
+ /* X: Format is IEEE float */
SQ_ALU_CONSTANT1_0 = 0x00030004,
+ /* Y: Format is IEEE float */
SQ_ALU_CONSTANT2_0 = 0x00030008,
+ /* Z: Format is IEEE float */
SQ_ALU_CONSTANT3_0 = 0x0003000c,
+ /* W: Format is IEEE float */
SQ_VTX_CONSTANT_WORD0_0 = 0x00038000,
+ /* BASE_ADDRESS: */
SQ_TEX_RESOURCE_WORD0_0 = 0x00038000,
DIM_mask = 0x07 << 0,
DIM_shift = 0,
- SQ_TEX_DIM_1D = 0x00,
- SQ_TEX_DIM_2D = 0x01,
- SQ_TEX_DIM_3D = 0x02,
- SQ_TEX_DIM_CUBEMAP = 0x03,
- SQ_TEX_DIM_1D_ARRAY = 0x04,
- SQ_TEX_DIM_2D_ARRAY = 0x05,
- SQ_TEX_DIM_2D_MSAA = 0x06,
- SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+ SQ_TEX_DIM_1D = 0x00, /* SQ_TEX_DIM_1D */
+ SQ_TEX_DIM_2D = 0x01, /* SQ_TEX_DIM_2D */
+ SQ_TEX_DIM_3D = 0x02, /* SQ_TEX_DIM_3D */
+ SQ_TEX_DIM_CUBEMAP = 0x03, /* SQ_TEX_DIM_CUBEMAP */
+ SQ_TEX_DIM_1D_ARRAY = 0x04, /* SQ_TEX_DIM_1D_ARRAY */
+ SQ_TEX_DIM_2D_ARRAY = 0x05, /* SQ_TEX_DIM_2D_ARRAY */
+ SQ_TEX_DIM_2D_MSAA = 0x06, /* SQ_TEX_DIM_2D_MSAA */
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, /* SQ_TEX_DIM_2D_ARRAY_MSAA */
SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3,
SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3,
TILE_TYPE_bit = 1 << 7,
@@ -2766,6 +2886,7 @@ enum {
TEX_WIDTH_mask = 0x1fff << 19,
TEX_WIDTH_shift = 19,
SQ_VTX_CONSTANT_WORD1_0 = 0x00038004,
+ /* SIZE: */
SQ_TEX_RESOURCE_WORD1_0 = 0x00038004,
TEX_HEIGHT_mask = 0x1fff << 0,
TEX_HEIGHT_shift = 0,
@@ -2783,88 +2904,90 @@ enum {
SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
-/* SQ_NUM_FORMAT_NORM = 0x00, */
-/* SQ_NUM_FORMAT_INT = 0x01, */
-/* SQ_NUM_FORMAT_SCALED = 0x02, */
+/* SQ_NUM_FORMAT_NORM = 0x00, */ /* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [- 1, 1] if signed. */
+/* SQ_NUM_FORMAT_INT = 0x01, */ /* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */ /* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
-/* SQ_ENDIAN_NONE = 0x00, */
-/* SQ_ENDIAN_8IN16 = 0x01, */
-/* SQ_ENDIAN_8IN32 = 0x02, */
+/* SQ_ENDIAN_NONE = 0x00, */ /* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
+/* SQ_ENDIAN_8IN16 = 0x01, */ /* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
+/* SQ_ENDIAN_8IN32 = 0x02, */ /* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
SQ_TEX_RESOURCE_WORD2_0 = 0x00038008,
+ /* BASE_ADDRESS: */
SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c,
MEM_REQUEST_SIZE_mask = 0x03 << 0,
MEM_REQUEST_SIZE_shift = 0,
SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c,
+ /* MIP_ADDRESS: */
SQ_TEX_RESOURCE_WORD4_0 = 0x00038010,
FORMAT_COMP_X_mask = 0x03 << 0,
FORMAT_COMP_X_shift = 0,
- SQ_FORMAT_COMP_UNSIGNED = 0x00,
- SQ_FORMAT_COMP_SIGNED = 0x01,
- SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00, /* SQ_FORMAT_COMP_UNSIGNED */
+ SQ_FORMAT_COMP_SIGNED = 0x01, /* SQ_FORMAT_COMP_SIGNED */
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, /* SQ_FORMAT_COMP_UNSIGNED_BIASED */
FORMAT_COMP_Y_mask = 0x03 << 2,
FORMAT_COMP_Y_shift = 2,
-/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
-/* SQ_FORMAT_COMP_SIGNED = 0x01, */
-/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ /* SQ_FORMAT_COMP_UNSIGNED */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ /* SQ_FORMAT_COMP_SIGNED */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ /* SQ_FORMAT_COMP_UNSIGNED_BIASED */
FORMAT_COMP_Z_mask = 0x03 << 4,
FORMAT_COMP_Z_shift = 4,
-/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
-/* SQ_FORMAT_COMP_SIGNED = 0x01, */
-/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ /* SQ_FORMAT_COMP_UNSIGNED */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ /* SQ_FORMAT_COMP_SIGNED */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ /* SQ_FORMAT_COMP_UNSIGNED_BIASED */
FORMAT_COMP_W_mask = 0x03 << 6,
FORMAT_COMP_W_shift = 6,
-/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
-/* SQ_FORMAT_COMP_SIGNED = 0x01, */
-/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ /* SQ_FORMAT_COMP_UNSIGNED */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */ /* SQ_FORMAT_COMP_SIGNED */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ /* SQ_FORMAT_COMP_UNSIGNED_BIASED */
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
-/* SQ_NUM_FORMAT_NORM = 0x00, */
-/* SQ_NUM_FORMAT_INT = 0x01, */
-/* SQ_NUM_FORMAT_SCALED = 0x02, */
+/* SQ_NUM_FORMAT_NORM = 0x00, */ /* SQ_NUM_FORMAT_NORM: repeating fraction number (0.N) with range [0, 1] if unsigned, or [- 1, 1] if signed. */
+/* SQ_NUM_FORMAT_INT = 0x01, */ /* SQ_NUM_FORMAT_INT: integer number (N.0) with range [0, 2^N] if unsigned, or [-2^M, 2^M] if signed (M = N - 1). */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */ /* SQ_NUM_FORMAT_SCALED: integer number stored as a S23E8 floating-point representation (1 == 0x3f800000). */
SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
-/* SQ_ENDIAN_NONE = 0x00, */
-/* SQ_ENDIAN_8IN16 = 0x01, */
-/* SQ_ENDIAN_8IN32 = 0x02, */
+/* SQ_ENDIAN_NONE = 0x00, */ /* SQ_ENDIAN_NONE: no endian swap (XOR by 0) */
+/* SQ_ENDIAN_8IN16 = 0x01, */ /* SQ_ENDIAN_8IN16: 8 bit swap in 16 bit word (XOR by 1): AABBCCDD -> BBAADDCC */
+/* SQ_ENDIAN_8IN32 = 0x02, */ /* SQ_ENDIAN_8IN32: 8 bit swap in 32 bit word (XOR by 3): AABBCCDD -> DDCCBBAA */
REQUEST_SIZE_mask = 0x03 << 14,
REQUEST_SIZE_shift = 14,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
-/* SQ_SEL_X = 0x00, */
-/* SQ_SEL_Y = 0x01, */
-/* SQ_SEL_Z = 0x02, */
-/* SQ_SEL_W = 0x03, */
-/* SQ_SEL_0 = 0x04, */
-/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_X = 0x00, */ /* SQ_SEL_X: use X component */
+/* SQ_SEL_Y = 0x01, */ /* SQ_SEL_Y: use Y component */
+/* SQ_SEL_Z = 0x02, */ /* SQ_SEL_Z: use Z component */
+/* SQ_SEL_W = 0x03, */ /* SQ_SEL_W: use W component */
+/* SQ_SEL_0 = 0x04, */ /* SQ_SEL_0: use constant 0.0 */
+/* SQ_SEL_1 = 0x05, */ /* SQ_SEL_1: use constant 1.0 */
BASE_LEVEL_mask = 0x0f << 28,
BASE_LEVEL_shift = 28,
SQ_TEX_RESOURCE_WORD5_0 = 0x00038014,
@@ -2877,99 +3000,99 @@ enum {
SQ_TEX_RESOURCE_WORD6_0 = 0x00038018,
MPEG_CLAMP_mask = 0x03 << 0,
MPEG_CLAMP_shift = 0,
- SQ_TEX_MPEG_CLAMP_OFF = 0x00,
- SQ_TEX_MPEG_9 = 0x01,
- SQ_TEX_MPEG_10 = 0x02,
+ SQ_TEX_MPEG_CLAMP_OFF = 0x00, /* SQ_TEX_MPEG_CLAMP_OFF: no clamping (FMT_16 is plain 16b fixed/normalized number). */
+ SQ_TEX_MPEG_9 = 0x01, /* SQ_TEX_MPEG_9: consider FMT_16 as s9 in LSBs, clamp range to [-256, 255). */
+ SQ_TEX_MPEG_10 = 0x02, /* SQ_TEX_MPEG_10: mask bottom 6b of FMT_16. */
PERF_MODULATION_mask = 0x07 << 5,
PERF_MODULATION_shift = 5,
INTERLACED_bit = 1 << 8,
SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30,
SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30,
- SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
- SQ_TEX_VTX_INVALID_BUFFER = 0x01,
- SQ_TEX_VTX_VALID_TEXTURE = 0x02,
- SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00, /* SQ_TEX_VTX_INVALID_TEXTURE */
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01, /* SQ_TEX_VTX_INVALID_BUFFER */
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02, /* SQ_TEX_VTX_VALID_TEXTURE */
+ SQ_TEX_VTX_VALID_BUFFER = 0x03, /* SQ_TEX_VTX_VALID_BUFFER 8. Shader Texture Sampler Constants */
SQ_VTX_CONSTANT_WORD6_0 = 0x00038018,
SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30,
SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30,
-/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
-/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
-/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
-/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ /* SQ_TEX_VTX_INVALID_TEXTURE */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ /* SQ_TEX_VTX_INVALID_BUFFER */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ /* SQ_TEX_VTX_VALID_TEXTURE */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ /* SQ_TEX_VTX_VALID_BUFFER 7. Shader Texture Resource Constants */
SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
- SQ_TEX_WRAP = 0x00,
- SQ_TEX_MIRROR = 0x01,
- SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
- SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
- SQ_TEX_CLAMP_HALF_BORDER = 0x04,
- SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
- SQ_TEX_CLAMP_BORDER = 0x06,
- SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ SQ_TEX_WRAP = 0x00, /* SQ_TEX_WRAP */
+ SQ_TEX_MIRROR = 0x01, /* SQ_TEX_MIRROR */
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02, /* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, /* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [- 1,1] */
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04, /* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, /* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
+ SQ_TEX_CLAMP_BORDER = 0x06, /* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07, /* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
CLAMP_Y_mask = 0x07 << 3,
CLAMP_Y_shift = 3,
-/* SQ_TEX_WRAP = 0x00, */
-/* SQ_TEX_MIRROR = 0x01, */
-/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
-/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
-/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
-/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
-/* SQ_TEX_CLAMP_BORDER = 0x06, */
-/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+/* SQ_TEX_WRAP = 0x00, */ /* SQ_TEX_WRAP */
+/* SQ_TEX_MIRROR = 0x01, */ /* SQ_TEX_MIRROR */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ /* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ /* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [- 1,1] */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ /* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ /* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */ /* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ /* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
CLAMP_Z_mask = 0x07 << 6,
CLAMP_Z_shift = 6,
-/* SQ_TEX_WRAP = 0x00, */
-/* SQ_TEX_MIRROR = 0x01, */
-/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
-/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
-/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
-/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
-/* SQ_TEX_CLAMP_BORDER = 0x06, */
-/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+/* SQ_TEX_WRAP = 0x00, */ /* SQ_TEX_WRAP */
+/* SQ_TEX_MIRROR = 0x01, */ /* SQ_TEX_MIRROR */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ /* SQ_TEX_CLAMP_LAST_TEXEL: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ /* SQ_TEX_MIRROR_ONCE_LAST_TEXEL: [- 1,1] */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ /* SQ_TEX_CLAMP_HALF_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ /* SQ_TEX_MIRROR_ONCE_HALF_BORDER: [-1,1] */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */ /* SQ_TEX_CLAMP_BORDER: [0,1] normalized, [0,dimen] unnormalized */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ /* SQ_TEX_MIRROR_ONCE_BORDER: [-1,1] */
XY_MAG_FILTER_mask = 0x07 << 9,
XY_MAG_FILTER_shift = 9,
- SQ_TEX_XY_FILTER_POINT = 0x00,
- SQ_TEX_XY_FILTER_BILINEAR = 0x01,
- SQ_TEX_XY_FILTER_BICUBIC = 0x02,
+ SQ_TEX_XY_FILTER_POINT = 0x00, /* SQ_TEX_XY_FILTER_POINT */
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01, /* SQ_TEX_XY_FILTER_BILINEAR */
+ SQ_TEX_XY_FILTER_BICUBIC = 0x02, /* SQ_TEX_XY_FILTER_BICUBIC */
XY_MIN_FILTER_mask = 0x07 << 12,
XY_MIN_FILTER_shift = 12,
-/* SQ_TEX_XY_FILTER_POINT = 0x00, */
-/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
-/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */ /* SQ_TEX_XY_FILTER_POINT */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ /* SQ_TEX_XY_FILTER_BILINEAR */
+/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */ /* SQ_TEX_XY_FILTER_BICUBIC */
Z_FILTER_mask = 0x03 << 15,
Z_FILTER_shift = 15,
- SQ_TEX_Z_FILTER_NONE = 0x00,
- SQ_TEX_Z_FILTER_POINT = 0x01,
- SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ SQ_TEX_Z_FILTER_NONE = 0x00, /* SQ_TEX_Z_FILTER_NONE */
+ SQ_TEX_Z_FILTER_POINT = 0x01, /* SQ_TEX_Z_FILTER_POINT */
+ SQ_TEX_Z_FILTER_LINEAR = 0x02, /* SQ_TEX_Z_FILTER_LINEAR */
MIP_FILTER_mask = 0x03 << 17,
MIP_FILTER_shift = 17,
-/* SQ_TEX_Z_FILTER_NONE = 0x00, */
-/* SQ_TEX_Z_FILTER_POINT = 0x01, */
-/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */ /* SQ_TEX_Z_FILTER_NONE */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */ /* SQ_TEX_Z_FILTER_POINT */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ /* SQ_TEX_Z_FILTER_LINEAR */
BORDER_COLOR_TYPE_mask = 0x03 << 22,
BORDER_COLOR_TYPE_shift = 22,
- SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
- SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
- SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
- SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, /* SQ_TEX_BORDER_COLOR_TRANS_BLACK: (0.0, 0.0, 0.0, 0.0) */
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, /* SQ_TEX_BORDER_COLOR_OPAQUE_BLACK: (0.0, 0.0, 0.0, 1.0) */
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, /* SQ_TEX_BORDER_COLOR_OPAQUE_WHITE: (1.0, 1.0, 1.0, 1.0) */
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03, /* SQ_TEX_BORDER_COLOR_REGISTER: use BORDER_COLOR_[XYZW] */
POINT_SAMPLING_CLAMP_bit = 1 << 24,
TEX_ARRAY_OVERRIDE_bit = 1 << 25,
DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26,
DEPTH_COMPARE_FUNCTION_shift = 26,
- SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
- SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
- SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
- SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
- SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
- SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
- SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
- SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, /* SQ_TEX_DEPTH_COMPARE_NEVER: always 0 */
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01, /* SQ_TEX_DEPTH_COMPARE_LESS: 1 if incoming Z < fetched data */
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, /* SQ_TEX_DEPTH_COMPARE_EQUAL: 1 if incoming Z == fetched data */
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, /* SQ_TEX_DEPTH_COMPARE_LESSEQUAL: 1 if incoming Z <= fetched data */
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, /* SQ_TEX_DEPTH_COMPARE_GREATER: 1 if incoming Z > fetched data */
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, /* SQ_TEX_DEPTH_COMPARE_NOTEQUAL: 1 if incoming Z != fetched data */
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, /* SQ_TEX_DEPTH_COMPARE_GREATEREQUAL: 1 if incoming Z >= fetched data */
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, /* SQ_TEX_DEPTH_COMPARE_ALWAYS: always 1 */
CHROMA_KEY_mask = 0x03 << 29,
CHROMA_KEY_shift = 29,
- SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
- SQ_TEX_CHROMA_KEY_KILL = 0x01,
- SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00, /* SQ_TEX_CHROMA_KEY_DISABLED: no chroma keying */
+ SQ_TEX_CHROMA_KEY_KILL = 0x01, /* SQ_TEX_CHROMA_KEY_KILL: returns negative value if any texel matches chroma key */
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02, /* SQ_TEX_CHROMA_KEY_BLEND: sets matching texels to 0 before blending */
LOD_USES_MINOR_AXIS_bit = 1 << 31,
SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
MIN_LOD_mask = 0x3ff << 0,
@@ -2990,19 +3113,23 @@ enum {
PERF_Z_shift = 18,
FETCH_4_bit = 1 << 26,
SAMPLE_IS_PCF_bit = 1 << 27,
- SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
- SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
- SQ_VTX_START_INST_LOC = 0x0003cff4,
- SQ_LOOP_CONST_DX10_0 = 0x0003e200,
- SQ_LOOP_CONST_0 = 0x0003e200,
- SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, /* 9. Shader ALU Constants */
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0, /* (64-state) Vertex fetch base location. can be used as an index offset for vertex fetch. one entry per state (up to 64 states). */
+ /* OFFSET: Vertex Base location for vertex fetching */
+ SQ_VTX_START_INST_LOC = 0x0003cff4, /* (64-state) Vertex fetch instance offset. can be used as an index offset for vertex fetch. one entry per state (up to 64 states, but probably less than base_vtx_loc). */
+ /* OFFSET: Instance start location for vertex fetching 4. R6xx Shader Instructions */
+ SQ_LOOP_CONST_DX10_0 = 0x0003e200, /* (64-state) DX9 loop counter constants - these are used to define the behaviour of a programmed loop. There are 96 loop counter constants available - 32 each for the PS, VS, and GS. First 32 for PS, next 32 for VS, last 32 for GS. The loop counter is usable in both DX9 and DX10 modes. This version is used for SQ_CF_INST_LOOP_DX10 statements. */
+ /* COUNT: Total number of loop iterations (unsigned) */
+ SQ_LOOP_CONST_0 = 0x0003e200, /* (64-state) DX9 loop counter constants - these are used to define the behaviour of a programmed loop. There are 96 loop counter constants available - 32 each for the PS, VS, and GS. First 32 for PS, next 32 for VS, last 32 for GS. The loop counter is usable in both DX9 and DX10 modes. This version is used for SQ_CF_INST_LOOP and SQ_CF_INST_LOOP_NO_AL statements. */
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, /* Total number of loop iterations (unsigned) */
SQ_LOOP_CONST_0__COUNT_shift = 0,
- INIT_mask = 0xfff << 12,
+ INIT_mask = 0xfff << 12, /* Initial value of loop counter AL (unsigned) */
INIT_shift = 12,
- INC_mask = 0xff << 24,
+ INC_mask = 0xff << 24, /* Amount loop counter increments after each loop iteration (signed) */
INC_shift = 24,
- SQ_BOOL_CONST_0 = 0x0003e380,
+ SQ_BOOL_CONST_0 = 0x0003e380, /* (64-state) DX9 Boolean constants - these are available as input to flow control instructions such as `IF`. There are 96 boolean constants available - 32 bits for each of the PS, VS, and GS. First for PS, next for VS, last for GS. The booleans are usable in both DX9 and DX10 modes. */
SQ_BOOL_CONST_0_num = 3,
+ /* BOOLEANS: 32 one-bit booleans for static branching */
} ;