diff options
author | Luc Verhaegen <libv@skynet.be> | 2010-03-14 07:04:46 +0100 |
---|---|---|
committer | Luc Verhaegen <libv@skynet.be> | 2010-03-14 07:04:46 +0100 |
commit | 50d4922305e925896a71e705c438ededbaedb80f (patch) | |
tree | d9a44227dcdda1de61337280b20170d0deb6211d | |
parent | 5dee9b7b19c1aa3a13618b08bc24f00677b5364b (diff) |
Import radeon, r200 and r300 dri drivers from mesa 7.0.3.7.0.3
109 files changed, 57751 insertions, 11 deletions
diff --git a/Makefile.am b/Makefile.am index 15ea2b3..37b4466 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,3 +1,3 @@ AUTOMAKE_OPTIONS = foreign -SUBDIRS = src +SUBDIRS = radeon r200 r300 diff --git a/configure.ac b/configure.ac index 2a78cfd..c6e6dfe 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ # Process this file with autoconf to produce a configure script AC_PREREQ(2.57) -AC_INIT([mesa-dri-xxx], 7.0.3, [], mesa-dri-xxx) +AC_INIT([mesa-dri-radeon], 7.0.3, [], mesa-dri-radeon) AM_INIT_AUTOMAKE([dist-bzip2]) @@ -16,9 +16,12 @@ AC_PROG_CC AC_HEADER_STDC PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0]) -PKG_CHECK_MODULES([DRI], [libmesadri = 7.0.3 libmesadricommon = 7.0.3]) +PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0 + libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0]) AC_OUTPUT([ Makefile - src/Makefile + radeon/Makefile + r200/Makefile + r300/Makefile ]) diff --git a/r200/Doxyfile b/r200/Doxyfile new file mode 100644 index 0000000..27b3d03 --- /dev/null +++ b/r200/Doxyfile @@ -0,0 +1,232 @@ +# Doxyfile 1.3.2-Gideon + +#--------------------------------------------------------------------------- +# General configuration options +#--------------------------------------------------------------------------- +PROJECT_NAME = r200 +PROJECT_NUMBER = $VERSION$ +OUTPUT_DIRECTORY = +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +SHORT_NAMES = NO +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +DISTRIBUTE_GROUP_DOC = NO +TAB_SIZE = 8 +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ALIASES = +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +SHOW_USED_FILES = YES +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = /home/temp/Mesa/src/drv/r200 +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.C \ + *.H \ + *.tlh \ + *.diff \ + *.patch \ + *.moc \ + *.xpm +RECURSIVE = yes +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = NO +USE_PDFLATEX = NO +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = yes +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::addtions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 1000 +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +#--------------------------------------------------------------------------- +# Configuration::addtions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO +CGI_NAME = search.cgi +CGI_URL = +DOC_URL = +DOC_ABSPATH = +BIN_ABSPATH = /usr/local/bin/ +EXT_DOC_PATHS = diff --git a/r200/Makefile.am b/r200/Makefile.am new file mode 100644 index 0000000..0234e1d --- /dev/null +++ b/r200/Makefile.am @@ -0,0 +1,29 @@ +AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING + +R200_CFLAGS = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200 +R200_CFLAGS += -I../radeon -I../radeon/server + +r200_dri_la_LTLIBRARIES = r200_dri.la +r200_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R200_CFLAGS) +r200_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \ + $(DRM_LIBS) $(DRI_LIBS) +r200_dri_ladir = @libdir@/dri +r200_dri_la_SOURCES = \ + r200_context.c \ + r200_ioctl.c \ + r200_lock.c \ + r200_state.c \ + r200_state_init.c \ + r200_cmdbuf.c \ + r200_pixel.c \ + r200_tex.c \ + r200_texmem.c \ + r200_texstate.c \ + r200_tcl.c \ + r200_swtcl.c \ + r200_span.c \ + r200_maos.c \ + r200_sanity.c \ + r200_fragshader.c \ + r200_vertprog.c \ + ../radeon/radeon_screen.c diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c new file mode 100644 index 0000000..2920cea --- /dev/null +++ b/r200/r200_cmdbuf.c @@ -0,0 +1,429 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_cmdbuf.c,v 1.1 2002/10/30 12:51:51 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "macros.h" +#include "context.h" +#include "swrast/swrast.h" +#include "simple_list.h" + +#include "r200_context.h" +#include "r200_state.h" +#include "r200_ioctl.h" +#include "r200_tcl.h" +#include "r200_sanity.h" +#include "radeon_reg.h" + +static void print_state_atom( struct r200_state_atom *state ) +{ + int i; + + fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); + + if (0 & R200_DEBUG & DEBUG_VERBOSE) + for (i = 0 ; i < state->cmd_size ; i++) + fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); + +} + +/* The state atoms will be emitted in the order they appear in the atom list, + * so this step is important. + */ +void r200SetUpAtomList( r200ContextPtr rmesa ) +{ + int i, mtu; + + mtu = rmesa->glCtx->Const.MaxTextureUnits; + + make_empty_list(&rmesa->hw.atomlist); + rmesa->hw.atomlist.name = "atom-list"; + + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf ); + for (i = 0; i < mtu; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] ); + for (i = 0; i < mtu; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] ); + for (i = 0; i < 6; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] ); + for (i = 0; i < 8; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] ); + for (i = 0; i < 3 + mtu; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt ); + for (i = 0; i < 2; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] ); + for (i = 0; i < 6; ++i) + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] ); + insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] ); +} + +static void r200SaveHwState( r200ContextPtr rmesa ) +{ + struct r200_state_atom *atom; + char * dest = rmesa->backup_store.cmd_buf; + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->backup_store.cmd_used = 0; + + foreach( atom, &rmesa->hw.atomlist ) { + if ( atom->check( rmesa->glCtx, atom->idx ) ) { + int size = atom->cmd_size * 4; + memcpy( dest, atom->cmd, size); + dest += size; + rmesa->backup_store.cmd_used += size; + if (R200_DEBUG & DEBUG_STATE) + print_state_atom( atom ); + } + } + + assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ ); + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "Returning to r200EmitState\n"); +} + +void r200EmitState( r200ContextPtr rmesa ) +{ + char *dest; + int mtu; + struct r200_state_atom *atom; + + if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->save_on_next_emit) { + r200SaveHwState(rmesa); + rmesa->save_on_next_emit = GL_FALSE; + } + + if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) + return; + + mtu = rmesa->glCtx->Const.MaxTextureUnits; + + /* To avoid going across the entire set of states multiple times, just check + * for enough space for the case of emitting all state, and inline the + * r200AllocCmdBuf code here without all the checks. + */ + r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size ); + + /* we need to calculate dest after EnsureCmdBufSpace + as we may flush the buffer - airlied */ + dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; + if (R200_DEBUG & DEBUG_STATE) { + foreach( atom, &rmesa->hw.atomlist ) { + if ( atom->dirty || rmesa->hw.all_dirty ) { + if ( atom->check( rmesa->glCtx, atom->idx ) ) + print_state_atom( atom ); + else + fprintf(stderr, "skip state %s\n", atom->name); + } + } + } + + foreach( atom, &rmesa->hw.atomlist ) { + if ( rmesa->hw.all_dirty ) + atom->dirty = GL_TRUE; + if ( atom->dirty ) { + if ( atom->check( rmesa->glCtx, atom->idx ) ) { + int size = atom->cmd_size * 4; + memcpy( dest, atom->cmd, size); + dest += size; + rmesa->store.cmd_used += size; + atom->dirty = GL_FALSE; + } + } + } + + assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); + + rmesa->hw.is_dirty = GL_FALSE; + rmesa->hw.all_dirty = GL_FALSE; +} + +/* Fire a section of the retained (indexed_verts) buffer as a regular + * primtive. + */ +void r200EmitVbufPrim( r200ContextPtr rmesa, + GLuint primitive, + GLuint vertex_nr ) +{ + drm_radeon_cmd_header_t *cmd; + + assert(!(primitive & R200_VF_PRIM_WALK_IND)); + + r200EmitState( rmesa ); + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) + fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, + rmesa->store.cmd_used/4, primitive, vertex_nr); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ, + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2; + cmd[2].i = (primitive | + R200_VF_PRIM_WALK_LIST | + R200_VF_COLOR_ORDER_RGBA | + (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT)); +} + + +void r200FlushElts( r200ContextPtr rmesa ) +{ + int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); + int dwords; + int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2; + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + assert( rmesa->dma.flush == r200FlushElts ); + rmesa->dma.flush = NULL; + + /* Cope with odd number of elts: + */ + rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; + dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + + cmd[1] |= (dwords - 3) << 16; + cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT; + + if (R200_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + r200Finish( rmesa->glCtx ); + } +} + + +GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + GLuint primitive, + GLuint min_nr ) +{ + drm_radeon_cmd_header_t *cmd; + GLushort *retval; + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); + + assert((primitive & R200_VF_PRIM_WALK_IND)); + + r200EmitState( rmesa ); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr), + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2; + cmd[2].i = (primitive | + R200_VF_PRIM_WALK_IND | + R200_VF_COLOR_ORDER_RGBA); + + + retval = (GLushort *)(cmd+3); + + if (R200_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: header 0x%x prim %x \n", + __FUNCTION__, + cmd[1].i, primitive); + + assert(!rmesa->dma.flush); + rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = r200FlushElts; + + rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; + + return retval; +} + + + +void r200EmitVertexAOS( r200ContextPtr rmesa, + GLuint vertex_size, + GLuint offset ) +{ + drm_radeon_cmd_header_t *cmd; + + if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) + fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ, + __FUNCTION__ ); + + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16); + cmd[2].i = 1; + cmd[3].i = vertex_size | (vertex_size << 8); + cmd[4].i = offset; +} + + +void r200EmitAOS( r200ContextPtr rmesa, + struct r200_dma_region **component, + GLuint nr, + GLuint offset ) +{ + drm_radeon_cmd_header_t *cmd; + int sz = AOS_BUFSZ(nr); + int i; + int *tmp; + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16); + cmd[2].i = nr; + tmp = &cmd[0].i; + cmd += 3; + + for (i = 0 ; i < nr ; i++) { + if (i & 1) { + cmd[0].i |= ((component[i]->aos_stride << 24) | + (component[i]->aos_size << 16)); + cmd[2].i = (component[i]->aos_start + + offset * component[i]->aos_stride * 4); + cmd += 3; + } + else { + cmd[0].i = ((component[i]->aos_stride << 8) | + (component[i]->aos_size << 0)); + cmd[1].i = (component[i]->aos_start + + offset * component[i]->aos_stride * 4); + } + } + + if (R200_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "%s:\n", __FUNCTION__); + for (i = 0 ; i < sz ; i++) + fprintf(stderr, " %d: %x\n", i, tmp[i]); + } +} + +void r200EmitBlit( r200ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, + GLuint w, GLuint h ) +{ + drm_radeon_cmd_header_t *cmd; + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", + __FUNCTION__, + src_pitch, src_offset, srcx, srcy, + dst_pitch, dst_offset, dstx, dsty, + w, h); + + assert( (src_pitch & 63) == 0 ); + assert( (dst_pitch & 63) == 0 ); + assert( (src_offset & 1023) == 0 ); + assert( (dst_offset & 1023) == 0 ); + assert( w < (1<<16) ); + assert( h < (1<<16) ); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int), + __FUNCTION__ ); + + + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16); + cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | + RADEON_GMC_WR_MSK_DIS ); + + cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); + cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); + cmd[5].i = (srcx << 16) | srcy; + cmd[6].i = (dstx << 16) | dsty; /* dst */ + cmd[7].i = (w << 16) | h; +} + + +void r200EmitWait( r200ContextPtr rmesa, GLuint flags ) +{ + drm_radeon_cmd_header_t *cmd; + + assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int), + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].wait.cmd_type = RADEON_CMD_WAIT; + cmd[0].wait.flags = flags; +} diff --git a/r200/r200_context.c b/r200/r200_context.c new file mode 100644 index 0000000..786a298 --- /dev/null +++ b/r200/r200_context.c @@ -0,0 +1,714 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.c,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "api_arrayelt.h" +#include "context.h" +#include "simple_list.h" +#include "imports.h" +#include "matrix.h" +#include "extensions.h" +#include "framebuffer.h" +#include "state.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "drivers/common/driverfuncs.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_span.h" +#include "r200_pixel.h" +#include "r200_tex.h" +#include "r200_swtcl.h" +#include "r200_tcl.h" +#include "r200_maos.h" +#include "r200_vertprog.h" + +#define need_GL_ARB_multisample +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ATI_fragment_shader +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_NV_vertex_program +#define need_GL_ARB_point_parameters +#include "extension_helper.h" + +#define DRIVER_DATE "20060602" + +#include "vblank.h" +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ +#ifndef R200_DEBUG +int R200_DEBUG = (0); +#endif + +/* Return various strings for glGetString(). + */ +static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + static char buffer[128]; + unsigned offset; + GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 : + rmesa->r200Screen->AGPMode; + + switch ( name ) { + case GL_VENDOR: + return (GLubyte *)"Tungsten Graphics, Inc."; + + case GL_RENDERER: + offset = driGetRendererString( buffer, "R200", DRIVER_DATE, + agp_mode ); + + sprintf( & buffer[ offset ], " %sTCL", + !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) + ? "" : "NO-" ); + + return (GLubyte *)buffer; + + default: + return NULL; + } +} + + +/* Extension strings exported by the R200 driver. + */ +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_texture_border_clamp", NULL }, + { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, + { "GL_EXT_blend_subtract", NULL }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_stencil_wrap", NULL }, + { "GL_EXT_texture_edge_clamp", NULL }, + { "GL_EXT_texture_env_combine", NULL }, + { "GL_EXT_texture_env_dot3", NULL }, + { "GL_EXT_texture_filter_anisotropic", NULL }, + { "GL_EXT_texture_lod_bias", NULL }, + { "GL_EXT_texture_mirror_clamp", NULL }, + { "GL_EXT_texture_rectangle", NULL }, + { "GL_ATI_texture_env_combine3", NULL }, + { "GL_ATI_texture_mirror_once", NULL }, + { "GL_MESA_pack_invert", NULL }, + { "GL_NV_blend_square", NULL }, + { "GL_SGIS_generate_mipmap", NULL }, + { NULL, NULL } +}; + +const struct dri_extension blend_extensions[] = { + { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, + { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, + { NULL, NULL } +}; + +const struct dri_extension ARB_vp_extension[] = { + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions } +}; + +const struct dri_extension NV_vp_extension[] = { + { "GL_NV_vertex_program", GL_NV_vertex_program_functions } +}; + +const struct dri_extension ATI_fs_extension[] = { + { "GL_ATI_fragment_shader", GL_ATI_fragment_shader_functions } +}; + +const struct dri_extension point_extensions[] = { + { "GL_ARB_point_sprite", NULL }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { NULL, NULL } +}; + +extern const struct tnl_pipeline_stage _r200_render_stage; +extern const struct tnl_pipeline_stage _r200_tcl_stage; + +static const struct tnl_pipeline_stage *r200_pipeline[] = { + + /* Try and go straight to t&l + */ + &_r200_tcl_stage, + + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + /* Try again to go to tcl? + * - no good for asymmetric-twoside (do with multipass) + * - no good for asymmetric-unfilled (do with multipass) + * - good for material + * - good for texgen + * - need to manipulate a bit of state + * + * - worth it/not worth it? + */ + + /* Else do them here. + */ +/* &_r200_render_stage, */ /* FIXME: bugs with ut2003 */ + &_tnl_render_stage, /* FALLBACK: */ + NULL, +}; + + + +/* Initialize the driver's misc functions. + */ +static void r200InitDriverFuncs( struct dd_function_table *functions ) +{ + functions->GetBufferSize = NULL; /* OBSOLETE */ + functions->GetString = r200GetString; +} + +static const struct dri_debug_control debug_control[] = +{ + { "fall", DEBUG_FALLBACKS }, + { "tex", DEBUG_TEXTURE }, + { "ioctl", DEBUG_IOCTL }, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "state", DEBUG_STATE }, + { "code", DEBUG_CODEGEN }, + { "vfmt", DEBUG_VFMT }, + { "vtxf", DEBUG_VFMT }, + { "verb", DEBUG_VERBOSE }, + { "dri", DEBUG_DRI }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sync", DEBUG_SYNC }, + { "pix", DEBUG_PIXEL }, + { "mem", DEBUG_MEMORY }, + { NULL, 0 } +}; + + +/* Create the device specific rendering context. + */ +GLboolean r200CreateContext( const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); + struct dd_function_table functions; + r200ContextPtr rmesa; + GLcontext *ctx, *shareCtx; + int i; + int tcl_mode, fthrottle_mode; + + assert(glVisual); + assert(driContextPriv); + assert(screen); + + /* Allocate the R200 context */ + rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) ); + if ( !rmesa ) + return GL_FALSE; + + /* init exp fog table data */ + r200InitStaticFogData(); + + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, + screen->driScreen->myNum, "r200"); + rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, + "def_max_anisotropy"); + + if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { + if ( sPriv->drmMinor < 13 ) + fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " + "disabling.\n",sPriv->drmMinor ); + else + rmesa->using_hyperz = GL_TRUE; + } + + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; + + /* Init default driver functions then plug in our R200-specific functions + * (the texture functions are especially important) + */ + _mesa_init_driver_functions(&functions); + r200InitDriverFuncs(&functions); + r200InitIoctlFuncs(&functions); + r200InitStateFuncs(&functions); + r200InitTextureFuncs(&functions); + r200InitShaderFuncs(&functions); + + /* Allocate and initialize the Mesa context */ + if (sharedContextPrivate) + shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx; + else + shareCtx = NULL; + rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, + &functions, (void *) rmesa); + if (!rmesa->glCtx) { + FREE(rmesa); + return GL_FALSE; + } + driContextPriv->driverPrivate = rmesa; + + /* Init r200 context data */ + rmesa->dri.context = driContextPriv; + rmesa->dri.screen = sPriv; + rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */ + rmesa->dri.hwContext = driContextPriv->hHWContext; + rmesa->dri.hwLock = &sPriv->pSAREA->lock; + rmesa->dri.fd = sPriv->fd; + rmesa->dri.drmMinor = sPriv->drmMinor; + + rmesa->r200Screen = screen; + rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + + screen->sarea_priv_offset); + + + rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address; + + (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); + make_empty_list( & rmesa->swapped ); + + rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ; + assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS); + for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { + rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, + screen->texSize[i], + 12, + RADEON_NR_TEX_REGIONS, + (drmTextureRegionPtr)rmesa->sarea->tex_list[i], + & rmesa->sarea->tex_age[i], + & rmesa->swapped, + sizeof( r200TexObj ), + (destroy_texture_object_t *) r200DestroyTexObj ); + } + rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, + "texture_depth"); + if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) + rmesa->texture_depth = ( screen->cpp == 4 ) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + + rmesa->swtcl.RenderIndex = ~0; + rmesa->hw.all_dirty = 1; + + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have all of them in + * texturable memory at once. Depending on the allow_large_textures driconf + * setting allow larger textures. + */ + + ctx = rmesa->glCtx; + ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, + "texture_units"); + ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + + i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); + + driCalculateMaxTextureLevels( rmesa->texture_heaps, + rmesa->nr_heaps, + & ctx->Const, + 4, + 11, /* max 2D texture size is 2048x2048 */ +#if ENABLE_HW_3D_TEXTURE + 8, /* max 3D texture size is 256^3 */ +#else + 0, /* 3D textures unsupported */ +#endif + 11, /* max cube texture size is 2048x2048 */ + 11, /* max texture rectangle size is 2048x2048 */ + 12, + GL_FALSE, + i ); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + + /* No wide AA points. + */ + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSizeAA = 1.0; + ctx->Const.PointSizeGranularity = 0.0625; + if (rmesa->r200Screen->drmSupportsPointSprites) + ctx->Const.MaxPointSize = 2047.0; + else + ctx->Const.MaxPointSize = 1.0; + + /* mesa initialization problem - _mesa_init_point was already called */ + ctx->Point.MaxSize = ctx->Const.MaxPointSize; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 10.0; + ctx->Const.MaxLineWidthAA = 10.0; + ctx->Const.LineWidthGranularity = 0.0625; + + ctx->Const.VertexProgram.MaxNativeInstructions = R200_VSF_MAX_INST; + ctx->Const.VertexProgram.MaxNativeAttribs = 12; + ctx->Const.VertexProgram.MaxNativeTemps = R200_VSF_MAX_TEMPS; + ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + + /* Initialize the software rasterizer and helper modules. + */ + _swrast_CreateContext( ctx ); + _vbo_CreateContext( ctx ); + _tnl_CreateContext( ctx ); + _swsetup_CreateContext( ctx ); + _ae_create_context( ctx ); + + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline( ctx ); + _tnl_install_pipeline( ctx, r200_pipeline ); + + /* Try and keep materials and vertices separate: + */ +/* _tnl_isolate_materials( ctx, GL_TRUE ); */ + + + /* Configure swrast and TNL to match hardware characteristics: + */ + _swrast_allow_pixel_fog( ctx, GL_FALSE ); + _swrast_allow_vertex_fog( ctx, GL_TRUE ); + _tnl_allow_pixel_fog( ctx, GL_FALSE ); + _tnl_allow_vertex_fog( ctx, GL_TRUE ); + + + for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) { + _math_matrix_ctr( &rmesa->TexGenMatrix[i] ); + _math_matrix_set_identity( &rmesa->TexGenMatrix[i] ); + } + _math_matrix_ctr( &rmesa->tmpmat ); + _math_matrix_set_identity( &rmesa->tmpmat ); + + driInitExtensions( ctx, card_extensions, GL_TRUE ); + if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) { + /* yuv textures don't work with some chips - R200 / rv280 okay so far + others get the bit ordering right but don't actually do YUV-RGB conversion */ + _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" ); + } + if (rmesa->glCtx->Mesa_DXTn) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + _mesa_enable_extension( ctx, "GL_S3_s3tc" ); + } + else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + } + + if (rmesa->r200Screen->drmSupportsCubeMapsR200) + _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); + if (rmesa->r200Screen->drmSupportsBlendColor) { + driInitExtensions( ctx, blend_extensions, GL_FALSE ); + } + if(rmesa->r200Screen->drmSupportsVertexProgram) + driInitSingleExtension( ctx, ARB_vp_extension ); + if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program")) + driInitSingleExtension( ctx, NV_vp_extension ); + + if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader) + driInitSingleExtension( ctx, ATI_fs_extension ); + if (rmesa->r200Screen->drmSupportsPointSprites) + driInitExtensions( ctx, point_extensions, GL_FALSE ); +#if 0 + r200InitDriverFuncs( ctx ); + r200InitIoctlFuncs( ctx ); + r200InitStateFuncs( ctx ); + r200InitTextureFuncs( ctx ); +#endif + /* plug in a few more device driver functions */ + /* XXX these should really go right after _mesa_init_driver_functions() */ + r200InitPixelFuncs( ctx ); + r200InitSpanFuncs( ctx ); + r200InitTnlFuncs( ctx ); + r200InitState( rmesa ); + r200InitSwtcl( ctx ); + + fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); + rmesa->iw.irq_seq = -1; + rmesa->irqsEmitted = 0; + rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + rmesa->r200Screen->irq); + + rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + + if (!rmesa->do_irqs) + fprintf(stderr, + "IRQ's not enabled, falling back to %s: %d %d\n", + rmesa->do_usleeps ? "usleeps" : "busy waits", + fthrottle_mode, + rmesa->r200Screen->irq); + + rmesa->vblank_flags = (rmesa->r200Screen->irq != 0) + ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ; + + rmesa->prefer_gart_client_texturing = + (getenv("R200_GART_CLIENT_TEXTURES") != 0); + + (*dri_interface->getUST)( & rmesa->swap_ust ); + + +#if DO_DEBUG + R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ), + debug_control ); + R200_DEBUG |= driParseDebugString( getenv( "RADEON_DEBUG" ), + debug_control ); +#endif + + tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); + if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); + FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1); + } + else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") || + !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { + if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } + TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); + } + + return GL_TRUE; +} + + +/* Destroy the device specific context. + */ +/* Destroy the Mesa and driver specific context data. + */ +void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) +{ + GET_CURRENT_CONTEXT(ctx); + r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate; + r200ContextPtr current = ctx ? R200_CONTEXT(ctx) : NULL; + + /* check if we're deleting the currently bound context */ + if (rmesa == current) { + R200_FIREVERTICES( rmesa ); + _mesa_make_current(NULL, NULL, NULL); + } + + /* Free r200 context resources */ + assert(rmesa); /* should never be null */ + if ( rmesa ) { + GLboolean release_texture_heaps; + + + release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); + _swsetup_DestroyContext( rmesa->glCtx ); + _tnl_DestroyContext( rmesa->glCtx ); + _vbo_DestroyContext( rmesa->glCtx ); + _swrast_DestroyContext( rmesa->glCtx ); + + r200DestroySwtcl( rmesa->glCtx ); + r200ReleaseArrays( rmesa->glCtx, ~0 ); + + if (rmesa->dma.current.buf) { + r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); + r200FlushCmdBuf( rmesa, __FUNCTION__ ); + } + + if (rmesa->state.scissor.pClipRects) { + FREE(rmesa->state.scissor.pClipRects); + rmesa->state.scissor.pClipRects = NULL; + } + + if ( release_texture_heaps ) { + /* This share group is about to go away, free our private + * texture object data. + */ + int i; + + for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { + driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); + rmesa->texture_heaps[ i ] = NULL; + } + + assert( is_empty_list( & rmesa->swapped ) ); + } + + /* free the Mesa context */ + rmesa->glCtx->DriverCtx = NULL; + _mesa_destroy_context( rmesa->glCtx ); + + /* free the option cache */ + driDestroyOptionCache (&rmesa->optionCache); + + FREE( rmesa ); + } +} + + + + +void +r200SwapBuffers( __DRIdrawablePrivate *dPriv ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + r200ContextPtr rmesa; + GLcontext *ctx; + rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = rmesa->glCtx; + if (ctx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ + if ( rmesa->doPageFlip ) { + r200PageFlip( dPriv ); + } + else { + r200CopyBuffer( dPriv, NULL ); + } + } + } + else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); + } +} + +void +r200CopySubBuffer( __DRIdrawablePrivate *dPriv, + int x, int y, int w, int h ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + r200ContextPtr rmesa; + GLcontext *ctx; + rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = rmesa->glCtx; + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ + r200CopyBuffer( dPriv, &rect ); + } + } + else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); + } +} + +/* Force the context `c' to be the current context and associate with it + * buffer `b'. + */ +GLboolean +r200MakeCurrent( __DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv ) +{ + if ( driContextPriv ) { + r200ContextPtr newCtx = + (r200ContextPtr) driContextPriv->driverPrivate; + + if (R200_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx); + + if ( newCtx->dri.drawable != driDrawPriv ) { + driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, + &newCtx->vbl_seq ); + } + + newCtx->dri.readable = driReadPriv; + + if ( newCtx->dri.drawable != driDrawPriv || + newCtx->lastStamp != driDrawPriv->lastStamp ) { + newCtx->dri.drawable = driDrawPriv; + + r200SetCliprects(newCtx); + r200UpdateViewportOffset( newCtx->glCtx ); + } + + _mesa_make_current( newCtx->glCtx, + (GLframebuffer *) driDrawPriv->driverPrivate, + (GLframebuffer *) driReadPriv->driverPrivate ); + + _mesa_update_state( newCtx->glCtx ); + r200ValidateState( newCtx->glCtx ); + + } else { + if (R200_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + _mesa_make_current( NULL, NULL, NULL ); + } + + if (R200_DEBUG & DEBUG_DRI) + fprintf(stderr, "End %s\n", __FUNCTION__); + return GL_TRUE; +} + +/* Force the context `c' to be unbound from its buffer. + */ +GLboolean +r200UnbindContext( __DRIcontextPrivate *driContextPriv ) +{ + r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate; + + if (R200_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx); + + return GL_TRUE; +} diff --git a/r200/r200_context.h b/r200/r200_context.h new file mode 100644 index 0000000..a06a2f5 --- /dev/null +++ b/r200/r200_context.h @@ -0,0 +1,988 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_context.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_CONTEXT_H__ +#define __R200_CONTEXT_H__ + +#include "tnl/t_vertex.h" +#include "drm.h" +#include "radeon_drm.h" +#include "dri_util.h" +#include "texmem.h" + +#include "macros.h" +#include "mtypes.h" +#include "colormac.h" +#include "r200_reg.h" +#include "r200_vertprog.h" + +#define ENABLE_HW_3D_TEXTURE 1 /* XXX this is temporary! */ + +#ifndef R200_EMIT_VAP_PVS_CNTL +#error This driver requires a newer libdrm to compile +#endif + +struct r200_context; +typedef struct r200_context r200ContextRec; +typedef struct r200_context *r200ContextPtr; + +/* This union is used to avoid warnings/miscompilation + with float to uint32_t casts due to strict-aliasing */ +typedef union { GLfloat f; uint32_t ui32; } float_ui32_type; + +#include "r200_lock.h" +#include "radeon_screen.h" +#include "mm.h" + +/* Flags for software fallback cases */ +/* See correponding strings in r200_swtcl.c */ +#define R200_FALLBACK_TEXTURE 0x01 +#define R200_FALLBACK_DRAW_BUFFER 0x02 +#define R200_FALLBACK_STENCIL 0x04 +#define R200_FALLBACK_RENDER_MODE 0x08 +#define R200_FALLBACK_DISABLE 0x10 +#define R200_FALLBACK_BORDER_MODE 0x20 + +/* The blit width for texture uploads + */ +#define BLIT_WIDTH_BYTES 1024 + +/* Use the templated vertex format: + */ +#define COLOR_IS_RGBA +#define TAG(x) r200##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +typedef void (*r200_tri_func)( r200ContextPtr, + r200Vertex *, + r200Vertex *, + r200Vertex * ); + +typedef void (*r200_line_func)( r200ContextPtr, + r200Vertex *, + r200Vertex * ); + +typedef void (*r200_point_func)( r200ContextPtr, + r200Vertex * ); + + +struct r200_vertex_program { + struct gl_vertex_program mesa_program; /* Must be first */ + int translated; + /* need excess instr: 1 for late loop checking, 2 for + additional instr due to instr/attr, 3 for fog */ + VERTEX_SHADER_INSTRUCTION instr[R200_VSF_MAX_INST + 6]; + int pos_end; + int inputs[VERT_ATTRIB_MAX]; + GLubyte inputmap_rev[16]; + int native; + int fogpidx; + int fogmode; +}; + +struct r200_colorbuffer_state { + GLuint clear; +#if 000 + GLint drawOffset, drawPitch; +#endif + int roundEnable; +}; + + +struct r200_depthbuffer_state { + GLuint clear; + GLfloat scale; +}; + +#if 000 +struct r200_pixel_state { + GLint readOffset, readPitch; +}; +#endif + +struct r200_scissor_state { + drm_clip_rect_t rect; + GLboolean enabled; + + GLuint numClipRects; /* Cliprects active */ + GLuint numAllocedClipRects; /* Cliprects available */ + drm_clip_rect_t *pClipRects; +}; + +struct r200_stencilbuffer_state { + GLboolean hwBuffer; + GLuint clear; /* rb3d_stencilrefmask value */ +}; + +struct r200_stipple_state { + GLuint mask[32]; +}; + + + +#define TEX_0 0x1 +#define TEX_1 0x2 +#define TEX_2 0x4 +#define TEX_3 0x8 +#define TEX_4 0x10 +#define TEX_5 0x20 +#define TEX_ALL 0x3f + +typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr; + +/* Texture object in locally shared texture space. + */ +struct r200_tex_obj { + driTextureObject base; + + GLuint bufAddr; /* Offset to start of locally + shared texture block */ + + GLuint dirty_state; /* Flags (1 per texunit) for + whether or not this texobj + has dirty hardware state + (pp_*) that needs to be + brought into the + texunit. */ + + drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + /* Six, for the cube faces */ + GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ + + GLuint pp_txfilter; /* hardware register values */ + GLuint pp_txformat; + GLuint pp_txformat_x; + GLuint pp_txoffset; /* Image location in texmem. + All cube faces follow. */ + GLuint pp_txsize; /* npot only */ + GLuint pp_txpitch; /* npot only */ + GLuint pp_border_color; + GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ + + GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ +}; + + +struct r200_texture_env_state { + r200TexObjPtr texobj; + GLuint outputreg; + GLuint unitneeded; +}; + +#define R200_MAX_TEXTURE_UNITS 6 + +struct r200_texture_state { + struct r200_texture_env_state unit[R200_MAX_TEXTURE_UNITS]; +}; + + +struct r200_state_atom { + struct r200_state_atom *next, *prev; + const char *name; /* for debug */ + int cmd_size; /* size in bytes */ + GLuint idx; + int *cmd; /* one or more cmd's */ + int *lastcmd; /* one or more cmd's */ + GLboolean dirty; + GLboolean (*check)( GLcontext *, int ); /* is this state active? */ +}; + + + +/* Trying to keep these relatively short as the variables are becoming + * extravagently long. Drop the driver name prefix off the front of + * everything - I think we know which driver we're in by now, and keep the + * prefix to 3 letters unless absolutely impossible. + */ + +#define CTX_CMD_0 0 +#define CTX_PP_MISC 1 +#define CTX_PP_FOG_COLOR 2 +#define CTX_RE_SOLID_COLOR 3 +#define CTX_RB3D_BLENDCNTL 4 +#define CTX_RB3D_DEPTHOFFSET 5 +#define CTX_RB3D_DEPTHPITCH 6 +#define CTX_RB3D_ZSTENCILCNTL 7 +#define CTX_CMD_1 8 +#define CTX_PP_CNTL 9 +#define CTX_RB3D_CNTL 10 +#define CTX_RB3D_COLOROFFSET 11 +#define CTX_CMD_2 12 /* why */ +#define CTX_RB3D_COLORPITCH 13 /* why */ +#define CTX_STATE_SIZE_OLDDRM 14 +#define CTX_CMD_3 14 +#define CTX_RB3D_BLENDCOLOR 15 +#define CTX_RB3D_ABLENDCNTL 16 +#define CTX_RB3D_CBLENDCNTL 17 +#define CTX_STATE_SIZE_NEWDRM 18 + +#define SET_CMD_0 0 +#define SET_SE_CNTL 1 +#define SET_RE_CNTL 2 /* replace se_coord_fmt */ +#define SET_STATE_SIZE 3 + +#define VTE_CMD_0 0 +#define VTE_SE_VTE_CNTL 1 +#define VTE_STATE_SIZE 2 + +#define LIN_CMD_0 0 +#define LIN_RE_LINE_PATTERN 1 +#define LIN_RE_LINE_STATE 2 +#define LIN_CMD_1 3 +#define LIN_SE_LINE_WIDTH 4 +#define LIN_STATE_SIZE 5 + +#define MSK_CMD_0 0 +#define MSK_RB3D_STENCILREFMASK 1 +#define MSK_RB3D_ROPCNTL 2 +#define MSK_RB3D_PLANEMASK 3 +#define MSK_STATE_SIZE 4 + +#define VPT_CMD_0 0 +#define VPT_SE_VPORT_XSCALE 1 +#define VPT_SE_VPORT_XOFFSET 2 +#define VPT_SE_VPORT_YSCALE 3 +#define VPT_SE_VPORT_YOFFSET 4 +#define VPT_SE_VPORT_ZSCALE 5 +#define VPT_SE_VPORT_ZOFFSET 6 +#define VPT_STATE_SIZE 7 + +#define ZBS_CMD_0 0 +#define ZBS_SE_ZBIAS_FACTOR 1 +#define ZBS_SE_ZBIAS_CONSTANT 2 +#define ZBS_STATE_SIZE 3 + +#define MSC_CMD_0 0 +#define MSC_RE_MISC 1 +#define MSC_STATE_SIZE 2 + +#define TAM_CMD_0 0 +#define TAM_DEBUG3 1 +#define TAM_STATE_SIZE 2 + +#define TEX_CMD_0 0 +#define TEX_PP_TXFILTER 1 /*2c00*/ +#define TEX_PP_TXFORMAT 2 /*2c04*/ +#define TEX_PP_TXFORMAT_X 3 /*2c08*/ +#define TEX_PP_TXSIZE 4 /*2c0c*/ +#define TEX_PP_TXPITCH 5 /*2c10*/ +#define TEX_PP_BORDER_COLOR 6 /*2c14*/ +#define TEX_CMD_1_OLDDRM 7 +#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */ +#define TEX_STATE_SIZE_OLDDRM 9 +#define TEX_PP_CUBIC_FACES 7 +#define TEX_PP_TXMULTI_CTL 8 +#define TEX_CMD_1_NEWDRM 9 +#define TEX_PP_TXOFFSET_NEWDRM 10 +#define TEX_STATE_SIZE_NEWDRM 11 + +#define CUBE_CMD_0 0 /* 1 register follows */ /* this command unnecessary */ +#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ /* with new enough drm */ +#define CUBE_CMD_1 2 /* 5 registers follow */ +#define CUBE_PP_CUBIC_OFFSET_F1 3 /* 0x2d04 */ +#define CUBE_PP_CUBIC_OFFSET_F2 4 /* 0x2d08 */ +#define CUBE_PP_CUBIC_OFFSET_F3 5 /* 0x2d0c */ +#define CUBE_PP_CUBIC_OFFSET_F4 6 /* 0x2d10 */ +#define CUBE_PP_CUBIC_OFFSET_F5 7 /* 0x2d14 */ +#define CUBE_STATE_SIZE 8 + +#define PIX_CMD_0 0 +#define PIX_PP_TXCBLEND 1 +#define PIX_PP_TXCBLEND2 2 +#define PIX_PP_TXABLEND 3 +#define PIX_PP_TXABLEND2 4 +#define PIX_STATE_SIZE 5 + +#define TF_CMD_0 0 +#define TF_TFACTOR_0 1 +#define TF_TFACTOR_1 2 +#define TF_TFACTOR_2 3 +#define TF_TFACTOR_3 4 +#define TF_TFACTOR_4 5 +#define TF_TFACTOR_5 6 +#define TF_STATE_SIZE 7 + +#define ATF_CMD_0 0 +#define ATF_TFACTOR_0 1 +#define ATF_TFACTOR_1 2 +#define ATF_TFACTOR_2 3 +#define ATF_TFACTOR_3 4 +#define ATF_TFACTOR_4 5 +#define ATF_TFACTOR_5 6 +#define ATF_TFACTOR_6 7 +#define ATF_TFACTOR_7 8 +#define ATF_STATE_SIZE 9 + +/* ATI_FRAGMENT_SHADER */ +#define AFS_CMD_0 0 +#define AFS_IC0 1 /* 2f00 */ +#define AFS_IC1 2 /* 2f04 */ +#define AFS_IA0 3 /* 2f08 */ +#define AFS_IA1 4 /* 2f0c */ +#define AFS_STATE_SIZE 33 + +#define PVS_CMD_0 0 +#define PVS_CNTL_1 1 +#define PVS_CNTL_2 2 +#define PVS_STATE_SIZE 3 + +/* those are quite big... */ +#define VPI_CMD_0 0 +#define VPI_OPDST_0 1 +#define VPI_SRC0_0 2 +#define VPI_SRC1_0 3 +#define VPI_SRC2_0 4 +#define VPI_OPDST_63 253 +#define VPI_SRC0_63 254 +#define VPI_SRC1_63 255 +#define VPI_SRC2_63 256 +#define VPI_STATE_SIZE 257 + +#define VPP_CMD_0 0 +#define VPP_PARAM0_0 1 +#define VPP_PARAM1_0 2 +#define VPP_PARAM2_0 3 +#define VPP_PARAM3_0 4 +#define VPP_PARAM0_95 381 +#define VPP_PARAM1_95 382 +#define VPP_PARAM2_95 383 +#define VPP_PARAM3_95 384 +#define VPP_STATE_SIZE 385 + +#define TCL_CMD_0 0 +#define TCL_LIGHT_MODEL_CTL_0 1 +#define TCL_LIGHT_MODEL_CTL_1 2 +#define TCL_PER_LIGHT_CTL_0 3 +#define TCL_PER_LIGHT_CTL_1 4 +#define TCL_PER_LIGHT_CTL_2 5 +#define TCL_PER_LIGHT_CTL_3 6 +#define TCL_CMD_1 7 +#define TCL_UCP_VERT_BLEND_CTL 8 +#define TCL_STATE_SIZE 9 + +#define MSL_CMD_0 0 +#define MSL_MATRIX_SELECT_0 1 +#define MSL_MATRIX_SELECT_1 2 +#define MSL_MATRIX_SELECT_2 3 +#define MSL_MATRIX_SELECT_3 4 +#define MSL_MATRIX_SELECT_4 5 +#define MSL_STATE_SIZE 6 + +#define TCG_CMD_0 0 +#define TCG_TEX_PROC_CTL_2 1 +#define TCG_TEX_PROC_CTL_3 2 +#define TCG_TEX_PROC_CTL_0 3 +#define TCG_TEX_PROC_CTL_1 4 +#define TCG_TEX_CYL_WRAP_CTL 5 +#define TCG_STATE_SIZE 6 + +#define MTL_CMD_0 0 +#define MTL_EMMISSIVE_RED 1 +#define MTL_EMMISSIVE_GREEN 2 +#define MTL_EMMISSIVE_BLUE 3 +#define MTL_EMMISSIVE_ALPHA 4 +#define MTL_AMBIENT_RED 5 +#define MTL_AMBIENT_GREEN 6 +#define MTL_AMBIENT_BLUE 7 +#define MTL_AMBIENT_ALPHA 8 +#define MTL_DIFFUSE_RED 9 +#define MTL_DIFFUSE_GREEN 10 +#define MTL_DIFFUSE_BLUE 11 +#define MTL_DIFFUSE_ALPHA 12 +#define MTL_SPECULAR_RED 13 +#define MTL_SPECULAR_GREEN 14 +#define MTL_SPECULAR_BLUE 15 +#define MTL_SPECULAR_ALPHA 16 +#define MTL_CMD_1 17 +#define MTL_SHININESS 18 +#define MTL_STATE_SIZE 19 + +#define VAP_CMD_0 0 +#define VAP_SE_VAP_CNTL 1 +#define VAP_STATE_SIZE 2 + +/* Replaces a lot of packet info from radeon + */ +#define VTX_CMD_0 0 +#define VTX_VTXFMT_0 1 +#define VTX_VTXFMT_1 2 +#define VTX_TCL_OUTPUT_VTXFMT_0 3 +#define VTX_TCL_OUTPUT_VTXFMT_1 4 +#define VTX_CMD_1 5 +#define VTX_TCL_OUTPUT_COMPSEL 6 +#define VTX_CMD_2 7 +#define VTX_STATE_CNTL 8 +#define VTX_STATE_SIZE 9 + +/* SPR - point sprite state + */ +#define SPR_CMD_0 0 +#define SPR_POINT_SPRITE_CNTL 1 +#define SPR_STATE_SIZE 2 + +#define PTP_CMD_0 0 +#define PTP_VPORT_SCALE_0 1 +#define PTP_VPORT_SCALE_1 2 +#define PTP_VPORT_SCALE_PTSIZE 3 +#define PTP_VPORT_SCALE_3 4 +#define PTP_CMD_1 5 +#define PTP_ATT_CONST_QUAD 6 +#define PTP_ATT_CONST_LIN 7 +#define PTP_ATT_CONST_CON 8 +#define PTP_ATT_CONST_3 9 +#define PTP_EYE_X 10 +#define PTP_EYE_Y 11 +#define PTP_EYE_Z 12 +#define PTP_EYE_3 13 +#define PTP_CLAMP_MIN 14 +#define PTP_CLAMP_MAX 15 +#define PTP_CLAMP_2 16 +#define PTP_CLAMP_3 17 +#define PTP_STATE_SIZE 18 + +#define VTX_COLOR(v,n) (((v)>>(R200_VTX_COLOR_0_SHIFT+(n)*2))&\ + R200_VTX_COLOR_MASK) + +/** + * Given the \c R200_SE_VTX_FMT_1 for the current vertex state, determine + * how many components are in texture coordinate \c n. + */ +#define VTX_TEXn_COUNT(v,n) (((v) >> (3 * n)) & 0x07) + +#define MAT_CMD_0 0 +#define MAT_ELT_0 1 +#define MAT_STATE_SIZE 17 + +#define GRD_CMD_0 0 +#define GRD_VERT_GUARD_CLIP_ADJ 1 +#define GRD_VERT_GUARD_DISCARD_ADJ 2 +#define GRD_HORZ_GUARD_CLIP_ADJ 3 +#define GRD_HORZ_GUARD_DISCARD_ADJ 4 +#define GRD_STATE_SIZE 5 + +/* position changes frequently when lighting in modelpos - separate + * out to new state item? + */ +#define LIT_CMD_0 0 +#define LIT_AMBIENT_RED 1 +#define LIT_AMBIENT_GREEN 2 +#define LIT_AMBIENT_BLUE 3 +#define LIT_AMBIENT_ALPHA 4 +#define LIT_DIFFUSE_RED 5 +#define LIT_DIFFUSE_GREEN 6 +#define LIT_DIFFUSE_BLUE 7 +#define LIT_DIFFUSE_ALPHA 8 +#define LIT_SPECULAR_RED 9 +#define LIT_SPECULAR_GREEN 10 +#define LIT_SPECULAR_BLUE 11 +#define LIT_SPECULAR_ALPHA 12 +#define LIT_POSITION_X 13 +#define LIT_POSITION_Y 14 +#define LIT_POSITION_Z 15 +#define LIT_POSITION_W 16 +#define LIT_DIRECTION_X 17 +#define LIT_DIRECTION_Y 18 +#define LIT_DIRECTION_Z 19 +#define LIT_DIRECTION_W 20 +#define LIT_ATTEN_QUADRATIC 21 +#define LIT_ATTEN_LINEAR 22 +#define LIT_ATTEN_CONST 23 +#define LIT_ATTEN_XXX 24 +#define LIT_CMD_1 25 +#define LIT_SPOT_DCD 26 +#define LIT_SPOT_DCM 27 +#define LIT_SPOT_EXPONENT 28 +#define LIT_SPOT_CUTOFF 29 +#define LIT_SPECULAR_THRESH 30 +#define LIT_RANGE_CUTOFF 31 /* ? */ +#define LIT_ATTEN_CONST_INV 32 +#define LIT_STATE_SIZE 33 + +/* Fog + */ +#define FOG_CMD_0 0 +#define FOG_R 1 +#define FOG_C 2 +#define FOG_D 3 +#define FOG_PAD 4 +#define FOG_STATE_SIZE 5 + +/* UCP + */ +#define UCP_CMD_0 0 +#define UCP_X 1 +#define UCP_Y 2 +#define UCP_Z 3 +#define UCP_W 4 +#define UCP_STATE_SIZE 5 + +/* GLT - Global ambient + */ +#define GLT_CMD_0 0 +#define GLT_RED 1 +#define GLT_GREEN 2 +#define GLT_BLUE 3 +#define GLT_ALPHA 4 +#define GLT_STATE_SIZE 5 + +/* EYE + */ +#define EYE_CMD_0 0 +#define EYE_X 1 +#define EYE_Y 2 +#define EYE_Z 3 +#define EYE_RESCALE_FACTOR 4 +#define EYE_STATE_SIZE 5 + +/* CST - constant state + */ +#define CST_CMD_0 0 +#define CST_PP_CNTL_X 1 +#define CST_CMD_1 2 +#define CST_RB3D_DEPTHXY_OFFSET 3 +#define CST_CMD_2 4 +#define CST_RE_AUX_SCISSOR_CNTL 5 +#define CST_CMD_3 6 +#define CST_RE_SCISSOR_TL_0 7 +#define CST_RE_SCISSOR_BR_0 8 +#define CST_CMD_4 9 +#define CST_SE_VAP_CNTL_STATUS 10 +#define CST_CMD_5 11 +#define CST_RE_POINTSIZE 12 +#define CST_CMD_6 13 +#define CST_SE_TCL_INPUT_VTX_0 14 +#define CST_SE_TCL_INPUT_VTX_1 15 +#define CST_SE_TCL_INPUT_VTX_2 16 +#define CST_SE_TCL_INPUT_VTX_3 17 +#define CST_STATE_SIZE 18 + +#define PRF_CMD_0 0 +#define PRF_PP_TRI_PERF 1 +#define PRF_PP_PERF_CNTL 2 +#define PRF_STATE_SIZE 3 + + +struct r200_hw_state { + /* Head of the linked list of state atoms. */ + struct r200_state_atom atomlist; + + /* Hardware state, stored as cmdbuf commands: + * -- Need to doublebuffer for + * - reviving state after loss of context + * - eliding noop statechange loops? (except line stipple count) + */ + struct r200_state_atom ctx; + struct r200_state_atom set; + struct r200_state_atom vte; + struct r200_state_atom lin; + struct r200_state_atom msk; + struct r200_state_atom vpt; + struct r200_state_atom vap; + struct r200_state_atom vtx; + struct r200_state_atom tcl; + struct r200_state_atom msl; + struct r200_state_atom tcg; + struct r200_state_atom msc; + struct r200_state_atom cst; + struct r200_state_atom tam; + struct r200_state_atom tf; + struct r200_state_atom tex[6]; + struct r200_state_atom cube[6]; + struct r200_state_atom zbs; + struct r200_state_atom mtl[2]; + struct r200_state_atom mat[9]; + struct r200_state_atom lit[8]; /* includes vec, scl commands */ + struct r200_state_atom ucp[6]; + struct r200_state_atom pix[6]; /* pixshader stages */ + struct r200_state_atom eye; /* eye pos */ + struct r200_state_atom grd; /* guard band clipping */ + struct r200_state_atom fog; + struct r200_state_atom glt; + struct r200_state_atom prf; + struct r200_state_atom afs[2]; + struct r200_state_atom pvs; + struct r200_state_atom vpi[2]; + struct r200_state_atom vpp[2]; + struct r200_state_atom atf; + struct r200_state_atom spr; + struct r200_state_atom ptp; + + int max_state_size; /* Number of bytes necessary for a full state emit. */ + GLboolean is_dirty, all_dirty; +}; + +struct r200_state { + /* Derived state for internal purposes: + */ + struct r200_colorbuffer_state color; + struct r200_depthbuffer_state depth; +#if 00 + struct r200_pixel_state pixel; +#endif + struct r200_scissor_state scissor; + struct r200_stencilbuffer_state stencil; + struct r200_stipple_state stipple; + struct r200_texture_state texture; + GLuint envneeded; +}; + +/* Need refcounting on dma buffers: + */ +struct r200_dma_buffer { + int refcount; /* the number of retained regions in buf */ + drmBufPtr buf; +}; + +#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset + \ + (rvb)->address - rmesa->dma.buf0_address + \ + (rvb)->start) + +/* A retained region, eg vertices for indexed vertices. + */ +struct r200_dma_region { + struct r200_dma_buffer *buf; + char *address; /* == buf->address */ + int start, end, ptr; /* offsets from start of buf */ + int aos_start; + int aos_stride; + int aos_size; +}; + + +struct r200_dma { + /* Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ + struct r200_dma_region current; + + void (*flush)( r200ContextPtr ); + + char *buf0_address; /* start of buf[0], for index calcs */ + GLuint nr_released_bufs; /* flush after so many buffers released */ +}; + +struct r200_dri_mirror { + __DRIcontextPrivate *context; /* DRI context */ + __DRIscreenPrivate *screen; /* DRI screen */ + __DRIdrawablePrivate *drawable; /* DRI drawable bound to this ctx */ + __DRIdrawablePrivate *readable; /* DRI readable bound to this ctx */ + + drm_context_t hwContext; + drm_hw_lock_t *hwLock; + int fd; + int drmMinor; +}; + + +#define R200_CMD_BUF_SZ (16*1024) + +struct r200_store { + GLuint statenr; + GLuint primnr; + char cmd_buf[R200_CMD_BUF_SZ]; + int cmd_used; + int elts_start; +}; + + +/* r200_tcl.c + */ +struct r200_tcl_info { + GLuint hw_primitive; + +/* hw can handle 12 components max */ + struct r200_dma_region *aos_components[12]; + GLuint nr_aos_components; + + GLuint *Elts; + + struct r200_dma_region indexed_verts; + struct r200_dma_region vertex_data[15]; +}; + + +/* r200_swtcl.c + */ +struct r200_swtcl_info { + GLuint RenderIndex; + + /** + * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is + * installed in the Mesa state vector. + */ + GLuint vertex_size; + + /** + * Attributes instructing the Mesa TCL pipeline where / how to put vertex + * data in the hardware buffer. + */ + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + + /** + * Number of elements of \c ::vertex_attrs that are actually used. + */ + GLuint vertex_attr_count; + + /** + * Cached pointer to the buffer where Mesa will store vertex data. + */ + GLubyte *verts; + + /* Fallback rasterization functions + */ + r200_point_func draw_point; + r200_line_func draw_line; + r200_tri_func draw_tri; + + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + /** + * Should Mesa project vertex data or will the hardware do it? + */ + GLboolean needproj; + + struct r200_dma_region indexed_verts; +}; + + +struct r200_ioctl { + GLuint vertex_offset; + GLuint vertex_size; +}; + + + +#define R200_MAX_PRIMS 64 + + + +struct r200_prim { + GLuint start; + GLuint end; + GLuint prim; +}; + + /* A maximum total of 29 elements per vertex: 3 floats for position, 3 + * floats for normal, 4 floats for color, 4 bytes for secondary color, + * 3 floats for each texture unit (18 floats total). + * + * we maybe need add. 4 to prevent segfault if someone specifies + * GL_TEXTURE6/GL_TEXTURE7 (esp. for the codegen-path) (FIXME: ) + * + * The position data is never actually stored here, so 3 elements could be + * trimmed out of the buffer. + */ + +#define R200_MAX_VERTEX_SIZE ((3*6)+11) + + +struct r200_context { + GLcontext *glCtx; /* Mesa context */ + + /* Driver and hardware state management + */ + struct r200_hw_state hw; + struct r200_state state; + struct r200_vertex_program *curr_vp_hw; + + /* Texture object bookkeeping + */ + unsigned nr_heaps; + driTexHeap * texture_heaps[ RADEON_NR_TEX_HEAPS ]; + driTextureObject swapped; + int texture_depth; + float initialMaxAnisotropy; + + /* Rasterization and vertex state: + */ + GLuint TclFallback; + GLuint Fallback; + GLuint NewGLState; + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + + /* Vertex buffers + */ + struct r200_ioctl ioctl; + struct r200_dma dma; + struct r200_store store; + /* A full state emit as of the first state emit in the main store, in case + * the context is lost. + */ + struct r200_store backup_store; + + /* Page flipping + */ + GLuint doPageFlip; + + /* Busy waiting + */ + GLuint do_usleeps; + GLuint do_irqs; + GLuint irqsEmitted; + drm_radeon_irq_wait_t iw; + + /* Clientdata textures; + */ + GLuint prefer_gart_client_texturing; + + /* Drawable, cliprect and scissor information + */ + GLuint numClipRects; /* Cliprects for the draw buffer */ + drm_clip_rect_t *pClipRects; + unsigned int lastStamp; + GLboolean lost_context; + GLboolean save_on_next_emit; + radeonScreenPtr r200Screen; /* Screen private DRI data */ + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* TCL stuff + */ + GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS]; + GLboolean recheck_texgen[R200_MAX_TEXTURE_UNITS]; + GLboolean TexGenNeedNormals[R200_MAX_TEXTURE_UNITS]; + GLuint TexMatEnabled; + GLuint TexMatCompSel; + GLuint TexGenEnabled; + GLuint TexGenCompSel; + GLmatrix tmpmat; + + /* VBI / buffer swap + */ + GLuint vbl_seq; + GLuint vblank_flags; + + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; + + + /* r200_tcl.c + */ + struct r200_tcl_info tcl; + + /* r200_swtcl.c + */ + struct r200_swtcl_info swtcl; + + /* Mirrors of some DRI state + */ + struct r200_dri_mirror dri; + + /* Configuration cache + */ + driOptionCache optionCache; + + GLboolean using_hyperz; + GLboolean texmicrotile; + + struct ati_fragment_shader *afs_loaded; +}; + +#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) + + +static __inline GLuint r200PackColor( GLuint cpp, + GLubyte r, GLubyte g, + GLubyte b, GLubyte a ) +{ + switch ( cpp ) { + case 2: + return PACK_COLOR_565( r, g, b ); + case 4: + return PACK_COLOR_8888( a, r, g, b ); + default: + return 0; + } +} + + +extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv ); +extern GLboolean r200CreateContext( const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); +extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv ); +extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv, + int x, int y, int w, int h ); +extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv ); +extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv ); + +/* ================================================================ + * Debugging: + */ +#define DO_DEBUG 1 + +#if DO_DEBUG +extern int R200_DEBUG; +#else +#define R200_DEBUG 0 +#endif + +#define DEBUG_TEXTURE 0x001 +#define DEBUG_STATE 0x002 +#define DEBUG_IOCTL 0x004 +#define DEBUG_PRIMS 0x008 +#define DEBUG_VERTS 0x010 +#define DEBUG_FALLBACKS 0x020 +#define DEBUG_VFMT 0x040 +#define DEBUG_CODEGEN 0x080 +#define DEBUG_VERBOSE 0x100 +#define DEBUG_DRI 0x200 +#define DEBUG_DMA 0x400 +#define DEBUG_SANITY 0x800 +#define DEBUG_SYNC 0x1000 +#define DEBUG_PIXEL 0x2000 +#define DEBUG_MEMORY 0x4000 + +#endif /* __R200_CONTEXT_H__ */ diff --git a/r200/r200_fragshader.c b/r200/r200_fragshader.c new file mode 100644 index 0000000..5dd3ada --- /dev/null +++ b/r200/r200_fragshader.c @@ -0,0 +1,548 @@ +/************************************************************************** + * + * Copyright 2004 David Airlie + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "tnl/t_context.h" +#include "atifragshader.h" +#include "program.h" +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_tex.h" + +#define SET_INST(inst, type) afs_cmd[((inst<<2) + (type<<1) + 1)] +#define SET_INST_2(inst, type) afs_cmd[((inst<<2) + (type<<1) + 2)] + +static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype, + const struct atifragshader_src_register srcReg, + GLuint argPos, GLuint *tfactor ) +{ + const GLuint index = srcReg.Index; + const GLuint srcmod = srcReg.argMod; + const GLuint srcrep = srcReg.argRep; + GLuint reg0 = 0; + GLuint reg2 = 0; + GLuint useOddSrc = 0; + + switch(srcrep) { + case GL_RED: + reg2 |= R200_TXC_REPL_RED << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + if (optype) + useOddSrc = 1; + break; + case GL_GREEN: + reg2 |= R200_TXC_REPL_GREEN << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + if (optype) + useOddSrc = 1; + break; + case GL_BLUE: + if (!optype) + reg2 |= R200_TXC_REPL_BLUE << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos)); + else + useOddSrc = 1; + break; + case GL_ALPHA: + if (!optype) + useOddSrc = 1; + break; + } + + if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) + reg0 |= (((index - GL_REG_0_ATI)*2) + 10 + useOddSrc) << (5*argPos); + else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { + if ((*tfactor == 0) || (index == *tfactor)) { + reg0 |= (R200_TXC_ARG_A_TFACTOR_COLOR + useOddSrc) << (5*argPos); + reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR_SEL_SHIFT; + *tfactor = index; + } + else { + reg0 |= (R200_TXC_ARG_A_TFACTOR1_COLOR + useOddSrc) << (5*argPos); + reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR1_SEL_SHIFT; + } + } + else if (index == GL_PRIMARY_COLOR_EXT) { + reg0 |= (R200_TXC_ARG_A_DIFFUSE_COLOR + useOddSrc) << (5*argPos); + } + else if (index == GL_SECONDARY_INTERPOLATOR_ATI) { + reg0 |= (R200_TXC_ARG_A_SPECULAR_COLOR + useOddSrc) << (5*argPos); + } + /* GL_ZERO is a noop, for GL_ONE we set the complement */ + else if (index == GL_ONE) { + reg0 |= R200_TXC_COMP_ARG_A << (4*argPos); + } + + if (srcmod & GL_COMP_BIT_ATI) + reg0 ^= R200_TXC_COMP_ARG_A << (4*argPos); + if (srcmod & GL_BIAS_BIT_ATI) + reg0 |= R200_TXC_BIAS_ARG_A << (4*argPos); + if (srcmod & GL_2X_BIT_ATI) + reg0 |= R200_TXC_SCALE_ARG_A << (4*argPos); + if (srcmod & GL_NEGATE_BIT_ATI) + reg0 ^= R200_TXC_NEG_ARG_A << (4*argPos); + + SET_INST(opnum, optype) |= reg0; + SET_INST_2(opnum, optype) |= reg2; +} + +static GLuint dstmask_table[8] = +{ + R200_TXC_OUTPUT_MASK_RGB, + R200_TXC_OUTPUT_MASK_R, + R200_TXC_OUTPUT_MASK_G, + R200_TXC_OUTPUT_MASK_RG, + R200_TXC_OUTPUT_MASK_B, + R200_TXC_OUTPUT_MASK_RB, + R200_TXC_OUTPUT_MASK_GB, + R200_TXC_OUTPUT_MASK_RGB +}; + +static void r200UpdateFSArith( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint *afs_cmd; + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint pass; + + R200_STATECHANGE( rmesa, afs[0] ); + R200_STATECHANGE( rmesa, afs[1] ); + + if (shader->NumPasses < 2) { + afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd; + } + else { + afs_cmd = (GLuint *) rmesa->hw.afs[0].cmd; + } + for (pass = 0; pass < shader->NumPasses; pass++) { + GLuint opnum = 0; + GLuint pc; + for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { + GLuint optype; + struct atifs_instruction *inst = &shader->Instructions[pass][pc]; + + SET_INST(opnum, 0) = 0; + SET_INST_2(opnum, 0) = 0; + SET_INST(opnum, 1) = 0; + SET_INST_2(opnum, 1) = 0; + + for (optype = 0; optype < 2; optype++) { + GLuint tfactor = 0; + + if (inst->Opcode[optype]) { + switch (inst->Opcode[optype]) { + /* these are all MADD in disguise + MADD is A * B + C + so for GL_ADD use arg B/C and make A complement 0 + for GL_SUB use arg B/C, negate C and make A complement 0 + for GL_MOV use arg C + for GL_MUL use arg A + for GL_MAD all good */ + case GL_SUB_ATI: + /* negate C */ + SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C; + /* fallthrough */ + case GL_ADD_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 2, &tfactor); + /* A = complement 0 */ + SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A; + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_MOV_ATI: + /* put arg0 in C */ + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 2, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_MAD_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + /* fallthrough */ + case GL_MUL_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_MADD; + break; + case GL_LERP_ATI: + /* arg order is not native chip order, swap A and C */ + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 2, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 0, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_LERP; + break; + case GL_CND_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL; + break; + case GL_CND0_ATI: + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, optype, + inst->SrcReg[optype][2], 2, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_CND0; + break; + /* cannot specify dot ops as alpha ops directly */ + case GL_DOT2_ADD_ATI: + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][2], 2, &tfactor); + SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD; + } + break; + case GL_DOT3_ATI: + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + SET_INST(opnum, 0) |= R200_TXC_OP_DOT3; + } + break; + case GL_DOT4_ATI: + /* experimental verification: for dot4 setup of alpha args is needed + (dstmod is ignored, though, so dot2/dot3 should be safe) + the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4 + but the API doesn't allow it */ + if (optype) + SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA; + else { + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 0, + inst->SrcReg[0][1], 1, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 1, + inst->SrcReg[0][0], 0, &tfactor); + r200SetFragShaderArg(afs_cmd, opnum, 1, + inst->SrcReg[0][1], 1, &tfactor); + SET_INST(opnum, optype) |= R200_TXC_OP_DOT4; + } + break; + } + } + + /* destination */ + if (inst->DstReg[optype].Index) { + GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; + GLuint dstmask = inst->DstReg[optype].dstMask; + GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI; + GLuint dstmod = inst->DstReg[optype].dstMod; + + dstmod &= ~GL_SATURATE_BIT_ATI; + + SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT; + SET_INST_2(opnum, optype) |= dstmask_table[dstmask]; + + /* fglrx does clamp the last instructions to 0_1 it seems */ + /* this won't necessarily catch the last instruction + which writes to reg0 */ + if (sat || (pc == (shader->numArithInstr[pass] - 1) && + ((pass == 1) || (shader->NumPasses == 1)))) + SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1; + else + /*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */ + SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8; +/* SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/ + switch(dstmod) { + case GL_2X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X; + break; + case GL_4X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X; + break; + case GL_8X_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X; + break; + case GL_HALF_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2; + break; + case GL_QUARTER_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4; + break; + case GL_EIGHTH_BIT_ATI: + SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8; + break; + default: + break; + } + } + } +/* fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", + pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0), + SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/ + opnum++; + } + afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd; + } + rmesa->afs_loaded = ctx->ATIFragmentShader.Current; +} + +static void r200UpdateFSRouting( GLcontext *ctx ) { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint reg; + + R200_STATECHANGE( rmesa, ctx ); + R200_STATECHANGE( rmesa, cst ); + + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + if (shader->swizzlerq & (1 << (2 * reg))) + /* r coord */ + set_re_cntl_d3d( ctx, reg, 1); + /* q coord */ + else set_re_cntl_d3d( ctx, reg, 0); + } + + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE | + R200_TEX_BLEND_ENABLE_MASK | + R200_TEX_ENABLE_MASK); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK | + R200_PPX_TEX_ENABLE_MASK | + R200_PPX_OUTPUT_REG_MASK); + + /* first pass registers use slots 8 - 15 + but single pass shaders use slots 0 - 7 */ + if (shader->NumPasses < 2) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ? + 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) : + (0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE; + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ? + 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) : + (0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT; + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= + (0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT; + } + + if (shader->NumPasses < 2) { + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled; + R200_STATECHANGE( rmesa, tex[reg] ); + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0; + if (shader->SetupInst[0][reg].Opcode) { + GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] + & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK; + txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB) + << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when + using projection so don't have to worry there). + When passing coords, need R200_TEXCOORD_VOLUME, otherwise loose a coord */ + /* FIXME: someone might rely on default tex coords r/q, which we unfortunately + don't provide (we have the same problem without shaders) */ + if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat |= R200_TXFORMAT_LOOKUP_DISABLE; + if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + else if (targetbit == TEXTURE_3D_BIT) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else if (targetbit == TEXTURE_CUBE_BIT) { + txformat_x |= R200_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_NONPROJ; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat; + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x; + /* enabling texturing when unit isn't correctly configured may not be safe */ + if (targetbit) + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + } + + } else { + /* setup 1st pass */ + for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled; + R200_STATECHANGE( rmesa, tex[reg] ); + GLuint txformat_multi = 0; + if (shader->SetupInst[0][reg].Opcode) { + txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB) + << R200_PASS1_ST_ROUTE_SHIFT; + if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE; + if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_multi |= R200_PASS1_TEXCOORD_VOLUME; + } + else { + txformat_multi |= R200_PASS1_TEXCOORD_PROJ; + } + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg; + } + else if (targetbit == TEXTURE_3D_BIT) { + txformat_multi |= R200_PASS1_TEXCOORD_VOLUME; + } + else if (targetbit == TEXTURE_CUBE_BIT) { + txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ; + } + else { + txformat_multi |= R200_PASS1_TEXCOORD_PROJ; + } + if (targetbit) + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi; + } + + /* setup 2nd pass */ + for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) { + GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled; + if (shader->SetupInst[1][reg].Opcode) { + GLuint coord = shader->SetupInst[1][reg].src; + GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] + & ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK; + R200_STATECHANGE( rmesa, tex[reg] ); + if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { + txformat |= R200_TXFORMAT_LOOKUP_DISABLE; + txformat_x |= R200_TEXCOORD_VOLUME; + if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + else if (targetbit == TEXTURE_3D_BIT) { + txformat_x |= R200_TEXCOORD_VOLUME; + } + else if (targetbit == TEXTURE_CUBE_BIT) { + txformat_x |= R200_TEXCOORD_CUBIC_ENV; + } + else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI || + shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) { + txformat_x |= R200_TEXCOORD_NONPROJ; + } + else { + txformat_x |= R200_TEXCOORD_PROJ; + } + if (coord >= GL_REG_0_ATI) { + GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL]; + txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT; + rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi; + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 << + (R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI); + } else { + txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT; + } + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x; + rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat; + if (targetbit) + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg; + } + } + } +} + +static void r200UpdateFSConstants( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; + GLuint i; + + /* update constants */ + R200_STATECHANGE(rmesa, atf); + for (i = 0; i < 8; i++) + { + GLubyte con_byte[4]; + if ((shader->LocalConstDef >> i) & 1) { + CLAMPED_FLOAT_TO_UBYTE(con_byte[0], shader->Constants[i][0]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[1], shader->Constants[i][1]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[2], shader->Constants[i][2]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[3], shader->Constants[i][3]); + } + else { + CLAMPED_FLOAT_TO_UBYTE(con_byte[0], ctx->ATIFragmentShader.GlobalConstants[i][0]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[1], ctx->ATIFragmentShader.GlobalConstants[i][1]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]); + CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]); + } + rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor ( + 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] ); + } +} + +/* update routing, constants and arithmetic + * constants need to be updated always (globals can change, no separate notification) + * routing needs to be updated always too (non-shader code will overwrite state, plus + * some of the routing depends on what sort of texture is bound) + * for both of them, we need to update anyway because of disabling/enabling ati_fs which + * we'd need to track otherwise + * arithmetic is only updated if current shader changes (and probably the data should be + * stored in some DriverData object attached to the mesa atifs object, i.e. binding a + * shader wouldn't force us to "recompile" the shader). + */ +void r200UpdateFragmentShader( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + r200UpdateFSConstants( ctx ); + r200UpdateFSRouting( ctx ); + if (rmesa->afs_loaded != ctx->ATIFragmentShader.Current) + r200UpdateFSArith( ctx ); +} diff --git a/r200/r200_ioctl.c b/r200/r200_ioctl.c new file mode 100644 index 0000000..463bd64 --- /dev/null +++ b/r200/r200_ioctl.c @@ -0,0 +1,992 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.c,v 1.4 2002/12/17 00:32:56 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <sched.h> +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "macros.h" +#include "context.h" +#include "swrast/swrast.h" + +#include "r200_context.h" +#include "r200_state.h" +#include "r200_ioctl.h" +#include "r200_tcl.h" +#include "r200_sanity.h" +#include "radeon_reg.h" + +#include "drirenderbuffer.h" +#include "vblank.h" + +#define R200_TIMEOUT 512 +#define R200_IDLE_RETRY 16 + + +static void r200WaitForIdle( r200ContextPtr rmesa ); + + +/* At this point we were in FlushCmdBufLocked but we had lost our context, so + * we need to unwire our current cmdbuf, hook the one with the saved state in + * it, flush it, and then put the current one back. This is so commands at the + * start of a cmdbuf can rely on the state being kept from the previous one. + */ +static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa ) +{ + GLuint nr_released_bufs; + struct r200_store saved_store; + + if (rmesa->backup_store.cmd_used == 0) + return; + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "Emitting backup state on lost context\n"); + + rmesa->lost_context = GL_FALSE; + + nr_released_bufs = rmesa->dma.nr_released_bufs; + saved_store = rmesa->store; + rmesa->dma.nr_released_bufs = 0; + rmesa->store = rmesa->backup_store; + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + rmesa->dma.nr_released_bufs = nr_released_bufs; + rmesa->store = saved_store; +} + +int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ) +{ + int ret, i; + drm_radeon_cmd_buffer_t cmd; + + if (rmesa->lost_context) + r200BackUpAndEmitLostStateLocked( rmesa ); + + if (R200_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (0 & R200_DEBUG & DEBUG_VERBOSE) + for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) + fprintf(stderr, "%d: %x\n", i/4, + *(int *)(&rmesa->store.cmd_buf[i])); + } + + if (R200_DEBUG & DEBUG_DMA) + fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, + rmesa->dma.nr_released_bufs); + + + if (R200_DEBUG & DEBUG_SANITY) { + if (rmesa->state.scissor.enabled) + ret = r200SanityCmdBuffer( rmesa, + rmesa->state.scissor.numClipRects, + rmesa->state.scissor.pClipRects); + else + ret = r200SanityCmdBuffer( rmesa, + rmesa->numClipRects, + rmesa->pClipRects); + if (ret) { + fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); + goto out; + } + } + + + if (R200_DEBUG & DEBUG_MEMORY) { + if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps, + & rmesa->swapped ) ) { + fprintf( stderr, "%s: texture memory is inconsistent - expect " + "mangled textures\n", __FUNCTION__ ); + } + } + + + cmd.bufsz = rmesa->store.cmd_used; + cmd.buf = rmesa->store.cmd_buf; + + if (rmesa->state.scissor.enabled) { + cmd.nbox = rmesa->state.scissor.numClipRects; + cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects; + } else { + cmd.nbox = rmesa->numClipRects; + cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects; + } + + ret = drmCommandWrite( rmesa->dri.fd, + DRM_RADEON_CMDBUF, + &cmd, sizeof(cmd) ); + + if (ret) + fprintf(stderr, "drmCommandWrite: %d\n", ret); + + if (R200_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); + r200WaitForIdleLocked( rmesa ); + } + + + out: + rmesa->store.primnr = 0; + rmesa->store.statenr = 0; + rmesa->store.cmd_used = 0; + rmesa->dma.nr_released_bufs = 0; + rmesa->save_on_next_emit = 1; + + return ret; +} + + +/* Note: does not emit any commands to avoid recursion on + * r200AllocCmdBuf. + */ +void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller ) +{ + int ret; + + LOCK_HARDWARE( rmesa ); + + ret = r200FlushCmdBufLocked( rmesa, caller ); + + UNLOCK_HARDWARE( rmesa ); + + if (ret) { + fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret); + exit(ret); + } +} + + +/* ============================================================= + * Hardware vertex buffer handling + */ + + +void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ) +{ + struct r200_dma_buffer *dmabuf; + int fd = rmesa->dri.fd; + int index = 0; + int size = 0; + drmDMAReq dma; + int ret; + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush( rmesa ); + } + + if (rmesa->dma.current.buf) + r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); + + if (rmesa->dma.nr_released_bufs > 4) + r200FlushCmdBuf( rmesa, __FUNCTION__ ); + + dma.context = rmesa->dri.hwContext; + dma.send_count = 0; + dma.send_list = NULL; + dma.send_sizes = NULL; + dma.flags = 0; + dma.request_count = 1; + dma.request_size = RADEON_BUFFER_SIZE; + dma.request_list = &index; + dma.request_sizes = &size; + dma.granted_count = 0; + + LOCK_HARDWARE(rmesa); /* no need to validate */ + + while (1) { + ret = drmDMA( fd, &dma ); + if (ret == 0) + break; + + if (rmesa->dma.nr_released_bufs) { + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + } + + if (rmesa->do_usleeps) { + UNLOCK_HARDWARE( rmesa ); + DO_USLEEP( 1 ); + LOCK_HARDWARE( rmesa ); + } + } + + UNLOCK_HARDWARE(rmesa); + + if (R200_DEBUG & DEBUG_DMA) + fprintf(stderr, "Allocated buffer %d\n", index); + + dmabuf = CALLOC_STRUCT( r200_dma_buffer ); + dmabuf->buf = &rmesa->r200Screen->buffers->list[index]; + dmabuf->refcount = 1; + + rmesa->dma.current.buf = dmabuf; + rmesa->dma.current.address = dmabuf->buf->address; + rmesa->dma.current.end = dmabuf->buf->total; + rmesa->dma.current.start = 0; + rmesa->dma.current.ptr = 0; +} + +void r200ReleaseDmaRegion( r200ContextPtr rmesa, + struct r200_dma_region *region, + const char *caller ) +{ + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (!region->buf) + return; + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (--region->buf->refcount == 0) { + drm_radeon_cmd_header_t *cmd; + + if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) + fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, + region->buf->buf->idx); + + cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), + __FUNCTION__ ); + cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; + cmd->dma.buf_idx = region->buf->buf->idx; + FREE(region->buf); + rmesa->dma.nr_released_bufs++; + } + + region->buf = NULL; + region->start = 0; +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void r200AllocDmaRegion( r200ContextPtr rmesa, + struct r200_dma_region *region, + int bytes, + int alignment ) +{ + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (region->buf) + r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ ); + + alignment--; + rmesa->dma.current.start = rmesa->dma.current.ptr = + (rmesa->dma.current.ptr + alignment) & ~alignment; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + r200RefillCurrentDmaRegion( rmesa ); + + region->start = rmesa->dma.current.start; + region->ptr = rmesa->dma.current.start; + region->end = rmesa->dma.current.start + bytes; + region->address = rmesa->dma.current.address; + region->buf = rmesa->dma.current.buf; + region->buf->refcount++; + + rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + rmesa->dma.current.start = + rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; + + assert( rmesa->dma.current.ptr <= rmesa->dma.current.end ); +} + +/* ================================================================ + * SwapBuffers with client-side throttling + */ + +static u_int32_t r200GetLastFrame(r200ContextPtr rmesa) +{ + drm_radeon_getparam_t gp; + int ret; + u_int32_t frame; + + gp.param = RADEON_PARAM_LAST_FRAME; + gp.value = (int *)&frame; + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp) ); + if ( ret ) { + fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret ); + exit(1); + } + + return frame; +} + +static void r200EmitIrqLocked( r200ContextPtr rmesa ) +{ + drm_radeon_irq_emit_t ie; + int ret; + + ie.irq_seq = &rmesa->iw.irq_seq; + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, + &ie, sizeof(ie) ); + if ( ret ) { + fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret ); + exit(1); + } +} + + +static void r200WaitIrq( r200ContextPtr rmesa ) +{ + int ret; + + do { + ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, + &rmesa->iw, sizeof(rmesa->iw) ); + } while (ret && (errno == EINTR || errno == EBUSY)); + + if ( ret ) { + fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); + exit(1); + } +} + + +static void r200WaitForFrameCompletion( r200ContextPtr rmesa ) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (rmesa->do_irqs) { + if (r200GetLastFrame(rmesa) < sarea->last_frame) { + if (!rmesa->irqsEmitted) { + while (r200GetLastFrame (rmesa) < sarea->last_frame) + ; + } + else { + UNLOCK_HARDWARE( rmesa ); + r200WaitIrq( rmesa ); + LOCK_HARDWARE( rmesa ); + } + rmesa->irqsEmitted = 10; + } + + if (rmesa->irqsEmitted) { + r200EmitIrqLocked( rmesa ); + rmesa->irqsEmitted--; + } + } + else { + while (r200GetLastFrame (rmesa) < sarea->last_frame) { + UNLOCK_HARDWARE( rmesa ); + if (rmesa->do_usleeps) + DO_USLEEP( 1 ); + LOCK_HARDWARE( rmesa ); + } + } +} + + + +/* Copy the back color buffer to the front color buffer. + */ +void r200CopyBuffer( const __DRIdrawablePrivate *dPriv, + const drm_clip_rect_t *rect) +{ + r200ContextPtr rmesa; + GLint nbox, i, ret; + GLboolean missed_target; + int64_t ust; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; + + if ( R200_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx ); + } + + R200_FIREVERTICES( rmesa ); + + LOCK_HARDWARE( rmesa ); + + + /* Throttle the frame rate -- only allow one pending swap buffers + * request at a time. + */ + r200WaitForFrameCompletion( rmesa ); + if (!rect) + { + UNLOCK_HARDWARE( rmesa ); + driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + LOCK_HARDWARE( rmesa ); + } + + nbox = dPriv->numClipRects; /* must be in locked region */ + + for ( i = 0 ; i < nbox ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + GLint n = 0; + + for ( ; i < nr ; i++ ) { + + *b = box[i]; + + if (rect) + { + if (rect->x1 > b->x1) + b->x1 = rect->x1; + if (rect->y1 > b->y1) + b->y1 = rect->y1; + if (rect->x2 < b->x2) + b->x2 = rect->x2; + if (rect->y2 < b->y2) + b->y2 = rect->y2; + + if (b->x1 < b->x2 && b->y1 < b->y2) + b++; + } + else + b++; + + n++; + } + rmesa->sarea->nbox = n; + + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); + + if ( ret ) { + fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret ); + UNLOCK_HARDWARE( rmesa ); + exit( 1 ); + } + } + + UNLOCK_HARDWARE( rmesa ); + if (!rect) + { + rmesa->hw.all_dirty = GL_TRUE; + + rmesa->swap_count++; + (*dri_interface->getUST)( & ust ); + if ( missed_target ) { + rmesa->swap_missed_count++; + rmesa->swap_missed_ust = ust - rmesa->swap_ust; + } + + rmesa->swap_ust = ust; + + sched_yield(); + } +} + +void r200PageFlip( const __DRIdrawablePrivate *dPriv ) +{ + r200ContextPtr rmesa; + GLint ret; + GLboolean missed_target; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; + + if ( R200_DEBUG & DEBUG_IOCTL ) { + fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, + rmesa->sarea->pfCurrentPage); + } + + R200_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE( rmesa ); + usleep( 10000 ); /* throttle invisible client 10ms */ + return; + } + + /* Need to do this for the perf box placement: + */ + { + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + b[0] = box[0]; + rmesa->sarea->nbox = 1; + } + + /* Throttle the frame rate -- only allow a few pending swap buffers + * request at a time. + */ + r200WaitForFrameCompletion( rmesa ); + UNLOCK_HARDWARE( rmesa ); + driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + if ( missed_target ) { + rmesa->swap_missed_count++; + (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust ); + } + LOCK_HARDWARE( rmesa ); + + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); + + UNLOCK_HARDWARE( rmesa ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); + exit( 1 ); + } + + rmesa->swap_count++; + (void) (*dri_interface->getUST)( & rmesa->swap_ust ); + +#if 000 + if ( rmesa->sarea->pfCurrentPage == 1 ) { + rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; + rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; + } else { + rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; + rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; + } + + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset + + rmesa->r200Screen->fbLocation; + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch; + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } +#else + /* Get ready for drawing next frame. Update the renderbuffers' + * flippedOffset/Pitch fields so we draw into the right place. + */ + driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, + rmesa->sarea->pfCurrentPage); + + + r200UpdateDrawBuffer(rmesa->glCtx); +#endif +} + + +/* ================================================================ + * Buffer clear + */ +static void r200Clear( GLcontext *ctx, GLbitfield mask ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + GLuint flags = 0; + GLuint color_mask = 0; + GLint ret, i; + GLint cx, cy, cw, ch; + + if ( R200_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "r200Clear\n"); + } + + { + LOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( rmesa ); + if ( dPriv->numClipRects == 0 ) + return; + } + + r200Flush( ctx ); + + if ( mask & BUFFER_BIT_FRONT_LEFT ) { + flags |= RADEON_FRONT; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_FRONT_LEFT; + } + + if ( mask & BUFFER_BIT_BACK_LEFT ) { + flags |= RADEON_BACK; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_BACK_LEFT; + } + + if ( mask & BUFFER_BIT_DEPTH ) { + flags |= RADEON_DEPTH; + mask &= ~BUFFER_BIT_DEPTH; + } + + if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { + flags |= RADEON_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } + + if ( mask ) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); + _swrast_Clear( ctx, mask ); + } + + if ( !flags ) + return; + + if (rmesa->using_hyperz) { + flags |= RADEON_USE_COMP_ZBUF; +/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) + flags |= RADEON_USE_HIERZ; */ + if (!(rmesa->state.stencil.hwBuffer) || + ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && + ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { + flags |= RADEON_CLEAR_FASTZ; + } + } + + LOCK_HARDWARE( rmesa ); + + /* compute region after locking: */ + cx = ctx->DrawBuffer->_Xmin; + cy = ctx->DrawBuffer->_Ymin; + cw = ctx->DrawBuffer->_Xmax - cx; + ch = ctx->DrawBuffer->_Ymax - cy; + + /* Flip top to bottom */ + cx += dPriv->x; + cy = dPriv->y + dPriv->h - cy - ch; + + /* Throttle the number of clear ioctls we do. + */ + while ( 1 ) { + drm_radeon_getparam_t gp; + int ret; + int clear; + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&clear; + ret = drmCommandWriteRead( rmesa->dri.fd, + DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); + + if ( ret ) { + fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret ); + exit(1); + } + + /* Clear throttling needs more thought. + */ + if ( rmesa->sarea->last_clear - clear <= 25 ) { + break; + } + + if (rmesa->do_usleeps) { + UNLOCK_HARDWARE( rmesa ); + DO_USLEEP( 1 ); + LOCK_HARDWARE( rmesa ); + } + } + + /* Send current state to the hardware */ + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + + for ( i = 0 ; i < dPriv->numClipRects ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + drm_radeon_clear_t clear; + drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; + GLint n = 0; + + if (cw != dPriv->w || ch != dPriv->h) { + /* clear subregion */ + for ( ; i < nr ; i++ ) { + GLint x = box[i].x1; + GLint y = box[i].y1; + GLint w = box[i].x2 - x; + GLint h = box[i].y2 - y; + + if ( x < cx ) w -= cx - x, x = cx; + if ( y < cy ) h -= cy - y, y = cy; + if ( x + w > cx + cw ) w = cx + cw - x; + if ( y + h > cy + ch ) h = cy + ch - y; + if ( w <= 0 ) continue; + if ( h <= 0 ) continue; + + b->x1 = x; + b->y1 = y; + b->x2 = x + w; + b->y2 = y + h; + b++; + n++; + } + } else { + /* clear whole window */ + for ( ; i < nr ; i++ ) { + *b++ = box[i]; + n++; + } + } + + rmesa->sarea->nbox = n; + + clear.flags = flags; + clear.clear_color = rmesa->state.color.clear; + clear.clear_depth = rmesa->state.depth.clear; /* needed for hyperz */ + clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + clear.depth_mask = rmesa->state.stencil.clear; + clear.depth_boxes = depth_boxes; + + n--; + b = rmesa->sarea->boxes; + for ( ; n >= 0 ; n-- ) { + depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; + depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; + depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2; + depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2; + depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear; + } + + ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, + &clear, sizeof(clear)); + + + if ( ret ) { + UNLOCK_HARDWARE( rmesa ); + fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); + exit( 1 ); + } + } + + UNLOCK_HARDWARE( rmesa ); + rmesa->hw.all_dirty = GL_TRUE; +} + + +void r200WaitForIdleLocked( r200ContextPtr rmesa ) +{ + int ret; + int i = 0; + + do { + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE); + if (ret) + DO_USLEEP( 1 ); + } while (ret && ++i < 100); + + if ( ret < 0 ) { + UNLOCK_HARDWARE( rmesa ); + fprintf( stderr, "Error: R200 timed out... exiting\n" ); + exit( -1 ); + } +} + + +static void r200WaitForIdle( r200ContextPtr rmesa ) +{ + LOCK_HARDWARE(rmesa); + r200WaitForIdleLocked( rmesa ); + UNLOCK_HARDWARE(rmesa); +} + + +void r200Flush( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + r200EmitState( rmesa ); + + if (rmesa->store.cmd_used) + r200FlushCmdBuf( rmesa, __FUNCTION__ ); +} + +/* Make sure all commands have been sent to the hardware and have + * completed processing. + */ +void r200Finish( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + r200Flush( ctx ); + + if (rmesa->do_irqs) { + LOCK_HARDWARE( rmesa ); + r200EmitIrqLocked( rmesa ); + UNLOCK_HARDWARE( rmesa ); + r200WaitIrq( rmesa ); + } + else + r200WaitForIdle( rmesa ); +} + + +/* This version of AllocateMemoryMESA allocates only GART memory, and + * only does so after the point at which the driver has been + * initialized. + * + * Theoretically a valid context isn't required. However, in this + * implementation, it is, as I'm using the hardware lock to protect + * the kernel data structures, and the current context to get the + * device fd. + */ +void *r200AllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLsizei size, + GLfloat readfreq, GLfloat writefreq, + GLfloat priority) +{ + GET_CURRENT_CONTEXT(ctx); + r200ContextPtr rmesa; + int region_offset; + drm_radeon_mem_alloc_t alloc; + int ret; + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, + writefreq, priority); + + if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) + return NULL; + + if (getenv("R200_NO_ALLOC")) + return NULL; + + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = 0; + alloc.size = size; + alloc.region_offset = ®ion_offset; + + ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd, + DRM_RADEON_ALLOC, + &alloc, sizeof(alloc)); + + if (ret) { + fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret); + return NULL; + } + + { + char *region_start = (char *)rmesa->r200Screen->gartTextures.map; + return (void *)(region_start + region_offset); + } +} + + +/* Called via glXFreeMemoryMESA() */ +void r200FreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer) +{ + GET_CURRENT_CONTEXT(ctx); + r200ContextPtr rmesa; + ptrdiff_t region_offset; + drm_radeon_mem_free_t memfree; + int ret; + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %p\n", __FUNCTION__, pointer); + + if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) { + fprintf(stderr, "%s: no context\n", __FUNCTION__); + return; + } + + region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + + if (region_offset < 0 || + region_offset > rmesa->r200Screen->gartTextures.size) { + fprintf(stderr, "offset %d outside range 0..%d\n", region_offset, + rmesa->r200Screen->gartTextures.size); + return; + } + + memfree.region = RADEON_MEM_REGION_GART; + memfree.region_offset = region_offset; + + ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd, + DRM_RADEON_FREE, + &memfree, sizeof(memfree)); + + if (ret) + fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret); +} + +/* Called via glXGetMemoryOffsetMESA() */ +GLuint r200GetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, const GLvoid *pointer) +{ + GET_CURRENT_CONTEXT(ctx); + r200ContextPtr rmesa; + GLuint card_offset; + + if (!ctx || !(rmesa = R200_CONTEXT(ctx)) ) { + fprintf(stderr, "%s: no context\n", __FUNCTION__); + return ~0; + } + + if (!r200IsGartMemory( rmesa, pointer, 0 )) + return ~0; + + card_offset = r200GartOffsetFromVirtual( rmesa, pointer ); + + return card_offset - rmesa->r200Screen->gart_base; +} + +GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, + GLint size ) +{ + ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + int valid = (size >= 0 && + offset >= 0 && + offset + size < rmesa->r200Screen->gartTextures.size); + + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid ); + + return valid; +} + + +GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ) +{ + ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + + if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size) + return ~0; + else + return rmesa->r200Screen->gart_texture_offset + offset; +} + + + +void r200InitIoctlFuncs( struct dd_function_table *functions ) +{ + functions->Clear = r200Clear; + functions->Finish = r200Finish; + functions->Flush = r200Flush; +} + diff --git a/r200/r200_ioctl.h b/r200/r200_ioctl.h new file mode 100644 index 0000000..f537527 --- /dev/null +++ b/r200/r200_ioctl.h @@ -0,0 +1,210 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_ioctl.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_IOCTL_H__ +#define __R200_IOCTL_H__ + +#include "simple_list.h" +#include "radeon_dri.h" +#include "r200_lock.h" + +#include "xf86drm.h" +#include "drm.h" +#include "radeon_drm.h" + +extern void r200EmitState( r200ContextPtr rmesa ); +extern void r200EmitVertexAOS( r200ContextPtr rmesa, + GLuint vertex_size, + GLuint offset ); + +extern void r200EmitVbufPrim( r200ContextPtr rmesa, + GLuint primitive, + GLuint vertex_nr ); + +extern void r200FlushElts( r200ContextPtr rmesa ); + +extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, + GLuint primitive, + GLuint min_nr ); + +extern void r200EmitAOS( r200ContextPtr rmesa, + struct r200_dma_region **regions, + GLuint n, + GLuint offset ); + +extern void r200EmitBlit( r200ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, + GLuint w, GLuint h ); + +extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags ); + +extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * ); +extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ); + +extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ); + +extern void r200AllocDmaRegion( r200ContextPtr rmesa, + struct r200_dma_region *region, + int bytes, + int alignment ); + +extern void r200ReleaseDmaRegion( r200ContextPtr rmesa, + struct r200_dma_region *region, + const char *caller ); + +extern void r200CopyBuffer( const __DRIdrawablePrivate *drawable, + const drm_clip_rect_t *rect); +extern void r200PageFlip( const __DRIdrawablePrivate *drawable ); +extern void r200Flush( GLcontext *ctx ); +extern void r200Finish( GLcontext *ctx ); +extern void r200WaitForIdleLocked( r200ContextPtr rmesa ); +extern void r200WaitForVBlank( r200ContextPtr rmesa ); +extern void r200InitIoctlFuncs( struct dd_function_table *functions ); + +extern void *r200AllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLsizei size, GLfloat readfreq, + GLfloat writefreq, GLfloat priority ); +extern void r200FreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, GLvoid *pointer ); +extern GLuint r200GetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer ); + +extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, + GLint size ); + +extern GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, + const GLvoid *pointer ); + +void r200SetUpAtomList( r200ContextPtr rmesa ); + +/* ================================================================ + * Helper macros: + */ + +/* Close off the last primitive, if it exists. + */ +#define R200_NEWPRIM( rmesa ) \ +do { \ + if ( rmesa->dma.flush ) \ + rmesa->dma.flush( rmesa ); \ +} while (0) + +/* Can accomodate several state changes and primitive changes without + * actually firing the buffer. + */ +#define R200_STATECHANGE( rmesa, ATOM ) \ +do { \ + R200_NEWPRIM( rmesa ); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ + rmesa->hw.is_dirty = GL_TRUE; \ +} while (0) + +#define R200_DB_STATE( ATOM ) \ + memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ + rmesa->hw.ATOM.cmd_size * 4) + +static __inline int R200_DB_STATECHANGE( + r200ContextPtr rmesa, + struct r200_state_atom *atom ) +{ + if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { + int *tmp; + R200_NEWPRIM( rmesa ); + atom->dirty = GL_TRUE; + rmesa->hw.is_dirty = GL_TRUE; + tmp = atom->cmd; + atom->cmd = atom->lastcmd; + atom->lastcmd = tmp; + return 1; + } + else + return 0; +} + + +/* Fire the buffered vertices no matter what. + */ +#define R200_FIREVERTICES( rmesa ) \ +do { \ + if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ + r200Flush( rmesa->glCtx ); \ + } \ +} while (0) + +/* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ + * are available, you will also be adding an rmesa->state.max_state_size because + * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. + */ +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) +#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define ELTS_BUFSZ(nr) (12 + nr * 2) +#define VBUF_BUFSZ (3 * sizeof(int)) + +/* Ensure that a minimum amount of space is available in the command buffer. + * This is used to ensure atomicity of state updates with the rendering requests + * that rely on them. + * + * An alternative would be to implement a "soft lock" such that when the buffer + * wraps at an inopportune time, we grab the lock, flush the current buffer, + * and hang on to the lock until the critical section is finished and we flush + * the buffer again and unlock. + */ +static __inline void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) +{ + if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) + r200FlushCmdBuf( rmesa, __FUNCTION__ ); + assert( bytes <= R200_CMD_BUF_SZ ); +} + +/* Alloc space in the command buffer + */ +static __inline char *r200AllocCmdBuf( r200ContextPtr rmesa, + int bytes, const char *where ) +{ + char * head; + + if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) + r200FlushCmdBuf( rmesa, where ); + + head = rmesa->store.cmd_buf + rmesa->store.cmd_used; + rmesa->store.cmd_used += bytes; + assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); + return head; +} + +#endif /* __R200_IOCTL_H__ */ diff --git a/r200/r200_lock.c b/r200/r200_lock.c new file mode 100644 index 0000000..b050dd7 --- /dev/null +++ b/r200/r200_lock.c @@ -0,0 +1,117 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "r200_context.h" +#include "r200_lock.h" +#include "r200_tex.h" +#include "r200_state.h" +#include "r200_ioctl.h" + +#include "drirenderbuffer.h" + + +#if DEBUG_LOCKING +char *prevLockFile = NULL; +int prevLockLine = 0; +#endif + +/* Turn on/off page flipping according to the flags in the sarea: + */ +static void +r200UpdatePageFlipping( r200ContextPtr rmesa ) +{ + rmesa->doPageFlip = rmesa->sarea->pfState; + if (rmesa->glCtx->WinSysDrawBuffer) { + driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, + rmesa->sarea->pfCurrentPage); + } +} + + + +/* Update the hardware state. This is called if another context has + * grabbed the hardware lock, which includes the X server. This + * function also updates the driver's window state after the X server + * moves, resizes or restacks a window -- the change will be reflected + * in the drawable position and clip rects. Since the X server grabs + * the hardware lock when it changes the window state, this routine will + * automatically be called after such a change. + */ +void r200GetLock( r200ContextPtr rmesa, GLuint flags ) +{ + __DRIdrawablePrivate *drawable = rmesa->dri.drawable; + __DRIdrawablePrivate *readable = rmesa->dri.readable; + __DRIscreenPrivate *sPriv = rmesa->dri.screen; + drm_radeon_sarea_t *sarea = rmesa->sarea; + int i; + + drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags ); + + /* The window might have moved, so we might need to get new clip + * rects. + * + * NOTE: This releases and regrabs the hw lock to allow the X server + * to respond to the DRI protocol request for new drawable info. + * Since the hardware state depends on having the latest drawable + * clip rects, all state checking must be done _after_ this call. + */ + DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable ); + if (drawable != readable) { + DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable ); + } + + if ( rmesa->lastStamp != drawable->lastStamp ) { + r200UpdatePageFlipping( rmesa ); + r200SetCliprects( rmesa ); + r200UpdateViewportOffset( rmesa->glCtx ); + driUpdateFramebufferSize(rmesa->glCtx, drawable); + } + + R200_STATECHANGE( rmesa, ctx ); + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } + else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE; + + if ( sarea->ctx_owner != rmesa->dri.hwContext ) { + sarea->ctx_owner = rmesa->dri.hwContext; + } + + for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { + DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] ); + } + + rmesa->lost_context = GL_TRUE; +} diff --git a/r200/r200_lock.h b/r200/r200_lock.h new file mode 100644 index 0000000..e4c3a7e --- /dev/null +++ b/r200/r200_lock.h @@ -0,0 +1,107 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_lock.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_LOCK_H__ +#define __R200_LOCK_H__ + +extern void r200GetLock( r200ContextPtr rmesa, GLuint flags ); + +/* Turn DEBUG_LOCKING on to find locking conflicts. + */ +#define DEBUG_LOCKING 0 + +#if DEBUG_LOCKING +extern char *prevLockFile; +extern int prevLockLine; + +#define DEBUG_LOCK() \ + do { \ + prevLockFile = (__FILE__); \ + prevLockLine = (__LINE__); \ + } while (0) + +#define DEBUG_RESET() \ + do { \ + prevLockFile = 0; \ + prevLockLine = 0; \ + } while (0) + +#define DEBUG_CHECK_LOCK() \ + do { \ + if ( prevLockFile ) { \ + fprintf( stderr, \ + "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ + prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ + exit( 1 ); \ + } \ + } while (0) + +#else + +#define DEBUG_LOCK() +#define DEBUG_RESET() +#define DEBUG_CHECK_LOCK() + +#endif + +/* + * !!! We may want to separate locks from locks with validation. This + * could be used to improve performance for those things commands that + * do not do any drawing !!! + */ + + +/* Lock the hardware and validate our state. + */ +#define LOCK_HARDWARE( rmesa ) \ + do { \ + char __ret = 0; \ + DEBUG_CHECK_LOCK(); \ + DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext, \ + (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \ + if ( __ret ) \ + r200GetLock( rmesa, 0 ); \ + DEBUG_LOCK(); \ + } while (0) + +#define UNLOCK_HARDWARE( rmesa ) \ + do { \ + DRM_UNLOCK( rmesa->dri.fd, \ + rmesa->dri.hwLock, \ + rmesa->dri.hwContext ); \ + DEBUG_RESET(); \ + } while (0) + +#endif /* __R200_LOCK_H__ */ diff --git a/r200/r200_maos.c b/r200/r200_maos.c new file mode 100644 index 0000000..23e1c96 --- /dev/null +++ b/r200/r200_maos.c @@ -0,0 +1,15 @@ + + +/* Currently, can only use arrays, verts are not implemented, though + * verts are suspected to be faster. + * To get an idea how the verts path works, look at the radeon implementation. + */ +#include <string.h> + +#include "r200_context.h" +#define R200_MAOS_VERTS 0 +#if (R200_MAOS_VERTS) +#include "r200_maos_verts.c" +#else +#include "r200_maos_arrays.c" +#endif diff --git a/r200/r200_maos.h b/r200/r200_maos.h new file mode 100644 index 0000000..4998f67 --- /dev/null +++ b/r200/r200_maos.h @@ -0,0 +1,44 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_MAOS_H__ +#define __R200_MAOS_H__ + +#include "r200_context.h" + +extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ); +extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ); + +#endif diff --git a/r200/r200_maos_arrays.c b/r200/r200_maos_arrays.c new file mode 100644 index 0000000..3162b50 --- /dev/null +++ b/r200/r200_maos_arrays.c @@ -0,0 +1,513 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "imports.h" +#include "macros.h" + +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_swtcl.h" +#include "r200_maos.h" +#include "r200_tcl.h" + + +#if 0 +/* Usage: + * - from r200_tcl_render + * - call r200EmitArrays to ensure uptodate arrays in dma + * - emit primitives (new type?) which reference the data + * -- need to use elts for lineloop, quads, quadstrip/flat + * -- other primitives are all well-formed (need tristrip-1,fake-poly) + * + */ +static void emit_ubyte_rgba3( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p\n", + __FUNCTION__, count, stride, (void *)out); + + for (i = 0; i < count; i++) { + out->red = *data; + out->green = *(data+1); + out->blue = *(data+2); + out->alpha = 0xFF; + out++; + data += stride; + } +} + +static void emit_ubyte_rgba4( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 4) { + for (i = 0; i < count; i++) + ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]); + } else { + for (i = 0; i < count; i++) { + *(int *)out++ = LE32_TO_CPU(*(int *)data); + data += stride; + } + } +} + + +static void emit_ubyte_rgba( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int size, + int stride, + int count ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); + + assert (!rvb->buf); + + if (stride == 0) { + r200AllocDmaRegion( rmesa, rvb, 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = 1; + } + else { + r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 1; + rvb->aos_size = 1; + } + + /* Emit the data + */ + switch (size) { + case 3: + emit_ubyte_rgba3( ctx, rvb, data, stride, count ); + break; + case 4: + emit_ubyte_rgba4( ctx, rvb, data, stride, count ); + break; + default: + assert(0); + exit(1); + break; + } +} +#endif + + +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ +} while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ +} while (0) +#endif + + +static void emit_vecfog( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + GLfloat *out; + + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + assert (!rvb->buf); + + if (stride == 0) { + r200AllocDmaRegion( rmesa, rvb, 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = 1; + } + else { + r200AllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 1; + rvb->aos_size = 1; + } + + /* Emit the data + */ + out = (GLfloat *)(rvb->address + rvb->start); + for (i = 0; i < count; i++) { + out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); + out++; + data += stride; + } + +} + + +static void emit_vec4( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 4) + COPY_DWORDS( out, data, count ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out++; + data += stride; + } +} + + +static void emit_vec8( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 8) + COPY_DWORDS( out, data, count*2 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out += 2; + data += stride; + } +} + +static void emit_vec12( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 12) + COPY_DWORDS( out, data, count*3 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out[2] = *(int *)(data+8); + out += 3; + data += stride; + } +} + +static void emit_vec16( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 16) + COPY_DWORDS( out, data, count*4 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out[2] = *(int *)(data+8); + out[3] = *(int *)(data+12); + out += 4; + data += stride; + } +} + + +static void emit_vector( GLcontext *ctx, + struct r200_dma_region *rvb, + char *data, + int size, + int stride, + int count ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (R200_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d size %d stride %d\n", + __FUNCTION__, count, size, stride); + + assert (!rvb->buf); + + if (stride == 0) { + r200AllocDmaRegion( rmesa, rvb, size * 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = size; + } + else { + r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = size; + rvb->aos_size = size; + } + + /* Emit the data + */ + switch (size) { + case 1: + emit_vec4( ctx, rvb, data, stride, count ); + break; + case 2: + emit_vec8( ctx, rvb, data, stride, count ); + break; + case 3: + emit_vec12( ctx, rvb, data, stride, count ); + break; + case 4: + emit_vec16( ctx, rvb, data, stride, count ); + break; + default: + assert(0); + exit(1); + break; + } + +} + + + +/* Emit any changed arrays to new GART memory, re-emit a packet to + * update the arrays. + */ +void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; + struct r200_dma_region **component = rmesa->tcl.aos_components; + GLuint nr = 0; + GLuint vfmt0 = 0, vfmt1 = 0; + GLuint count = VB->Count; + GLuint i, emitsize; + + for ( i = 0; i < 15; i++ ) { + GLubyte attrib = vimap_rev[i]; + if (attrib != 255) { + switch (i) { + case 0: + emitsize = (VB->AttribPtr[attrib]->size); + switch (emitsize) { + case 4: + vfmt0 |= R200_VTX_W0; + /* fallthrough */ + case 3: + vfmt0 |= R200_VTX_Z0; + break; + case 2: + break; + default: assert(0); + } + break; + case 1: + assert(attrib == VERT_ATTRIB_WEIGHT); + emitsize = (VB->AttribPtr[attrib]->size); + vfmt0 |= emitsize << R200_VTX_WEIGHT_COUNT_SHIFT; + break; + case 2: + assert(attrib == VERT_ATTRIB_NORMAL); + emitsize = 3; + vfmt0 |= R200_VTX_N0; + break; + case 3: + /* special handling to fix up fog. Will get us into trouble with vbos...*/ + assert(attrib == VERT_ATTRIB_FOG); + if (!rmesa->tcl.vertex_data[i].buf) { + if (ctx->VertexProgram._Enabled) + emit_vector( ctx, + &(rmesa->tcl.vertex_data[i]), + (char *)VB->AttribPtr[attrib]->data, + 1, + VB->AttribPtr[attrib]->stride, + count); + else + emit_vecfog( ctx, + &(rmesa->tcl.vertex_data[i]), + (char *)VB->AttribPtr[attrib]->data, + VB->AttribPtr[attrib]->stride, + count); + } + vfmt0 |= R200_VTX_DISCRETE_FOG; + goto after_emit; + break; + case 4: + case 5: + case 6: + case 7: + if (VB->AttribPtr[attrib]->size == 4 && + (VB->AttribPtr[attrib]->stride != 0 || + VB->AttribPtr[attrib]->data[0][3] != 1.0)) emitsize = 4; + else emitsize = 3; + if (emitsize == 4) + vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2); + else { + vfmt0 |= R200_VTX_FP_RGB << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2); + } + break; + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + emitsize = VB->AttribPtr[attrib]->size; + vfmt1 |= emitsize << (R200_VTX_TEX0_COMP_CNT_SHIFT + (i - 8) * 3); + break; + case 14: + emitsize = VB->AttribPtr[attrib]->size >= 2 ? VB->AttribPtr[attrib]->size : 2; + switch (emitsize) { + case 2: + vfmt0 |= R200_VTX_XY1; + /* fallthrough */ + case 3: + vfmt0 |= R200_VTX_Z1; + /* fallthrough */ + case 4: + vfmt0 |= R200_VTX_W1; + break; + } + default: + assert(0); + } + if (!rmesa->tcl.vertex_data[i].buf) { + emit_vector( ctx, + &(rmesa->tcl.vertex_data[i]), + (char *)VB->AttribPtr[attrib]->data, + emitsize, + VB->AttribPtr[attrib]->stride, + count ); + } +after_emit: + assert(nr < 12); + component[nr++] = &rmesa->tcl.vertex_data[i]; + } + } + + if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] || + vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) { + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = vfmt0; + rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1; + } + + rmesa->tcl.nr_aos_components = nr; +} + + +void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + + /* only do it for changed inputs ? */ + int i; + for (i = 0; i < 15; i++) { + if (newinputs & (1 << i)) + r200ReleaseDmaRegion( rmesa, + &rmesa->tcl.vertex_data[i], __FUNCTION__ ); + } +} diff --git a/r200/r200_pixel.c b/r200/r200_pixel.c new file mode 100644 index 0000000..7b060f9 --- /dev/null +++ b/r200/r200_pixel.c @@ -0,0 +1,491 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.c,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "enums.h" +#include "mtypes.h" +#include "macros.h" +#include "swrast/swrast.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_pixel.h" +#include "r200_swtcl.h" + +#include "drirenderbuffer.h" + + +static GLboolean +check_color( const GLcontext *ctx, GLenum type, GLenum format, + const struct gl_pixelstore_attrib *packing, + const void *pixels, GLint sz, GLint pitch ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint cpp = rmesa->r200Screen->cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if ( (pitch & 63) || + ctx->_ImageTransferState || + packing->SwapBytes || + packing->LsbFirst) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: failed 1\n", __FUNCTION__); + return GL_FALSE; + } + + if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && + cpp == 4 && + format == GL_BGRA ) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: passed 2\n", __FUNCTION__); + return GL_TRUE; + } + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: failed\n", __FUNCTION__); + + return GL_FALSE; +} + +static GLboolean +check_color_per_fragment_ops( const GLcontext *ctx ) +{ + int result; + result = (!( ctx->Color.AlphaEnabled || + ctx->Depth.Test || + ctx->Fog.Enabled || + ctx->Scissor.Enabled || + ctx->Stencil.Enabled || + !ctx->Color.ColorMask[0] || + !ctx->Color.ColorMask[1] || + !ctx->Color.ColorMask[2] || + !ctx->Color.ColorMask[3] || + ctx->Color.ColorLogicOpEnabled || + ctx->Texture._EnabledUnits + ) && + ctx->Current.RasterPosValid); + + return result; +} + + + +static GLboolean +clip_pixelrect( const GLcontext *ctx, + const GLframebuffer *buffer, + GLint *x, GLint *y, + GLsizei *width, GLsizei *height, + GLint *size ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + /* left clipping */ + if (*x < buffer->_Xmin) { + *width -= (buffer->_Xmin - *x); + *x = buffer->_Xmin; + } + + /* right clipping */ + if (*x + *width > buffer->_Xmax) + *width -= (*x + *width - buffer->_Xmax - 1); + + if (*width <= 0) + return GL_FALSE; + + /* bottom clipping */ + if (*y < buffer->_Ymin) { + *height -= (buffer->_Ymin - *y); + *y = buffer->_Ymin; + } + + /* top clipping */ + if (*y + *height > buffer->_Ymax) + *height -= (*y + *height - buffer->_Ymax - 1); + + if (*height <= 0) + return GL_FALSE; + + *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch + + (*x + *width - 1) * rmesa->r200Screen->cpp); + + return GL_TRUE; +} + +static GLboolean +r200TryReadPixels( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid *pixels ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint pitch = pack->RowLength ? pack->RowLength : width; + GLint blit_format; + GLuint cpp = rmesa->r200Screen->cpp; + GLint size = width * height * cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + /* Only accelerate reading to GART buffers. + */ + if ( !r200IsGartMemory(rmesa, pixels, + pitch * height * rmesa->r200Screen->cpp ) ) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: dest not GART\n", __FUNCTION__); + return GL_FALSE; + } + + /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from + * blitter: + */ + if (!pack->Invert) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__); + return GL_FALSE; + } + + if (!check_color(ctx, type, format, pack, pixels, size, pitch)) + return GL_FALSE; + + switch ( rmesa->r200Screen->cpp ) { + case 4: + blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + break; + default: + return GL_FALSE; + } + + + /* Although the blits go on the command buffer, need to do this and + * fire with lock held to guarentee cliprects and drawOffset are + * correct. + * + * This is an unusual situation however, as the code which flushes + * a full command buffer expects to be called unlocked. As a + * workaround, immediately flush the buffer on aquiring the lock. + */ + LOCK_HARDWARE( rmesa ); + + if (rmesa->store.cmd_used) + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + + if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height, + &size)) { + UNLOCK_HARDWARE( rmesa ); + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s totally clipped -- nothing to do\n", + __FUNCTION__); + return GL_TRUE; + } + + { + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer; + int nbox = dPriv->numClipRects; + int src_offset = drb->offset + + rmesa->r200Screen->fbLocation; + int src_pitch = drb->pitch * drb->cpp; + int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels ); + int dst_pitch = pitch * rmesa->r200Screen->cpp; + drm_clip_rect_t *box = dPriv->pClipRects; + int i; + + r200EmitWait( rmesa, RADEON_WAIT_3D ); + + y = dPriv->h - y - height; + x += dPriv->x; + y += dPriv->y; + + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n", + src_pitch, dst_pitch); + + for (i = 0 ; i < nbox ; i++) + { + GLint bx = box[i].x1; + GLint by = box[i].y1; + GLint bw = box[i].x2 - bx; + GLint bh = box[i].y2 - by; + + if (bx < x) bw -= x - bx, bx = x; + if (by < y) bh -= y - by, by = y; + if (bx + bw > x + width) bw = x + width - bx; + if (by + bh > y + height) bh = y + height - by; + if (bw <= 0) continue; + if (bh <= 0) continue; + + r200EmitBlit( rmesa, + blit_format, + src_pitch, src_offset, + dst_pitch, dst_offset, + bx, by, + bx - x, by - y, + bw, bh ); + } + + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + } + UNLOCK_HARDWARE( rmesa ); + + r200Finish( ctx ); /* required by GL */ + + return GL_TRUE; +} + +static void +r200ReadPixels( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid *pixels ) +{ + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, + pixels)) + _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, + pixels); +} + + + + +static void do_draw_pix( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint pitch, + const void *pixels, + GLuint planemask) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + drm_clip_rect_t *box = dPriv->pClipRects; + struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0][0]; + driRenderbuffer *drb = (driRenderbuffer *) rb; + int nbox = dPriv->numClipRects; + int i; + int blit_format; + int size; + int src_offset = r200GartOffsetFromVirtual( rmesa, pixels ); + int src_pitch = pitch * rmesa->r200Screen->cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + switch ( rmesa->r200Screen->cpp ) { + case 2: + blit_format = R200_CP_COLOR_FORMAT_RGB565; + break; + case 4: + blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + break; + default: + return; + } + + + LOCK_HARDWARE( rmesa ); + + if (rmesa->store.cmd_used) + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + + y -= height; /* cope with pixel zoom */ + + if (!clip_pixelrect(ctx, ctx->DrawBuffer, + &x, &y, &width, &height, + &size)) { + UNLOCK_HARDWARE( rmesa ); + return; + } + + y = dPriv->h - y - height; /* convert from gl to hardware coords */ + x += dPriv->x; + y += dPriv->y; + + + r200EmitWait( rmesa, RADEON_WAIT_3D ); + + for (i = 0 ; i < nbox ; i++ ) + { + GLint bx = box[i].x1; + GLint by = box[i].y1; + GLint bw = box[i].x2 - bx; + GLint bh = box[i].y2 - by; + + if (bx < x) bw -= x - bx, bx = x; + if (by < y) bh -= y - by, by = y; + if (bx + bw > x + width) bw = x + width - bx; + if (by + bh > y + height) bh = y + height - by; + if (bw <= 0) continue; + if (bh <= 0) continue; + + r200EmitBlit( rmesa, + blit_format, + src_pitch, src_offset, + drb->pitch * drb->cpp, + drb->offset + rmesa->r200Screen->fbLocation, + bx - x, by - y, + bx, by, + bw, bh ); + } + + r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + r200WaitForIdleLocked( rmesa ); /* required by GL */ + UNLOCK_HARDWARE( rmesa ); +} + + + + +static GLboolean +r200TryDrawPixels( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint pitch = unpack->RowLength ? unpack->RowLength : width; + GLuint planemask; + GLuint cpp = rmesa->r200Screen->cpp; + GLint size = width * pitch * cpp; + + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + /* check that we're drawing to exactly one color buffer */ + if (ctx->DrawBuffer->_NumColorDrawBuffers[0] != 1) + return GL_FALSE; + + switch (format) { + case GL_RGB: + case GL_RGBA: + case GL_BGRA: + planemask = r200PackColor(cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], + ctx->Color.ColorMask[ACOMP]); + + if (cpp == 2) + planemask |= planemask << 16; + + if (planemask != ~0) + return GL_FALSE; /* fix me -- should be possible */ + + /* Can't do conversions on GART reads/draws. + */ + if ( !r200IsGartMemory( rmesa, pixels, size ) ) { + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: not GART memory\n", __FUNCTION__); + return GL_FALSE; + } + + if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) { + return GL_FALSE; + } + if (!check_color_per_fragment_ops(ctx)) { + return GL_FALSE; + } + + if (ctx->Pixel.ZoomX != 1.0F || + ctx->Pixel.ZoomY != -1.0F) + return GL_FALSE; + break; + + default: + return GL_FALSE; + } + + if ( r200IsGartMemory(rmesa, pixels, size) ) + { + do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask ); + return GL_TRUE; + } + else if (0) + { + /* Pixels is in regular memory -- get dma buffers and perform + * upload through them. + */ + } + else + return GL_FALSE; +} + +static void +r200DrawPixels( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels ) +{ + if (R200_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (!r200TryDrawPixels( ctx, x, y, width, height, format, type, + unpack, pixels )) + _swrast_DrawPixels( ctx, x, y, width, height, format, type, + unpack, pixels ); +} + + +static void +r200Bitmap( GLcontext *ctx, GLint px, GLint py, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (rmesa->Fallback) + _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap ); + else + r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap ); +} + + + +void r200InitPixelFuncs( GLcontext *ctx ) +{ + if (!getenv("R200_NO_BLITS")) { + ctx->Driver.ReadPixels = r200ReadPixels; + ctx->Driver.DrawPixels = r200DrawPixels; + if (getenv("R200_HW_BITMAP")) + ctx->Driver.Bitmap = r200Bitmap; + } +} diff --git a/r200/r200_pixel.h b/r200/r200_pixel.h new file mode 100644 index 0000000..8f3923b --- /dev/null +++ b/r200/r200_pixel.h @@ -0,0 +1,40 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_PIXEL_H__ +#define __R200_PIXEL_H__ + +extern void r200InitPixelFuncs( GLcontext *ctx ); + +#endif diff --git a/r200/r200_reg.h b/r200/r200_reg.h new file mode 100644 index 0000000..a88ea4c --- /dev/null +++ b/r200/r200_reg.h @@ -0,0 +1,1586 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_reg.h,v 1.2 2002/12/16 16:18:54 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef _R200_REG_H_ +#define _R200_REG_H_ + +#define R200_PP_MISC 0x1c14 +#define R200_REF_ALPHA_MASK 0x000000ff +#define R200_ALPHA_TEST_FAIL (0 << 8) +#define R200_ALPHA_TEST_LESS (1 << 8) +#define R200_ALPHA_TEST_LEQUAL (2 << 8) +#define R200_ALPHA_TEST_EQUAL (3 << 8) +#define R200_ALPHA_TEST_GEQUAL (4 << 8) +#define R200_ALPHA_TEST_GREATER (5 << 8) +#define R200_ALPHA_TEST_NEQUAL (6 << 8) +#define R200_ALPHA_TEST_PASS (7 << 8) +#define R200_ALPHA_TEST_OP_MASK (7 << 8) +#define R200_CHROMA_FUNC_FAIL (0 << 16) +#define R200_CHROMA_FUNC_PASS (1 << 16) +#define R200_CHROMA_FUNC_NEQUAL (2 << 16) +#define R200_CHROMA_FUNC_EQUAL (3 << 16) +#define R200_CHROMA_KEY_NEAREST (0 << 18) +#define R200_CHROMA_KEY_ZERO (1 << 18) +#define R200_RIGHT_HAND_CUBE_D3D (0 << 24) +#define R200_RIGHT_HAND_CUBE_OGL (1 << 24) +#define R200_PP_FOG_COLOR 0x1c18 +#define R200_FOG_COLOR_MASK 0x00ffffff +#define R200_FOG_VERTEX (0 << 24) +#define R200_FOG_TABLE (1 << 24) +#define R200_FOG_USE_DEPTH (0 << 25) +#define R200_FOG_USE_W (1 << 25) +#define R200_FOG_USE_DIFFUSE_ALPHA (2 << 25) +#define R200_FOG_USE_SPEC_ALPHA (3 << 25) +#define R200_FOG_USE_VTX_FOG (4 << 25) +#define R200_FOG_USE_MASK (7 << 25) +#define R200_RE_SOLID_COLOR 0x1c1c +#define R200_RB3D_BLENDCNTL 0x1c20 +#define R200_COMB_FCN_MASK (7 << 12) +#define R200_COMB_FCN_ADD_CLAMP (0 << 12) +#define R200_COMB_FCN_ADD_NOCLAMP (1 << 12) +#define R200_COMB_FCN_SUB_CLAMP (2 << 12) +#define R200_COMB_FCN_SUB_NOCLAMP (3 << 12) +#define R200_COMB_FCN_MIN (4 << 12) +#define R200_COMB_FCN_MAX (5 << 12) +#define R200_COMB_FCN_RSUB_CLAMP (6 << 12) +#define R200_COMB_FCN_RSUB_NOCLAMP (7 << 12) +#define R200_BLEND_GL_ZERO (32) +#define R200_BLEND_GL_ONE (33) +#define R200_BLEND_GL_SRC_COLOR (34) +#define R200_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +#define R200_BLEND_GL_DST_COLOR (36) +#define R200_BLEND_GL_ONE_MINUS_DST_COLOR (37) +#define R200_BLEND_GL_SRC_ALPHA (38) +#define R200_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +#define R200_BLEND_GL_DST_ALPHA (40) +#define R200_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +#define R200_BLEND_GL_SRC_ALPHA_SATURATE (42) /* src factor only */ +#define R200_BLEND_GL_CONST_COLOR (43) +#define R200_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +#define R200_BLEND_GL_CONST_ALPHA (45) +#define R200_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +#define R200_BLEND_MASK (63) +#define R200_SRC_BLEND_SHIFT (16) +#define R200_DST_BLEND_SHIFT (24) +#define R200_RB3D_DEPTHOFFSET 0x1c24 +#define R200_RB3D_DEPTHPITCH 0x1c28 +#define R200_DEPTHPITCH_MASK 0x00001ff8 +#define R200_DEPTH_HYPERZ (3 << 16) +#define R200_DEPTH_ENDIAN_NO_SWAP (0 << 18) +#define R200_DEPTH_ENDIAN_WORD_SWAP (1 << 18) +#define R200_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) +#define R200_RB3D_ZSTENCILCNTL 0x1c2c +#define R200_DEPTH_FORMAT_MASK (0xf << 0) +#define R200_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) +#define R200_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) +#define R200_DEPTH_FORMAT_24BIT_FLOAT_Z (3 << 0) +#define R200_DEPTH_FORMAT_32BIT_INT_Z (4 << 0) +#define R200_DEPTH_FORMAT_32BIT_FLOAT_Z (5 << 0) +#define R200_DEPTH_FORMAT_24BIT_FLOAT_W (9 << 0) +#define R200_DEPTH_FORMAT_32BIT_FLOAT_W (11 << 0) +#define R200_Z_TEST_NEVER (0 << 4) +#define R200_Z_TEST_LESS (1 << 4) +#define R200_Z_TEST_LEQUAL (2 << 4) +#define R200_Z_TEST_EQUAL (3 << 4) +#define R200_Z_TEST_GEQUAL (4 << 4) +#define R200_Z_TEST_GREATER (5 << 4) +#define R200_Z_TEST_NEQUAL (6 << 4) +#define R200_Z_TEST_ALWAYS (7 << 4) +#define R200_Z_TEST_MASK (7 << 4) +#define R200_Z_HIERARCHY_ENABLE (1 << 8) +#define R200_STENCIL_TEST_NEVER (0 << 12) +#define R200_STENCIL_TEST_LESS (1 << 12) +#define R200_STENCIL_TEST_LEQUAL (2 << 12) +#define R200_STENCIL_TEST_EQUAL (3 << 12) +#define R200_STENCIL_TEST_GEQUAL (4 << 12) +#define R200_STENCIL_TEST_GREATER (5 << 12) +#define R200_STENCIL_TEST_NEQUAL (6 << 12) +#define R200_STENCIL_TEST_ALWAYS (7 << 12) +#define R200_STENCIL_TEST_MASK (0x7 << 12) +#define R200_STENCIL_FAIL_KEEP (0 << 16) +#define R200_STENCIL_FAIL_ZERO (1 << 16) +#define R200_STENCIL_FAIL_REPLACE (2 << 16) +#define R200_STENCIL_FAIL_INC (3 << 16) +#define R200_STENCIL_FAIL_DEC (4 << 16) +#define R200_STENCIL_FAIL_INVERT (5 << 16) +#define R200_STENCIL_FAIL_INC_WRAP (6 << 16) +#define R200_STENCIL_FAIL_DEC_WRAP (7 << 16) +#define R200_STENCIL_FAIL_MASK (0x7 << 16) +#define R200_STENCIL_ZPASS_KEEP (0 << 20) +#define R200_STENCIL_ZPASS_ZERO (1 << 20) +#define R200_STENCIL_ZPASS_REPLACE (2 << 20) +#define R200_STENCIL_ZPASS_INC (3 << 20) +#define R200_STENCIL_ZPASS_DEC (4 << 20) +#define R200_STENCIL_ZPASS_INVERT (5 << 20) +#define R200_STENCIL_ZPASS_INC_WRAP (6 << 20) +#define R200_STENCIL_ZPASS_DEC_WRAP (7 << 20) +#define R200_STENCIL_ZPASS_MASK (0x7 << 20) +#define R200_STENCIL_ZFAIL_KEEP (0 << 24) +#define R200_STENCIL_ZFAIL_ZERO (1 << 24) +#define R200_STENCIL_ZFAIL_REPLACE (2 << 24) +#define R200_STENCIL_ZFAIL_INC (3 << 24) +#define R200_STENCIL_ZFAIL_DEC (4 << 24) +#define R200_STENCIL_ZFAIL_INVERT (5 << 24) +#define R200_STENCIL_ZFAIL_INC_WRAP (6 << 24) +#define R200_STENCIL_ZFAIL_DEC_WRAP (7 << 24) +#define R200_STENCIL_ZFAIL_MASK (0x7 << 24) +#define R200_Z_COMPRESSION_ENABLE (1 << 28) +#define R200_FORCE_Z_DIRTY (1 << 29) +#define R200_Z_WRITE_ENABLE (1 << 30) +#define R200_Z_DECOMPRESSION_ENABLE (1 << 31) +/*gap*/ +#define R200_PP_CNTL 0x1c38 +#define R200_TEX_0_ENABLE 0x00000010 +#define R200_TEX_1_ENABLE 0x00000020 +#define R200_TEX_2_ENABLE 0x00000040 +#define R200_TEX_3_ENABLE 0x00000080 +#define R200_TEX_4_ENABLE 0x00000100 +#define R200_TEX_5_ENABLE 0x00000200 +#define R200_TEX_ENABLE_MASK 0x000003f0 +#define R200_FILTER_ROUND_MODE_MASK 0x00000400 +#define R200_TEX_BLEND_7_ENABLE 0x00000800 +#define R200_TEX_BLEND_0_ENABLE 0x00001000 +#define R200_TEX_BLEND_1_ENABLE 0x00002000 +#define R200_TEX_BLEND_2_ENABLE 0x00004000 +#define R200_TEX_BLEND_3_ENABLE 0x00008000 +#define R200_TEX_BLEND_4_ENABLE 0x00010000 +#define R200_TEX_BLEND_5_ENABLE 0x00020000 +#define R200_TEX_BLEND_6_ENABLE 0x00040000 +#define R200_TEX_BLEND_ENABLE_MASK 0x0007f800 +#define R200_TEX_BLEND_0_ENABLE_SHIFT (12) +#define R200_MULTI_PASS_ENABLE 0x00080000 +#define R200_SPECULAR_ENABLE 0x00200000 +#define R200_FOG_ENABLE 0x00400000 +#define R200_ALPHA_TEST_ENABLE 0x00800000 +#define R200_ANTI_ALIAS_NONE 0x00000000 +#define R200_ANTI_ALIAS_LINE 0x01000000 +#define R200_ANTI_ALIAS_POLY 0x02000000 +#define R200_ANTI_ALIAS_MASK 0x03000000 +#define R200_RB3D_CNTL 0x1c3c +#define R200_ALPHA_BLEND_ENABLE (1 << 0) +#define R200_PLANE_MASK_ENABLE (1 << 1) +#define R200_DITHER_ENABLE (1 << 2) +#define R200_ROUND_ENABLE (1 << 3) +#define R200_SCALE_DITHER_ENABLE (1 << 4) +#define R200_DITHER_INIT (1 << 5) +#define R200_ROP_ENABLE (1 << 6) +#define R200_STENCIL_ENABLE (1 << 7) +#define R200_Z_ENABLE (1 << 8) +#define R200_DEPTH_XZ_OFFEST_ENABLE (1 << 9) +#define R200_COLOR_FORMAT_ARGB1555 (3 << 10) +#define R200_COLOR_FORMAT_RGB565 (4 << 10) +#define R200_COLOR_FORMAT_ARGB8888 (6 << 10) +#define R200_COLOR_FORMAT_RGB332 (7 << 10) +#define R200_COLOR_FORMAT_Y8 (8 << 10) +#define R200_COLOR_FORMAT_RGB8 (9 << 10) +#define R200_COLOR_FORMAT_YUV422_VYUY (11 << 10) +#define R200_COLOR_FORMAT_YUV422_YVYU (12 << 10) +#define R200_COLOR_FORMAT_aYUV444 (14 << 10) +#define R200_COLOR_FORMAT_ARGB4444 (15 << 10) +#define R200_CLRCMP_FLIP_ENABLE (1 << 14) +#define R200_SEPARATE_ALPHA_ENABLE (1 << 16) +#define R200_RB3D_COLOROFFSET 0x1c40 +#define R200_COLOROFFSET_MASK 0xfffffff0 +#define R200_RE_WIDTH_HEIGHT 0x1c44 +#define R200_RE_WIDTH_SHIFT 0 +#define R200_RE_HEIGHT_SHIFT 16 +#define R200_RB3D_COLORPITCH 0x1c48 +#define R200_COLORPITCH_MASK 0x000001ff8 +#define R200_COLOR_TILE_ENABLE (1 << 16) +#define R200_COLOR_MICROTILE_ENABLE (1 << 17) +#define R200_COLOR_ENDIAN_NO_SWAP (0 << 18) +#define R200_COLOR_ENDIAN_WORD_SWAP (1 << 18) +#define R200_COLOR_ENDIAN_DWORD_SWAP (2 << 18) +#define R200_SE_CNTL 0x1c4c +#define R200_FFACE_CULL_CW (0 << 0) +#define R200_FFACE_CULL_CCW (1 << 0) +#define R200_FFACE_CULL_DIR_MASK (1 << 0) +#define R200_BFACE_CULL (0 << 1) +#define R200_BFACE_SOLID (3 << 1) +#define R200_FFACE_CULL (0 << 3) +#define R200_FFACE_SOLID (3 << 3) +#define R200_FFACE_CULL_MASK (3 << 3) +#define R200_FLAT_SHADE_VTX_0 (0 << 6) +#define R200_FLAT_SHADE_VTX_1 (1 << 6) +#define R200_FLAT_SHADE_VTX_2 (2 << 6) +#define R200_FLAT_SHADE_VTX_LAST (3 << 6) +#define R200_DIFFUSE_SHADE_SOLID (0 << 8) +#define R200_DIFFUSE_SHADE_FLAT (1 << 8) +#define R200_DIFFUSE_SHADE_GOURAUD (2 << 8) +#define R200_DIFFUSE_SHADE_MASK (3 << 8) +#define R200_ALPHA_SHADE_SOLID (0 << 10) +#define R200_ALPHA_SHADE_FLAT (1 << 10) +#define R200_ALPHA_SHADE_GOURAUD (2 << 10) +#define R200_ALPHA_SHADE_MASK (3 << 10) +#define R200_SPECULAR_SHADE_SOLID (0 << 12) +#define R200_SPECULAR_SHADE_FLAT (1 << 12) +#define R200_SPECULAR_SHADE_GOURAUD (2 << 12) +#define R200_SPECULAR_SHADE_MASK (3 << 12) +#define R200_FOG_SHADE_SOLID (0 << 14) +#define R200_FOG_SHADE_FLAT (1 << 14) +#define R200_FOG_SHADE_GOURAUD (2 << 14) +#define R200_FOG_SHADE_MASK (3 << 14) +#define R200_ZBIAS_ENABLE_POINT (1 << 16) +#define R200_ZBIAS_ENABLE_LINE (1 << 17) +#define R200_ZBIAS_ENABLE_TRI (1 << 18) +#define R200_WIDELINE_ENABLE (1 << 20) +#define R200_DISC_FOG_SHADE_SOLID (0 << 24) +#define R200_DISC_FOG_SHADE_FLAT (1 << 24) +#define R200_DISC_FOG_SHADE_GOURAUD (2 << 24) +#define R200_DISC_FOG_SHADE_MASK (3 << 24) +#define R200_VTX_PIX_CENTER_D3D (0 << 27) +#define R200_VTX_PIX_CENTER_OGL (1 << 27) +#define R200_ROUND_MODE_TRUNC (0 << 28) +#define R200_ROUND_MODE_ROUND (1 << 28) +#define R200_ROUND_MODE_ROUND_EVEN (2 << 28) +#define R200_ROUND_MODE_ROUND_ODD (3 << 28) +#define R200_ROUND_PREC_16TH_PIX (0 << 30) +#define R200_ROUND_PREC_8TH_PIX (1 << 30) +#define R200_ROUND_PREC_4TH_PIX (2 << 30) +#define R200_ROUND_PREC_HALF_PIX (3 << 30) +#define R200_RE_CNTL 0x1c50 +#define R200_STIPPLE_ENABLE 0x1 +#define R200_SCISSOR_ENABLE 0x2 +#define R200_PATTERN_ENABLE 0x4 +#define R200_PERSPECTIVE_ENABLE 0x8 +#define R200_POINT_SMOOTH 0x20 +#define R200_VTX_STQ0_D3D 0x00010000 +#define R200_VTX_STQ1_D3D 0x00040000 +#define R200_VTX_STQ2_D3D 0x00100000 +#define R200_VTX_STQ3_D3D 0x00400000 +#define R200_VTX_STQ4_D3D 0x01000000 +#define R200_VTX_STQ5_D3D 0x04000000 +/* gap */ +#define R200_RE_STIPPLE_ADDR 0x1cc8 +#define R200_RE_STIPPLE_DATA 0x1ccc +#define R200_RE_LINE_PATTERN 0x1cd0 +#define R200_LINE_PATTERN_MASK 0x0000ffff +#define R200_LINE_REPEAT_COUNT_SHIFT 16 +#define R200_LINE_PATTERN_START_SHIFT 24 +#define R200_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28) +#define R200_LINE_PATTERN_BIG_BIT_ORDER (1 << 28) +#define R200_LINE_PATTERN_AUTO_RESET (1 << 29) +#define R200_RE_LINE_STATE 0x1cd4 +#define R200_LINE_CURRENT_PTR_SHIFT 0 +#define R200_LINE_CURRENT_COUNT_SHIFT 8 +#define R200_RE_SCISSOR_TL_0 0x1cd8 +#define R200_RE_SCISSOR_BR_0 0x1cdc +#define R200_RE_SCISSOR_TL_1 0x1ce0 +#define R200_RE_SCISSOR_BR_1 0x1ce4 +#define R200_RE_SCISSOR_TL_2 0x1ce8 +#define R200_RE_SCISSOR_BR_2 0x1cec +/* gap */ +#define R200_RB3D_DEPTHXY_OFFSET 0x1d60 +#define R200_DEPTHX_SHIFT 0 +#define R200_DEPTHY_SHIFT 16 +/* gap */ +#define R200_RB3D_STENCILREFMASK 0x1d7c +#define R200_STENCIL_REF_SHIFT 0 +#define R200_STENCIL_REF_MASK (0xff << 0) +#define R200_STENCIL_MASK_SHIFT 16 +#define R200_STENCIL_VALUE_MASK (0xff << 16) +#define R200_STENCIL_WRITEMASK_SHIFT 24 +#define R200_STENCIL_WRITE_MASK (0xff << 24) +#define R200_RB3D_ROPCNTL 0x1d80 +#define R200_ROP_MASK (15 << 8) +#define R200_ROP_CLEAR (0 << 8) +#define R200_ROP_NOR (1 << 8) +#define R200_ROP_AND_INVERTED (2 << 8) +#define R200_ROP_COPY_INVERTED (3 << 8) +#define R200_ROP_AND_REVERSE (4 << 8) +#define R200_ROP_INVERT (5 << 8) +#define R200_ROP_XOR (6 << 8) +#define R200_ROP_NAND (7 << 8) +#define R200_ROP_AND (8 << 8) +#define R200_ROP_EQUIV (9 << 8) +#define R200_ROP_NOOP (10 << 8) +#define R200_ROP_OR_INVERTED (11 << 8) +#define R200_ROP_COPY (12 << 8) +#define R200_ROP_OR_REVERSE (13 << 8) +#define R200_ROP_OR (14 << 8) +#define R200_ROP_SET (15 << 8) +#define R200_RB3D_PLANEMASK 0x1d84 +/* gap */ +#define R200_SE_VPORT_XSCALE 0x1d98 +#define R200_SE_VPORT_XOFFSET 0x1d9c +#define R200_SE_VPORT_YSCALE 0x1da0 +#define R200_SE_VPORT_YOFFSET 0x1da4 +#define R200_SE_VPORT_ZSCALE 0x1da8 +#define R200_SE_VPORT_ZOFFSET 0x1dac +#define R200_SE_ZBIAS_FACTOR 0x1db0 +#define R200_SE_ZBIAS_CONSTANT 0x1db4 +#define R200_SE_LINE_WIDTH 0x1db8 +#define R200_LINE_WIDTH_SHIFT 0x00000000 +#define R200_MINPOINTSIZE_SHIFT 0x00000010 +/* gap */ +#define R200_SE_VAP_CNTL 0x2080 +#define R200_VAP_TCL_ENABLE 0x00000001 +#define R200_VAP_PROG_VTX_SHADER_ENABLE 0x00000004 +#define R200_VAP_SINGLE_BUF_STATE_ENABLE 0x00000010 +#define R200_VAP_FORCE_W_TO_ONE 0x00010000 +#define R200_VAP_D3D_TEX_DEFAULT 0x00020000 +#define R200_VAP_VF_MAX_VTX_NUM__SHIFT 18 +#define R200_VAP_DX_CLIP_SPACE_DEF 0x00400000 +#define R200_SE_VF_CNTL 0x2084 +#define R200_VF_PRIM_NONE 0x00000000 +#define R200_VF_PRIM_POINTS 0x00000001 +#define R200_VF_PRIM_LINES 0x00000002 +#define R200_VF_PRIM_LINE_STRIP 0x00000003 +#define R200_VF_PRIM_TRIANGLES 0x00000004 +#define R200_VF_PRIM_TRIANGLE_FAN 0x00000005 +#define R200_VF_PRIM_TRIANGLE_STRIP 0x00000006 +#define R200_VF_PRIM_RECT_LIST 0x00000008 +#define R200_VF_PRIM_3VRT_POINTS 0x00000009 +#define R200_VF_PRIM_3VRT_LINES 0x0000000a +#define R200_VF_PRIM_POINT_SPRITES 0x0000000b +#define R200_VF_PRIM_LINE_LOOP 0x0000000c +#define R200_VF_PRIM_QUADS 0x0000000d +#define R200_VF_PRIM_QUAD_STRIP 0x0000000e +#define R200_VF_PRIM_POLYGON 0x0000000f +#define R200_VF_PRIM_MASK 0x0000000f +#define R200_VF_PRIM_WALK_IND 0x00000010 +#define R200_VF_PRIM_WALK_LIST 0x00000020 +#define R200_VF_PRIM_WALK_RING 0x00000030 +#define R200_VF_PRIM_WALK_MASK 0x00000030 +#define R200_VF_COLOR_ORDER_RGBA 0x00000040 +#define R200_VF_TCL_OUTPUT_VTX_ENABLE 0x00000200 +#define R200_VF_INDEX_SZ_4 0x00000800 +#define R200_VF_VERTEX_NUMBER_MASK 0xffff0000 +#define R200_VF_VERTEX_NUMBER_SHIFT 16 +#define R200_SE_VTX_FMT_0 0x2088 +#define R200_VTX_XY 0 /* always have xy */ +#define R200_VTX_Z0 (1<<0) +#define R200_VTX_W0 (1<<1) +#define R200_VTX_WEIGHT_COUNT_SHIFT (2) +#define R200_VTX_PV_MATRIX_SEL (1<<5) +#define R200_VTX_N0 (1<<6) +#define R200_VTX_POINT_SIZE (1<<7) +#define R200_VTX_DISCRETE_FOG (1<<8) +#define R200_VTX_SHININESS_0 (1<<9) +#define R200_VTX_SHININESS_1 (1<<10) +#define R200_VTX_COLOR_NOT_PRESENT 0 +#define R200_VTX_PK_RGBA 1 +#define R200_VTX_FP_RGB 2 +#define R200_VTX_FP_RGBA 3 +#define R200_VTX_COLOR_MASK 3 +#define R200_VTX_COLOR_0_SHIFT 11 +#define R200_VTX_COLOR_1_SHIFT 13 +#define R200_VTX_COLOR_2_SHIFT 15 +#define R200_VTX_COLOR_3_SHIFT 17 +#define R200_VTX_COLOR_4_SHIFT 19 +#define R200_VTX_COLOR_5_SHIFT 21 +#define R200_VTX_COLOR_6_SHIFT 23 +#define R200_VTX_COLOR_7_SHIFT 25 +#define R200_VTX_XY1 (1<<28) +#define R200_VTX_Z1 (1<<29) +#define R200_VTX_W1 (1<<30) +#define R200_VTX_N1 (1<<31) +#define R200_SE_VTX_FMT_1 0x208c +#define R200_VTX_TEX0_COMP_CNT_SHIFT 0 +#define R200_VTX_TEX1_COMP_CNT_SHIFT 3 +#define R200_VTX_TEX2_COMP_CNT_SHIFT 6 +#define R200_VTX_TEX3_COMP_CNT_SHIFT 9 +#define R200_VTX_TEX4_COMP_CNT_SHIFT 12 +#define R200_VTX_TEX5_COMP_CNT_SHIFT 15 +#define R200_SE_TCL_OUTPUT_VTX_FMT_0 0x2090 +#define R200_SE_TCL_OUTPUT_VTX_FMT_1 0x2094 +/* gap */ +#define R200_SE_VTE_CNTL 0x20b0 +#define R200_VPORT_X_SCALE_ENA 0x00000001 +#define R200_VPORT_X_OFFSET_ENA 0x00000002 +#define R200_VPORT_Y_SCALE_ENA 0x00000004 +#define R200_VPORT_Y_OFFSET_ENA 0x00000008 +#define R200_VPORT_Z_SCALE_ENA 0x00000010 +#define R200_VPORT_Z_OFFSET_ENA 0x00000020 +#define R200_VTX_XY_FMT 0x00000100 +#define R200_VTX_Z_FMT 0x00000200 +#define R200_VTX_W0_FMT 0x00000400 +#define R200_VTX_W0_NORMALIZE 0x00000800 +#define R200_VTX_ST_DENORMALIZED 0x00001000 +/* gap */ +#define R200_SE_VTX_NUM_ARRAYS 0x20c0 +#define R200_SE_VTX_AOS_ATTR01 0x20c4 +#define R200_SE_VTX_AOS_ADDR0 0x20c8 +#define R200_SE_VTX_AOS_ADDR1 0x20cc +#define R200_SE_VTX_AOS_ATTR23 0x20d0 +#define R200_SE_VTX_AOS_ADDR2 0x20d4 +#define R200_SE_VTX_AOS_ADDR3 0x20d8 +#define R200_SE_VTX_AOS_ATTR45 0x20dc +#define R200_SE_VTX_AOS_ADDR4 0x20e0 +#define R200_SE_VTX_AOS_ADDR5 0x20e4 +#define R200_SE_VTX_AOS_ATTR67 0x20e8 +#define R200_SE_VTX_AOS_ADDR6 0x20ec +#define R200_SE_VTX_AOS_ADDR7 0x20f0 +#define R200_SE_VTX_AOS_ATTR89 0x20f4 +#define R200_SE_VTX_AOS_ADDR8 0x20f8 +#define R200_SE_VTX_AOS_ADDR9 0x20fc +#define R200_SE_VTX_AOS_ATTR1011 0x2100 +#define R200_SE_VTX_AOS_ADDR10 0x2104 +#define R200_SE_VTX_AOS_ADDR11 0x2108 +#define R200_SE_VF_MAX_VTX_INDX 0x210c +#define R200_SE_VF_MIN_VTX_INDX 0x2110 +/* gap */ +#define R200_SE_VAP_CNTL_STATUS 0x2140 +#define R200_VC_NO_SWAP (0 << 0) +#define R200_VC_16BIT_SWAP (1 << 0) +#define R200_VC_32BIT_SWAP (2 << 0) +/* gap */ +#define R200_SE_VTX_STATE_CNTL 0x2180 +#define R200_VSC_COLOR_0_ASSEMBLY_CNTL_SHIFT 0x00000000 +#define R200_VSC_COLOR_1_ASSEMBLY_CNTL_SHIFT 0x00000002 +#define R200_VSC_COLOR_2_ASSEMBLY_CNTL_SHIFT 0x00000004 +#define R200_VSC_COLOR_3_ASSEMBLY_CNTL_SHIFT 0x00000006 +#define R200_VSC_COLOR_4_ASSEMBLY_CNTL_SHIFT 0x00000008 +#define R200_VSC_COLOR_5_ASSEMBLY_CNTL_SHIFT 0x0000000a +#define R200_VSC_COLOR_6_ASSEMBLY_CNTL_SHIFT 0x0000000c +#define R200_VSC_COLOR_7_ASSEMBLY_CNTL_SHIFT 0x0000000e +#define R200_VSC_UPDATE_USER_COLOR_0_ENABLE 0x00010000 +#define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000 +/* gap */ +#define R200_SE_TCL_VECTOR_INDX_REG 0x2200 +#define R200_SE_TCL_VECTOR_DATA_REG 0x2204 +#define R200_SE_TCL_SCALAR_INDX_REG 0x2208 +#define R200_SE_TCL_SCALAR_DATA_REG 0x220c +/* gap */ +#define R200_SE_TCL_MATRIX_SEL_0 0x2230 +#define R200_MODELVIEW_0_SHIFT (0) +#define R200_MODELVIEW_1_SHIFT (8) +#define R200_MODELVIEW_2_SHIFT (16) +#define R200_MODELVIEW_3_SHIFT (24) +#define R200_SE_TCL_MATRIX_SEL_1 0x2234 +#define R200_IT_MODELVIEW_0_SHIFT (0) +#define R200_IT_MODELVIEW_1_SHIFT (8) +#define R200_IT_MODELVIEW_2_SHIFT (16) +#define R200_IT_MODELVIEW_3_SHIFT (24) +#define R200_SE_TCL_MATRIX_SEL_2 0x2238 +#define R200_MODELPROJECT_0_SHIFT (0) +#define R200_MODELPROJECT_1_SHIFT (8) +#define R200_MODELPROJECT_2_SHIFT (16) +#define R200_MODELPROJECT_3_SHIFT (24) +#define R200_SE_TCL_MATRIX_SEL_3 0x223c +#define R200_TEXMAT_0_SHIFT 0 +#define R200_TEXMAT_1_SHIFT 8 +#define R200_TEXMAT_2_SHIFT 16 +#define R200_TEXMAT_3_SHIFT 24 +#define R200_SE_TCL_MATRIX_SEL_4 0x2240 +#define R200_TEXMAT_4_SHIFT 0 +#define R200_TEXMAT_5_SHIFT 8 +/* gap */ +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250 +#define R200_OUTPUT_XYZW (1<<0) +#define R200_OUTPUT_COLOR_0 (1<<8) +#define R200_OUTPUT_COLOR_1 (1<<9) +#define R200_OUTPUT_TEX_0 (1<<16) +#define R200_OUTPUT_TEX_1 (1<<17) +#define R200_OUTPUT_TEX_2 (1<<18) +#define R200_OUTPUT_TEX_3 (1<<19) +#define R200_OUTPUT_TEX_4 (1<<20) +#define R200_OUTPUT_TEX_5 (1<<21) +#define R200_OUTPUT_TEX_MASK (0x3f<<16) +#define R200_OUTPUT_DISCRETE_FOG (1<<24) +#define R200_OUTPUT_PT_SIZE (1<<25) +#define R200_FORCE_INORDER_PROC (1<<31) +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 +#define R200_VERTEX_POSITION_ADDR__SHIFT 0x00000000 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1 0x2258 +#define R200_VTX_COLOR_0_ADDR__SHIFT 0x00000000 +#define R200_VTX_COLOR_1_ADDR__SHIFT 0x00000008 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2 0x225c +#define R200_VTX_TEX_0_ADDR__SHIFT 0x00000000 +#define R200_VTX_TEX_1_ADDR__SHIFT 0x00000008 +#define R200_VTX_TEX_2_ADDR__SHIFT 0x00000010 +#define R200_VTX_TEX_3_ADDR__SHIFT 0x00000018 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3 0x2260 +#define R200_VTX_TEX_4_ADDR__SHIFT 0x00000000 +#define R200_VTX_TEX_5_ADDR__SHIFT 0x00000008 + +/* gap */ +#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268 +#define R200_LIGHTING_ENABLE (1<<0) +#define R200_LIGHT_IN_MODELSPACE (1<<1) +#define R200_LOCAL_VIEWER (1<<2) +#define R200_NORMALIZE_NORMALS (1<<3) +#define R200_RESCALE_NORMALS (1<<4) +#define R200_SPECULAR_LIGHTS (1<<5) +#define R200_DIFFUSE_SPECULAR_COMBINE (1<<6) +#define R200_LIGHT_ALPHA (1<<7) +#define R200_LOCAL_LIGHT_VEC_GL (1<<8) +#define R200_LIGHT_NO_NORMAL_AMBIENT_ONLY (1<<9) +#define R200_LIGHT_TWOSIDE (1<<10) +#define R200_FRONT_SHININESS_SOURCE_SHIFT (0xb) +#define R200_BACK_SHININESS_SOURCE_SHIFT (0xd) +#define R200_LM0_SOURCE_MATERIAL_0 (0) +#define R200_LM0_SOURCE_MATERIAL_1 (1) +#define R200_LM0_SOURCE_VERTEX_SHININESS_0 (2) +#define R200_LM0_SOURCE_VERTEX_SHININESS_1 (3) +#define R200_SE_TCL_LIGHT_MODEL_CTL_1 0x226c +#define R200_LM1_SOURCE_LIGHT_PREMULT (0) +#define R200_LM1_SOURCE_MATERIAL_0 (1) +#define R200_LM1_SOURCE_VERTEX_COLOR_0 (2) +#define R200_LM1_SOURCE_VERTEX_COLOR_1 (3) +#define R200_LM1_SOURCE_VERTEX_COLOR_2 (4) +#define R200_LM1_SOURCE_VERTEX_COLOR_3 (5) +#define R200_LM1_SOURCE_VERTEX_COLOR_4 (6) +#define R200_LM1_SOURCE_VERTEX_COLOR_5 (7) +#define R200_LM1_SOURCE_VERTEX_COLOR_6 (8) +#define R200_LM1_SOURCE_VERTEX_COLOR_7 (9) +#define R200_LM1_SOURCE_MATERIAL_1 (0xf) +#define R200_FRONT_EMISSIVE_SOURCE_SHIFT (0) +#define R200_FRONT_AMBIENT_SOURCE_SHIFT (4) +#define R200_FRONT_DIFFUSE_SOURCE_SHIFT (8) +#define R200_FRONT_SPECULAR_SOURCE_SHIFT (12) +#define R200_BACK_EMISSIVE_SOURCE_SHIFT (16) +#define R200_BACK_AMBIENT_SOURCE_SHIFT (20) +#define R200_BACK_DIFFUSE_SOURCE_SHIFT (24) +#define R200_BACK_SPECULAR_SOURCE_SHIFT (28) +#define R200_SE_TCL_PER_LIGHT_CTL_0 0x2270 +#define R200_LIGHT_0_ENABLE (1<<0) +#define R200_LIGHT_0_ENABLE_AMBIENT (1<<1) +#define R200_LIGHT_0_ENABLE_SPECULAR (1<<2) +#define R200_LIGHT_0_IS_LOCAL (1<<3) +#define R200_LIGHT_0_IS_SPOT (1<<4) +#define R200_LIGHT_0_DUAL_CONE (1<<5) +#define R200_LIGHT_0_ENABLE_RANGE_ATTEN (1<<6) +#define R200_LIGHT_0_CONSTANT_RANGE_ATTEN (1<<7) +#define R200_LIGHT_1_ENABLE (1<<16) +#define R200_LIGHT_1_ENABLE_AMBIENT (1<<17) +#define R200_LIGHT_1_ENABLE_SPECULAR (1<<18) +#define R200_LIGHT_1_IS_LOCAL (1<<19) +#define R200_LIGHT_1_IS_SPOT (1<<20) +#define R200_LIGHT_1_DUAL_CONE (1<<21) +#define R200_LIGHT_1_ENABLE_RANGE_ATTEN (1<<22) +#define R200_LIGHT_1_CONSTANT_RANGE_ATTEN (1<<23) +#define R200_LIGHT_0_SHIFT (0) +#define R200_LIGHT_1_SHIFT (16) +#define R200_SE_TCL_PER_LIGHT_CTL_1 0x2274 +#define R200_LIGHT_2_SHIFT (0) +#define R200_LIGHT_3_SHIFT (16) +#define R200_SE_TCL_PER_LIGHT_CTL_2 0x2278 +#define R200_LIGHT_4_SHIFT (0) +#define R200_LIGHT_5_SHIFT (16) +#define R200_SE_TCL_PER_LIGHT_CTL_3 0x227c +#define R200_LIGHT_6_SHIFT (0) +#define R200_LIGHT_7_SHIFT (16) +/* gap */ +#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8 +#define R200_TEXGEN_COMP_MASK (0xf) +#define R200_TEXGEN_COMP_S (0x1) +#define R200_TEXGEN_COMP_T (0x2) +#define R200_TEXGEN_COMP_R (0x4) +#define R200_TEXGEN_COMP_Q (0x8) +#define R200_TEXGEN_0_COMP_MASK_SHIFT (0) +#define R200_TEXGEN_1_COMP_MASK_SHIFT (4) +#define R200_TEXGEN_2_COMP_MASK_SHIFT (8) +#define R200_TEXGEN_3_COMP_MASK_SHIFT (12) +#define R200_TEXGEN_4_COMP_MASK_SHIFT (16) +#define R200_TEXGEN_5_COMP_MASK_SHIFT (20) +#define R200_SE_TCL_TEX_PROC_CTL_3 0x22ac +#define R200_TEXGEN_0_INPUT_TEX_SHIFT (0) +#define R200_TEXGEN_1_INPUT_TEX_SHIFT (4) +#define R200_TEXGEN_2_INPUT_TEX_SHIFT (8) +#define R200_TEXGEN_3_INPUT_TEX_SHIFT (12) +#define R200_TEXGEN_4_INPUT_TEX_SHIFT (16) +#define R200_TEXGEN_5_INPUT_TEX_SHIFT (20) +#define R200_SE_TCL_TEX_PROC_CTL_0 0x22b0 +#define R200_TEXGEN_TEXMAT_0_ENABLE (1<<0) +#define R200_TEXGEN_TEXMAT_1_ENABLE (1<<1) +#define R200_TEXGEN_TEXMAT_2_ENABLE (1<<2) +#define R200_TEXGEN_TEXMAT_3_ENABLE (1<<3) +#define R200_TEXGEN_TEXMAT_4_ENABLE (1<<4) +#define R200_TEXGEN_TEXMAT_5_ENABLE (1<<5) +#define R200_TEXMAT_0_ENABLE (1<<8) +#define R200_TEXMAT_1_ENABLE (1<<9) +#define R200_TEXMAT_2_ENABLE (1<<10) +#define R200_TEXMAT_3_ENABLE (1<<11) +#define R200_TEXMAT_4_ENABLE (1<<12) +#define R200_TEXMAT_5_ENABLE (1<<13) +#define R200_TEXGEN_FORCE_W_TO_ONE (1<<16) +#define R200_SE_TCL_TEX_PROC_CTL_1 0x22b4 +#define R200_TEXGEN_INPUT_MASK (0xf) +#define R200_TEXGEN_INPUT_TEXCOORD_0 (0) +#define R200_TEXGEN_INPUT_TEXCOORD_1 (1) +#define R200_TEXGEN_INPUT_TEXCOORD_2 (2) +#define R200_TEXGEN_INPUT_TEXCOORD_3 (3) +#define R200_TEXGEN_INPUT_TEXCOORD_4 (4) +#define R200_TEXGEN_INPUT_TEXCOORD_5 (5) +#define R200_TEXGEN_INPUT_OBJ (8) +#define R200_TEXGEN_INPUT_EYE (9) +#define R200_TEXGEN_INPUT_EYE_NORMAL (0xa) +#define R200_TEXGEN_INPUT_EYE_REFLECT (0xb) +#define R200_TEXGEN_INPUT_SPHERE (0xd) +#define R200_TEXGEN_0_INPUT_SHIFT (0) +#define R200_TEXGEN_1_INPUT_SHIFT (4) +#define R200_TEXGEN_2_INPUT_SHIFT (8) +#define R200_TEXGEN_3_INPUT_SHIFT (12) +#define R200_TEXGEN_4_INPUT_SHIFT (16) +#define R200_TEXGEN_5_INPUT_SHIFT (20) +#define R200_SE_TC_TEX_CYL_WRAP_CTL 0x22b8 +/* gap */ +#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0 +#define R200_UCP_IN_CLIP_SPACE (1<<0) +#define R200_UCP_IN_MODEL_SPACE (1<<1) +#define R200_UCP_ENABLE_0 (1<<2) +#define R200_UCP_ENABLE_1 (1<<3) +#define R200_UCP_ENABLE_2 (1<<4) +#define R200_UCP_ENABLE_3 (1<<5) +#define R200_UCP_ENABLE_4 (1<<6) +#define R200_UCP_ENABLE_5 (1<<7) +#define R200_TCL_FOG_MASK (3<<8) +#define R200_TCL_FOG_DISABLE (0<<8) +#define R200_TCL_FOG_EXP (1<<8) +#define R200_TCL_FOG_EXP2 (2<<8) +#define R200_TCL_FOG_LINEAR (3<<8) +#define R200_RNG_BASED_FOG (1<<10) +#define R200_CLIP_DISABLE (1<<11) +#define R200_CULL_FRONT_IS_CW (0<<28) +#define R200_CULL_FRONT_IS_CCW (1<<28) +#define R200_CULL_FRONT (1<<29) +#define R200_CULL_BACK (1<<30) +#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4 +#define R200_PS_MULT_PVATTENCONST (0<<0) +#define R200_PS_MULT_PVATTEN (1<<0) +#define R200_PS_MULT_ATTENCONST (2<<0) +#define R200_PS_MULT_PVCONST (3<<0) +#define R200_PS_MULT_CONST (4<<0) +#define R200_PS_MULT_MASK (7<<0) +#define R200_PS_LIN_ATT_ZERO (1<<3) +#define R200_PS_USE_MODEL_EYE_VEC (1<<4) +#define R200_PS_ATT_ALPHA (1<<5) +#define R200_PS_UCP_MODE_MASK (3<<6) +#define R200_PS_GEN_TEX_0 (1<<8) +#define R200_PS_GEN_TEX_1 (1<<9) +#define R200_PS_GEN_TEX_2 (1<<10) +#define R200_PS_GEN_TEX_3 (1<<11) +#define R200_PS_GEN_TEX_4 (1<<12) +#define R200_PS_GEN_TEX_5 (1<<13) +#define R200_PS_GEN_TEX_0_SHIFT (8) +#define R200_PS_GEN_TEX_MASK (0x3f<<8) +#define R200_PS_SE_SEL_STATE (1<<16) +/* gap */ +/* taken from r300, see comments there */ +#define R200_VAP_PVS_CNTL_1 0x22d0 +# define R200_PVS_CNTL_1_PROGRAM_START_SHIFT 0 +# define R200_PVS_CNTL_1_POS_END_SHIFT 10 +# define R200_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +/* Addresses are relative the the vertex program parameters area. */ +#define R200_VAP_PVS_CNTL_2 0x22d4 +# define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 +# define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 +/* gap */ + +#define R200_SE_VTX_ST_POS_0_X_4 0x2300 +#define R200_SE_VTX_ST_POS_0_Y_4 0x2304 +#define R200_SE_VTX_ST_POS_0_Z_4 0x2308 +#define R200_SE_VTX_ST_POS_0_W_4 0x230c +#define R200_SE_VTX_ST_NORM_0_X 0x2310 +#define R200_SE_VTX_ST_NORM_0_Y 0x2314 +#define R200_SE_VTX_ST_NORM_0_Z 0x2318 +#define R200_SE_VTX_ST_PVMS 0x231c +#define R200_SE_VTX_ST_CLR_0_R 0x2320 +#define R200_SE_VTX_ST_CLR_0_G 0x2324 +#define R200_SE_VTX_ST_CLR_0_B 0x2328 +#define R200_SE_VTX_ST_CLR_0_A 0x232c +#define R200_SE_VTX_ST_CLR_1_R 0x2330 +#define R200_SE_VTX_ST_CLR_1_G 0x2334 +#define R200_SE_VTX_ST_CLR_1_B 0x2338 +#define R200_SE_VTX_ST_CLR_1_A 0x233c +#define R200_SE_VTX_ST_CLR_2_R 0x2340 +#define R200_SE_VTX_ST_CLR_2_G 0x2344 +#define R200_SE_VTX_ST_CLR_2_B 0x2348 +#define R200_SE_VTX_ST_CLR_2_A 0x234c +#define R200_SE_VTX_ST_CLR_3_R 0x2350 +#define R200_SE_VTX_ST_CLR_3_G 0x2354 +#define R200_SE_VTX_ST_CLR_3_B 0x2358 +#define R200_SE_VTX_ST_CLR_3_A 0x235c +#define R200_SE_VTX_ST_CLR_4_R 0x2360 +#define R200_SE_VTX_ST_CLR_4_G 0x2364 +#define R200_SE_VTX_ST_CLR_4_B 0x2368 +#define R200_SE_VTX_ST_CLR_4_A 0x236c +#define R200_SE_VTX_ST_CLR_5_R 0x2370 +#define R200_SE_VTX_ST_CLR_5_G 0x2374 +#define R200_SE_VTX_ST_CLR_5_B 0x2378 +#define R200_SE_VTX_ST_CLR_5_A 0x237c +#define R200_SE_VTX_ST_CLR_6_R 0x2380 +#define R200_SE_VTX_ST_CLR_6_G 0x2384 +#define R200_SE_VTX_ST_CLR_6_B 0x2388 +#define R200_SE_VTX_ST_CLR_6_A 0x238c +#define R200_SE_VTX_ST_CLR_7_R 0x2390 +#define R200_SE_VTX_ST_CLR_7_G 0x2394 +#define R200_SE_VTX_ST_CLR_7_B 0x2398 +#define R200_SE_VTX_ST_CLR_7_A 0x239c +#define R200_SE_VTX_ST_TEX_0_S 0x23a0 +#define R200_SE_VTX_ST_TEX_0_T 0x23a4 +#define R200_SE_VTX_ST_TEX_0_R 0x23a8 +#define R200_SE_VTX_ST_TEX_0_Q 0x23ac +#define R200_SE_VTX_ST_TEX_1_S 0x23b0 +#define R200_SE_VTX_ST_TEX_1_T 0x23b4 +#define R200_SE_VTX_ST_TEX_1_R 0x23b8 +#define R200_SE_VTX_ST_TEX_1_Q 0x23bc +#define R200_SE_VTX_ST_TEX_2_S 0x23c0 +#define R200_SE_VTX_ST_TEX_2_T 0x23c4 +#define R200_SE_VTX_ST_TEX_2_R 0x23c8 +#define R200_SE_VTX_ST_TEX_2_Q 0x23cc +#define R200_SE_VTX_ST_TEX_3_S 0x23d0 +#define R200_SE_VTX_ST_TEX_3_T 0x23d4 +#define R200_SE_VTX_ST_TEX_3_R 0x23d8 +#define R200_SE_VTX_ST_TEX_3_Q 0x23dc +#define R200_SE_VTX_ST_TEX_4_S 0x23e0 +#define R200_SE_VTX_ST_TEX_4_T 0x23e4 +#define R200_SE_VTX_ST_TEX_4_R 0x23e8 +#define R200_SE_VTX_ST_TEX_4_Q 0x23ec +#define R200_SE_VTX_ST_TEX_5_S 0x23f0 +#define R200_SE_VTX_ST_TEX_5_T 0x23f4 +#define R200_SE_VTX_ST_TEX_5_R 0x23f8 +#define R200_SE_VTX_ST_TEX_5_Q 0x23fc +#define R200_SE_VTX_ST_PNT_SPRT_SZ 0x2400 +#define R200_SE_VTX_ST_DISC_FOG 0x2404 +#define R200_SE_VTX_ST_SHININESS_0 0x2408 +#define R200_SE_VTX_ST_SHININESS_1 0x240c +#define R200_SE_VTX_ST_BLND_WT_0 0x2410 +#define R200_SE_VTX_ST_BLND_WT_1 0x2414 +#define R200_SE_VTX_ST_BLND_WT_2 0x2418 +#define R200_SE_VTX_ST_BLND_WT_3 0x241c +#define R200_SE_VTX_ST_POS_1_X 0x2420 +#define R200_SE_VTX_ST_POS_1_Y 0x2424 +#define R200_SE_VTX_ST_POS_1_Z 0x2428 +#define R200_SE_VTX_ST_POS_1_W 0x242c +#define R200_SE_VTX_ST_NORM_1_X 0x2430 +#define R200_SE_VTX_ST_NORM_1_Y 0x2434 +#define R200_SE_VTX_ST_NORM_1_Z 0x2438 +#define R200_SE_VTX_ST_USR_CLR_0_R 0x2440 +#define R200_SE_VTX_ST_USR_CLR_0_G 0x2444 +#define R200_SE_VTX_ST_USR_CLR_0_B 0x2448 +#define R200_SE_VTX_ST_USR_CLR_0_A 0x244c +#define R200_SE_VTX_ST_USR_CLR_1_R 0x2450 +#define R200_SE_VTX_ST_USR_CLR_1_G 0x2454 +#define R200_SE_VTX_ST_USR_CLR_1_B 0x2458 +#define R200_SE_VTX_ST_USR_CLR_1_A 0x245c +#define R200_SE_VTX_ST_CLR_0_PKD 0x2460 +#define R200_SE_VTX_ST_CLR_1_PKD 0x2464 +#define R200_SE_VTX_ST_CLR_2_PKD 0x2468 +#define R200_SE_VTX_ST_CLR_3_PKD 0x246c +#define R200_SE_VTX_ST_CLR_4_PKD 0x2470 +#define R200_SE_VTX_ST_CLR_5_PKD 0x2474 +#define R200_SE_VTX_ST_CLR_6_PKD 0x2478 +#define R200_SE_VTX_ST_CLR_7_PKD 0x247c +#define R200_SE_VTX_ST_POS_0_X_2 0x2480 +#define R200_SE_VTX_ST_POS_0_Y_2 0x2484 +#define R200_SE_VTX_ST_PAR_CLR_LD 0x2488 +#define R200_SE_VTX_ST_USR_CLR_PKD 0x248c +#define R200_SE_VTX_ST_POS_0_X_3 0x2490 +#define R200_SE_VTX_ST_POS_0_Y_3 0x2494 +#define R200_SE_VTX_ST_POS_0_Z_3 0x2498 +#define R200_SE_VTX_ST_END_OF_PKT 0x249c +/* gap */ +#define R200_RE_POINTSIZE 0x2648 +#define R200_POINTSIZE_SHIFT 0 +#define R200_MAXPOINTSIZE_SHIFT 16 +/* gap */ +#define R200_RE_TOP_LEFT 0x26c0 +#define R200_RE_LEFT_SHIFT 0 +#define R200_RE_TOP_SHIFT 16 +#define R200_RE_MISC 0x26c4 +#define R200_STIPPLE_COORD_MASK 0x1f +#define R200_STIPPLE_X_OFFSET_SHIFT 0 +#define R200_STIPPLE_X_OFFSET_MASK (0x1f << 0) +#define R200_STIPPLE_Y_OFFSET_SHIFT 8 +#define R200_STIPPLE_Y_OFFSET_MASK (0x1f << 8) +#define R200_STIPPLE_LITTLE_BIT_ORDER (0 << 16) +#define R200_STIPPLE_BIG_BIT_ORDER (1 << 16) +/* gap */ +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_EXCLUSIVE_SCISSOR_0 0x01000000 +#define R200_EXCLUSIVE_SCISSOR_1 0x02000000 +#define R200_EXCLUSIVE_SCISSOR_2 0x04000000 +#define R200_SCISSOR_ENABLE_0 0x10000000 +#define R200_SCISSOR_ENABLE_1 0x20000000 +#define R200_SCISSOR_ENABLE_2 0x40000000 +/* gap */ +#define R200_PP_TXFILTER_0 0x2c00 +#define R200_MAG_FILTER_NEAREST (0 << 0) +#define R200_MAG_FILTER_LINEAR (1 << 0) +#define R200_MAG_FILTER_MASK (1 << 0) +#define R200_MIN_FILTER_NEAREST (0 << 1) +#define R200_MIN_FILTER_LINEAR (1 << 1) +#define R200_MIN_FILTER_NEAREST_MIP_NEAREST (2 << 1) +#define R200_MIN_FILTER_NEAREST_MIP_LINEAR (3 << 1) +#define R200_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 1) +#define R200_MIN_FILTER_LINEAR_MIP_LINEAR (7 << 1) +#define R200_MIN_FILTER_ANISO_NEAREST (8 << 1) +#define R200_MIN_FILTER_ANISO_LINEAR (9 << 1) +#define R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 << 1) +#define R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (11 << 1) +#define R200_MIN_FILTER_MASK (15 << 1) +#define R200_MAX_ANISO_1_TO_1 (0 << 5) +#define R200_MAX_ANISO_2_TO_1 (1 << 5) +#define R200_MAX_ANISO_4_TO_1 (2 << 5) +#define R200_MAX_ANISO_8_TO_1 (3 << 5) +#define R200_MAX_ANISO_16_TO_1 (4 << 5) +#define R200_MAX_ANISO_MASK (7 << 5) +#define R200_MAX_MIP_LEVEL_MASK (0x0f << 16) +#define R200_MAX_MIP_LEVEL_SHIFT 16 +#define R200_YUV_TO_RGB (1 << 20) +#define R200_YUV_TEMPERATURE_COOL (0 << 21) +#define R200_YUV_TEMPERATURE_HOT (1 << 21) +#define R200_YUV_TEMPERATURE_MASK (1 << 21) +#define R200_WRAPEN_S (1 << 22) +#define R200_CLAMP_S_WRAP (0 << 23) +#define R200_CLAMP_S_MIRROR (1 << 23) +#define R200_CLAMP_S_CLAMP_LAST (2 << 23) +#define R200_CLAMP_S_MIRROR_CLAMP_LAST (3 << 23) +#define R200_CLAMP_S_CLAMP_BORDER (4 << 23) +#define R200_CLAMP_S_MIRROR_CLAMP_BORDER (5 << 23) +#define R200_CLAMP_S_CLAMP_GL (6 << 23) +#define R200_CLAMP_S_MIRROR_CLAMP_GL (7 << 23) +#define R200_CLAMP_S_MASK (7 << 23) +#define R200_WRAPEN_T (1 << 26) +#define R200_CLAMP_T_WRAP (0 << 27) +#define R200_CLAMP_T_MIRROR (1 << 27) +#define R200_CLAMP_T_CLAMP_LAST (2 << 27) +#define R200_CLAMP_T_MIRROR_CLAMP_LAST (3 << 27) +#define R200_CLAMP_T_CLAMP_BORDER (4 << 27) +#define R200_CLAMP_T_MIRROR_CLAMP_BORDER (5 << 27) +#define R200_CLAMP_T_CLAMP_GL (6 << 27) +#define R200_CLAMP_T_MIRROR_CLAMP_GL (7 << 27) +#define R200_CLAMP_T_MASK (7 << 27) +#define R200_KILL_LT_ZERO (1 << 30) +#define R200_BORDER_MODE_OGL (0 << 31) +#define R200_BORDER_MODE_D3D (1 << 31) +#define R200_PP_TXFORMAT_0 0x2c04 +#define R200_TXFORMAT_I8 (0 << 0) +#define R200_TXFORMAT_AI88 (1 << 0) +#define R200_TXFORMAT_RGB332 (2 << 0) +#define R200_TXFORMAT_ARGB1555 (3 << 0) +#define R200_TXFORMAT_RGB565 (4 << 0) +#define R200_TXFORMAT_ARGB4444 (5 << 0) +#define R200_TXFORMAT_ARGB8888 (6 << 0) +#define R200_TXFORMAT_RGBA8888 (7 << 0) +#define R200_TXFORMAT_Y8 (8 << 0) +#define R200_TXFORMAT_AVYU4444 (9 << 0) +#define R200_TXFORMAT_VYUY422 (10 << 0) +#define R200_TXFORMAT_YVYU422 (11 << 0) +#define R200_TXFORMAT_DXT1 (12 << 0) +#define R200_TXFORMAT_DXT23 (14 << 0) +#define R200_TXFORMAT_DXT45 (15 << 0) +#define R200_TXFORMAT_DVDU88 (18 << 0) +#define R200_TXFORMAT_LDVDU655 (19 << 0) +#define R200_TXFORMAT_LDVDU8888 (20 << 0) +#define R200_TXFORMAT_GR1616 (21 << 0) +#define R200_TXFORMAT_ABGR8888 (22 << 0) +#define R200_TXFORMAT_BGR111110 (23 << 0) +#define R200_TXFORMAT_FORMAT_MASK (31 << 0) +#define R200_TXFORMAT_FORMAT_SHIFT 0 +#define R200_TXFORMAT_APPLE_YUV (1 << 5) +#define R200_TXFORMAT_ALPHA_IN_MAP (1 << 6) +#define R200_TXFORMAT_NON_POWER2 (1 << 7) +#define R200_TXFORMAT_WIDTH_MASK (15 << 8) +#define R200_TXFORMAT_WIDTH_SHIFT 8 +#define R200_TXFORMAT_HEIGHT_MASK (15 << 12) +#define R200_TXFORMAT_HEIGHT_SHIFT 12 +#define R200_TXFORMAT_F5_WIDTH_MASK (15 << 16) /* cube face 5 */ +#define R200_TXFORMAT_F5_WIDTH_SHIFT 16 +#define R200_TXFORMAT_F5_HEIGHT_MASK (15 << 20) +#define R200_TXFORMAT_F5_HEIGHT_SHIFT 20 +#define R200_TXFORMAT_ST_ROUTE_STQ0 (0 << 24) +#define R200_TXFORMAT_ST_ROUTE_STQ1 (1 << 24) +#define R200_TXFORMAT_ST_ROUTE_STQ2 (2 << 24) +#define R200_TXFORMAT_ST_ROUTE_STQ3 (3 << 24) +#define R200_TXFORMAT_ST_ROUTE_STQ4 (4 << 24) +#define R200_TXFORMAT_ST_ROUTE_STQ5 (5 << 24) +#define R200_TXFORMAT_ST_ROUTE_MASK (7 << 24) +#define R200_TXFORMAT_ST_ROUTE_SHIFT 24 +#define R200_TXFORMAT_LOOKUP_DISABLE (1 << 27) +#define R200_TXFORMAT_ALPHA_MASK_ENABLE (1 << 28) +#define R200_TXFORMAT_CHROMA_KEY_ENABLE (1 << 29) +#define R200_TXFORMAT_CUBIC_MAP_ENABLE (1 << 30) +#define R200_PP_TXFORMAT_X_0 0x2c08 +#define R200_DEPTH_LOG2_MASK (0xf << 0) +#define R200_DEPTH_LOG2_SHIFT 0 +#define R200_VOLUME_FILTER_SHIFT 4 +#define R200_VOLUME_FILTER_MASK (1 << 4) +#define R200_VOLUME_FILTER_NEAREST (0 << 4) +#define R200_VOLUME_FILTER_LINEAR (1 << 4) +#define R200_WRAPEN_Q (1 << 8) +#define R200_CLAMP_Q_WRAP (0 << 9) +#define R200_CLAMP_Q_MIRROR (1 << 9) +#define R200_CLAMP_Q_CLAMP_LAST (2 << 9) +#define R200_CLAMP_Q_MIRROR_CLAMP_LAST (3 << 9) +#define R200_CLAMP_Q_CLAMP_BORDER (4 << 9) +#define R200_CLAMP_Q_MIRROR_CLAMP_BORDER (5 << 9) +#define R200_CLAMP_Q_CLAMP_GL (6 << 9) +#define R200_CLAMP_Q_MIRROR_CLAMP_GL (7 << 9) +#define R200_CLAMP_Q_MASK (7 << 9) +#define R200_MIN_MIP_LEVEL_MASK (0xff << 12) +#define R200_MIN_MIP_LEVEL_SHIFT 12 +#define R200_TEXCOORD_NONPROJ (0 << 16) +#define R200_TEXCOORD_CUBIC_ENV (1 << 16) +#define R200_TEXCOORD_VOLUME (2 << 16) +#define R200_TEXCOORD_PROJ (3 << 16) +#define R200_TEXCOORD_DEPTH (4 << 16) +#define R200_TEXCOORD_1D_PROJ (5 << 16) +#define R200_TEXCOORD_1D (6 << 16) +#define R200_TEXCOORD_ZERO (7 << 16) +#define R200_TEXCOORD_MASK (7 << 16) +#define R200_LOD_BIAS_MASK (0xfff80000) +#define R200_LOD_BIAS_SHIFT 19 +#define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */ +#define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */ +#define R200_PP_BORDER_COLOR_0 0x2c14 +#define R200_PP_CUBIC_FACES_0 0x2c18 +#define R200_FACE_WIDTH_1_SHIFT 0 +#define R200_FACE_HEIGHT_1_SHIFT 4 +#define R200_FACE_WIDTH_1_MASK (0xf << 0) +#define R200_FACE_HEIGHT_1_MASK (0xf << 4) +#define R200_FACE_WIDTH_2_SHIFT 8 +#define R200_FACE_HEIGHT_2_SHIFT 12 +#define R200_FACE_WIDTH_2_MASK (0xf << 8) +#define R200_FACE_HEIGHT_2_MASK (0xf << 12) +#define R200_FACE_WIDTH_3_SHIFT 16 +#define R200_FACE_HEIGHT_3_SHIFT 20 +#define R200_FACE_WIDTH_3_MASK (0xf << 16) +#define R200_FACE_HEIGHT_3_MASK (0xf << 20) +#define R200_FACE_WIDTH_4_SHIFT 24 +#define R200_FACE_HEIGHT_4_SHIFT 28 +#define R200_FACE_WIDTH_4_MASK (0xf << 24) +#define R200_FACE_HEIGHT_4_MASK (0xf << 28) +#define R200_PP_TXMULTI_CTL_0 0x2c1c /* name from ddx, rest RE... */ +#define R200_PASS1_TXFORMAT_LOOKUP_DISABLE (1 << 0) +#define R200_PASS1_TEXCOORD_NONPROJ (0 << 1) +#define R200_PASS1_TEXCOORD_CUBIC_ENV (1 << 1) +#define R200_PASS1_TEXCOORD_VOLUME (2 << 1) +#define R200_PASS1_TEXCOORD_PROJ (3 << 1) +#define R200_PASS1_TEXCOORD_DEPTH (4 << 1) +#define R200_PASS1_TEXCOORD_1D_PROJ (5 << 1) +#define R200_PASS1_TEXCOORD_1D (6 << 1) /* pass1 texcoords only */ +#define R200_PASS1_TEXCOORD_ZERO (7 << 1) /* verifed for 2d targets! */ +#define R200_PASS1_TEXCOORD_MASK (7 << 1) /* assumed same values as for pass2 */ +#define R200_PASS1_ST_ROUTE_STQ0 (0 << 4) +#define R200_PASS1_ST_ROUTE_STQ1 (1 << 4) +#define R200_PASS1_ST_ROUTE_STQ2 (2 << 4) +#define R200_PASS1_ST_ROUTE_STQ3 (3 << 4) +#define R200_PASS1_ST_ROUTE_STQ4 (4 << 4) +#define R200_PASS1_ST_ROUTE_STQ5 (5 << 4) +#define R200_PASS1_ST_ROUTE_MASK (7 << 4) +#define R200_PASS1_ST_ROUTE_SHIFT (4) +#define R200_PASS2_COORDS_REG_0 (2 << 24) +#define R200_PASS2_COORDS_REG_1 (3 << 24) +#define R200_PASS2_COORDS_REG_2 (4 << 24) +#define R200_PASS2_COORDS_REG_3 (5 << 24) +#define R200_PASS2_COORDS_REG_4 (6 << 24) +#define R200_PASS2_COORDS_REG_5 (7 << 24) +#define R200_PASS2_COORDS_REG_MASK (0x7 << 24) +#define R200_PASS2_COORDS_REG_SHIFT (24) +#define R200_PP_TXFILTER_1 0x2c20 +#define R200_PP_TXFORMAT_1 0x2c24 +#define R200_PP_TXFORMAT_X_1 0x2c28 +#define R200_PP_TXSIZE_1 0x2c2c +#define R200_PP_TXPITCH_1 0x2c30 +#define R200_PP_BORDER_COLOR_1 0x2c34 +#define R200_PP_CUBIC_FACES_1 0x2c38 +#define R200_PP_TXMULTI_CTL_1 0x2c3c +#define R200_PP_TXFILTER_2 0x2c40 +#define R200_PP_TXFORMAT_2 0x2c44 +#define R200_PP_TXSIZE_2 0x2c4c +#define R200_PP_TXFORMAT_X_2 0x2c48 +#define R200_PP_TXPITCH_2 0x2c50 +#define R200_PP_BORDER_COLOR_2 0x2c54 +#define R200_PP_CUBIC_FACES_2 0x2c58 +#define R200_PP_TXMULTI_CTL_2 0x2c5c +#define R200_PP_TXFILTER_3 0x2c60 +#define R200_PP_TXFORMAT_3 0x2c64 +#define R200_PP_TXSIZE_3 0x2c6c +#define R200_PP_TXFORMAT_X_3 0x2c68 +#define R200_PP_TXPITCH_3 0x2c70 +#define R200_PP_BORDER_COLOR_3 0x2c74 +#define R200_PP_CUBIC_FACES_3 0x2c78 +#define R200_PP_TXMULTI_CTL_3 0x2c7c +#define R200_PP_TXFILTER_4 0x2c80 +#define R200_PP_TXFORMAT_4 0x2c84 +#define R200_PP_TXSIZE_4 0x2c8c +#define R200_PP_TXFORMAT_X_4 0x2c88 +#define R200_PP_TXPITCH_4 0x2c90 +#define R200_PP_BORDER_COLOR_4 0x2c94 +#define R200_PP_CUBIC_FACES_4 0x2c98 +#define R200_PP_TXMULTI_CTL_4 0x2c9c +#define R200_PP_TXFILTER_5 0x2ca0 +#define R200_PP_TXFORMAT_5 0x2ca4 +#define R200_PP_TXSIZE_5 0x2cac +#define R200_PP_TXFORMAT_X_5 0x2ca8 +#define R200_PP_TXPITCH_5 0x2cb0 +#define R200_PP_BORDER_COLOR_5 0x2cb4 +#define R200_PP_CUBIC_FACES_5 0x2cb8 +#define R200_PP_TXMULTI_CTL_5 0x2cbc +/* gap */ +#define R200_PP_CNTL_X 0x2cc4 /* Reveree engineered from fglrx */ +#define R200_PPX_TEX_0_ENABLE (1 << 0) +#define R200_PPX_TEX_1_ENABLE (1 << 1) +#define R200_PPX_TEX_2_ENABLE (1 << 2) +#define R200_PPX_TEX_3_ENABLE (1 << 3) +#define R200_PPX_TEX_4_ENABLE (1 << 4) +#define R200_PPX_TEX_5_ENABLE (1 << 5) +#define R200_PPX_TEX_ENABLE_MASK (0x3f << 0) +#define R200_PPX_OUTPUT_REG_0 (1 << 6) +#define R200_PPX_OUTPUT_REG_1 (1 << 7) +#define R200_PPX_OUTPUT_REG_2 (1 << 8) +#define R200_PPX_OUTPUT_REG_3 (1 << 9) +#define R200_PPX_OUTPUT_REG_4 (1 << 10) +#define R200_PPX_OUTPUT_REG_5 (1 << 11) +#define R200_PPX_OUTPUT_REG_MASK (0x3f << 6) +#define R200_PPX_OUTPUT_REG_0_SHIFT (6) +#define R200_PPX_PFS_INST0_ENABLE (1 << 12) +#define R200_PPX_PFS_INST1_ENABLE (1 << 13) +#define R200_PPX_PFS_INST2_ENABLE (1 << 14) +#define R200_PPX_PFS_INST3_ENABLE (1 << 15) +#define R200_PPX_PFS_INST4_ENABLE (1 << 16) +#define R200_PPX_PFS_INST5_ENABLE (1 << 17) +#define R200_PPX_PFS_INST6_ENABLE (1 << 18) +#define R200_PPX_PFS_INST7_ENABLE (1 << 19) +#define R200_PPX_PFS_INST_ENABLE_MASK (0xff << 12) +#define R200_PPX_FPS_INST0_ENABLE_SHIFT (12) +/* gap */ +#define R200_PP_TRI_PERF 0x2cf8 +#define R200_TRI_CUTOFF_MASK (0x1f << 0) +#define R200_PP_PERF_CNTL 0x2cfc +#define R200_PP_TXOFFSET_0 0x2d00 +#define R200_TXO_ENDIAN_NO_SWAP (0 << 0) +#define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0) +#define R200_TXO_ENDIAN_WORD_SWAP (2 << 0) +#define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +#define R200_TXO_MACRO_TILE (1 << 2) +#define R200_TXO_MICRO_TILE (1 << 3) +#define R200_TXO_OFFSET_MASK 0xffffffe0 +#define R200_TXO_OFFSET_SHIFT 5 +#define R200_PP_CUBIC_OFFSET_F1_0 0x2d04 +#define R200_PP_CUBIC_OFFSET_F2_0 0x2d08 +#define R200_PP_CUBIC_OFFSET_F3_0 0x2d0c +#define R200_PP_CUBIC_OFFSET_F4_0 0x2d10 +#define R200_PP_CUBIC_OFFSET_F5_0 0x2d14 +#define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_CUBIC_OFFSET_F1_1 0x2d1c +#define R200_PP_CUBIC_OFFSET_F2_1 0x2d20 +#define R200_PP_CUBIC_OFFSET_F3_1 0x2d24 +#define R200_PP_CUBIC_OFFSET_F4_1 0x2d28 +#define R200_PP_CUBIC_OFFSET_F5_1 0x2d2c +#define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_CUBIC_OFFSET_F1_2 0x2d34 +#define R200_PP_CUBIC_OFFSET_F2_2 0x2d38 +#define R200_PP_CUBIC_OFFSET_F3_2 0x2d3c +#define R200_PP_CUBIC_OFFSET_F4_2 0x2d40 +#define R200_PP_CUBIC_OFFSET_F5_2 0x2d44 +#define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_CUBIC_OFFSET_F1_3 0x2d4c +#define R200_PP_CUBIC_OFFSET_F2_3 0x2d50 +#define R200_PP_CUBIC_OFFSET_F3_3 0x2d54 +#define R200_PP_CUBIC_OFFSET_F4_3 0x2d58 +#define R200_PP_CUBIC_OFFSET_F5_3 0x2d5c +#define R200_PP_TXOFFSET_4 0x2d60 +#define R200_PP_CUBIC_OFFSET_F1_4 0x2d64 +#define R200_PP_CUBIC_OFFSET_F2_4 0x2d68 +#define R200_PP_CUBIC_OFFSET_F3_4 0x2d6c +#define R200_PP_CUBIC_OFFSET_F4_4 0x2d70 +#define R200_PP_CUBIC_OFFSET_F5_4 0x2d74 +#define R200_PP_TXOFFSET_5 0x2d78 +#define R200_PP_CUBIC_OFFSET_F1_5 0x2d7c +#define R200_PP_CUBIC_OFFSET_F2_5 0x2d80 +#define R200_PP_CUBIC_OFFSET_F3_5 0x2d84 +#define R200_PP_CUBIC_OFFSET_F4_5 0x2d88 +#define R200_PP_CUBIC_OFFSET_F5_5 0x2d8c +/* gap */ +#define R200_PP_TAM_DEBUG3 0x2d9c +/* gap */ +#define R200_PP_TFACTOR_0 0x2ee0 +#define R200_PP_TFACTOR_1 0x2ee4 +#define R200_PP_TFACTOR_2 0x2ee8 +#define R200_PP_TFACTOR_3 0x2eec +#define R200_PP_TFACTOR_4 0x2ef0 +#define R200_PP_TFACTOR_5 0x2ef4 +#define R200_PP_TFACTOR_6 0x2ef8 +#define R200_PP_TFACTOR_7 0x2efc +#define R200_PP_TXCBLEND_0 0x2f00 +#define R200_TXC_ARG_A_ZERO (0) +#define R200_TXC_ARG_A_CURRENT_COLOR (2) +#define R200_TXC_ARG_A_CURRENT_ALPHA (3) +#define R200_TXC_ARG_A_DIFFUSE_COLOR (4) +#define R200_TXC_ARG_A_DIFFUSE_ALPHA (5) +#define R200_TXC_ARG_A_SPECULAR_COLOR (6) +#define R200_TXC_ARG_A_SPECULAR_ALPHA (7) +#define R200_TXC_ARG_A_TFACTOR_COLOR (8) +#define R200_TXC_ARG_A_TFACTOR_ALPHA (9) +#define R200_TXC_ARG_A_R0_COLOR (10) +#define R200_TXC_ARG_A_R0_ALPHA (11) +#define R200_TXC_ARG_A_R1_COLOR (12) +#define R200_TXC_ARG_A_R1_ALPHA (13) +#define R200_TXC_ARG_A_R2_COLOR (14) +#define R200_TXC_ARG_A_R2_ALPHA (15) +#define R200_TXC_ARG_A_R3_COLOR (16) +#define R200_TXC_ARG_A_R3_ALPHA (17) +#define R200_TXC_ARG_A_R4_COLOR (18) +#define R200_TXC_ARG_A_R4_ALPHA (19) +#define R200_TXC_ARG_A_R5_COLOR (20) +#define R200_TXC_ARG_A_R5_ALPHA (21) +#define R200_TXC_ARG_A_TFACTOR1_COLOR (26) +#define R200_TXC_ARG_A_TFACTOR1_ALPHA (27) +#define R200_TXC_ARG_A_MASK (31 << 0) +#define R200_TXC_ARG_A_SHIFT 0 +#define R200_TXC_ARG_B_ZERO (0<<5) +#define R200_TXC_ARG_B_CURRENT_COLOR (2<<5) +#define R200_TXC_ARG_B_CURRENT_ALPHA (3<<5) +#define R200_TXC_ARG_B_DIFFUSE_COLOR (4<<5) +#define R200_TXC_ARG_B_DIFFUSE_ALPHA (5<<5) +#define R200_TXC_ARG_B_SPECULAR_COLOR (6<<5) +#define R200_TXC_ARG_B_SPECULAR_ALPHA (7<<5) +#define R200_TXC_ARG_B_TFACTOR_COLOR (8<<5) +#define R200_TXC_ARG_B_TFACTOR_ALPHA (9<<5) +#define R200_TXC_ARG_B_R0_COLOR (10<<5) +#define R200_TXC_ARG_B_R0_ALPHA (11<<5) +#define R200_TXC_ARG_B_R1_COLOR (12<<5) +#define R200_TXC_ARG_B_R1_ALPHA (13<<5) +#define R200_TXC_ARG_B_R2_COLOR (14<<5) +#define R200_TXC_ARG_B_R2_ALPHA (15<<5) +#define R200_TXC_ARG_B_R3_COLOR (16<<5) +#define R200_TXC_ARG_B_R3_ALPHA (17<<5) +#define R200_TXC_ARG_B_R4_COLOR (18<<5) +#define R200_TXC_ARG_B_R4_ALPHA (19<<5) +#define R200_TXC_ARG_B_R5_COLOR (20<<5) +#define R200_TXC_ARG_B_R5_ALPHA (21<<5) +#define R200_TXC_ARG_B_TFACTOR1_COLOR (26<<5) +#define R200_TXC_ARG_B_TFACTOR1_ALPHA (27<<5) +#define R200_TXC_ARG_B_MASK (31 << 5) +#define R200_TXC_ARG_B_SHIFT 5 +#define R200_TXC_ARG_C_ZERO (0<<10) +#define R200_TXC_ARG_C_CURRENT_COLOR (2<<10) +#define R200_TXC_ARG_C_CURRENT_ALPHA (3<<10) +#define R200_TXC_ARG_C_DIFFUSE_COLOR (4<<10) +#define R200_TXC_ARG_C_DIFFUSE_ALPHA (5<<10) +#define R200_TXC_ARG_C_SPECULAR_COLOR (6<<10) +#define R200_TXC_ARG_C_SPECULAR_ALPHA (7<<10) +#define R200_TXC_ARG_C_TFACTOR_COLOR (8<<10) +#define R200_TXC_ARG_C_TFACTOR_ALPHA (9<<10) +#define R200_TXC_ARG_C_R0_COLOR (10<<10) +#define R200_TXC_ARG_C_R0_ALPHA (11<<10) +#define R200_TXC_ARG_C_R1_COLOR (12<<10) +#define R200_TXC_ARG_C_R1_ALPHA (13<<10) +#define R200_TXC_ARG_C_R2_COLOR (14<<10) +#define R200_TXC_ARG_C_R2_ALPHA (15<<10) +#define R200_TXC_ARG_C_R3_COLOR (16<<10) +#define R200_TXC_ARG_C_R3_ALPHA (17<<10) +#define R200_TXC_ARG_C_R4_COLOR (18<<10) +#define R200_TXC_ARG_C_R4_ALPHA (19<<10) +#define R200_TXC_ARG_C_R5_COLOR (20<<10) +#define R200_TXC_ARG_C_R5_ALPHA (21<<10) +#define R200_TXC_ARG_C_TFACTOR1_COLOR (26<<10) +#define R200_TXC_ARG_C_TFACTOR1_ALPHA (27<<10) +#define R200_TXC_ARG_C_MASK (31 << 10) +#define R200_TXC_ARG_C_SHIFT 10 +#define R200_TXC_COMP_ARG_A (1 << 16) +#define R200_TXC_COMP_ARG_A_SHIFT (16) +#define R200_TXC_BIAS_ARG_A (1 << 17) +#define R200_TXC_SCALE_ARG_A (1 << 18) +#define R200_TXC_NEG_ARG_A (1 << 19) +#define R200_TXC_COMP_ARG_B (1 << 20) +#define R200_TXC_COMP_ARG_B_SHIFT (20) +#define R200_TXC_BIAS_ARG_B (1 << 21) +#define R200_TXC_SCALE_ARG_B (1 << 22) +#define R200_TXC_NEG_ARG_B (1 << 23) +#define R200_TXC_COMP_ARG_C (1 << 24) +#define R200_TXC_COMP_ARG_C_SHIFT (24) +#define R200_TXC_BIAS_ARG_C (1 << 25) +#define R200_TXC_SCALE_ARG_C (1 << 26) +#define R200_TXC_NEG_ARG_C (1 << 27) +#define R200_TXC_OP_MADD (0 << 28) +#define R200_TXC_OP_CND0 (2 << 28) +#define R200_TXC_OP_LERP (3 << 28) +#define R200_TXC_OP_DOT3 (4 << 28) +#define R200_TXC_OP_DOT4 (5 << 28) +#define R200_TXC_OP_CONDITIONAL (6 << 28) +#define R200_TXC_OP_DOT2_ADD (7 << 28) +#define R200_TXC_OP_MASK (7 << 28) +#define R200_PP_TXCBLEND2_0 0x2f04 +#define R200_TXC_TFACTOR_SEL_SHIFT 0 +#define R200_TXC_TFACTOR_SEL_MASK 0x7 +#define R200_TXC_TFACTOR1_SEL_SHIFT 4 +#define R200_TXC_TFACTOR1_SEL_MASK (0x7 << 4) +#define R200_TXC_SCALE_SHIFT 8 +#define R200_TXC_SCALE_MASK (7 << 8) +#define R200_TXC_SCALE_1X (0 << 8) +#define R200_TXC_SCALE_2X (1 << 8) +#define R200_TXC_SCALE_4X (2 << 8) +#define R200_TXC_SCALE_8X (3 << 8) +#define R200_TXC_SCALE_INV2 (5 << 8) +#define R200_TXC_SCALE_INV4 (6 << 8) +#define R200_TXC_SCALE_INV8 (7 << 8) +#define R200_TXC_CLAMP_SHIFT 12 +#define R200_TXC_CLAMP_MASK (3 << 12) +#define R200_TXC_CLAMP_WRAP (0 << 12) +#define R200_TXC_CLAMP_0_1 (1 << 12) +#define R200_TXC_CLAMP_8_8 (2 << 12) +#define R200_TXC_OUTPUT_REG_SHIFT 16 +#define R200_TXC_OUTPUT_REG_MASK (7 << 16) +#define R200_TXC_OUTPUT_REG_NONE (0 << 16) +#define R200_TXC_OUTPUT_REG_R0 (1 << 16) +#define R200_TXC_OUTPUT_REG_R1 (2 << 16) +#define R200_TXC_OUTPUT_REG_R2 (3 << 16) +#define R200_TXC_OUTPUT_REG_R3 (4 << 16) +#define R200_TXC_OUTPUT_REG_R4 (5 << 16) +#define R200_TXC_OUTPUT_REG_R5 (6 << 16) +#define R200_TXC_OUTPUT_MASK_MASK (7 << 20) +#define R200_TXC_OUTPUT_MASK_RGB (0 << 20) +#define R200_TXC_OUTPUT_MASK_RG (1 << 20) +#define R200_TXC_OUTPUT_MASK_RB (2 << 20) +#define R200_TXC_OUTPUT_MASK_R (3 << 20) +#define R200_TXC_OUTPUT_MASK_GB (4 << 20) +#define R200_TXC_OUTPUT_MASK_G (5 << 20) +#define R200_TXC_OUTPUT_MASK_B (6 << 20) +#define R200_TXC_OUTPUT_MASK_NONE (7 << 20) +#define R200_TXC_REPL_NORMAL 0 +#define R200_TXC_REPL_RED 1 +#define R200_TXC_REPL_GREEN 2 +#define R200_TXC_REPL_BLUE 3 +#define R200_TXC_REPL_ARG_A_SHIFT 26 +#define R200_TXC_REPL_ARG_A_MASK (3 << 26) +#define R200_TXC_REPL_ARG_B_SHIFT 28 +#define R200_TXC_REPL_ARG_B_MASK (3 << 28) +#define R200_TXC_REPL_ARG_C_SHIFT 30 +#define R200_TXC_REPL_ARG_C_MASK (3 << 30) +#define R200_PP_TXABLEND_0 0x2f08 +#define R200_TXA_ARG_A_ZERO (0) +#define R200_TXA_ARG_A_CURRENT_ALPHA (2) /* guess */ +#define R200_TXA_ARG_A_CURRENT_BLUE (3) /* guess */ +#define R200_TXA_ARG_A_DIFFUSE_ALPHA (4) +#define R200_TXA_ARG_A_DIFFUSE_BLUE (5) +#define R200_TXA_ARG_A_SPECULAR_ALPHA (6) +#define R200_TXA_ARG_A_SPECULAR_BLUE (7) +#define R200_TXA_ARG_A_TFACTOR_ALPHA (8) +#define R200_TXA_ARG_A_TFACTOR_BLUE (9) +#define R200_TXA_ARG_A_R0_ALPHA (10) +#define R200_TXA_ARG_A_R0_BLUE (11) +#define R200_TXA_ARG_A_R1_ALPHA (12) +#define R200_TXA_ARG_A_R1_BLUE (13) +#define R200_TXA_ARG_A_R2_ALPHA (14) +#define R200_TXA_ARG_A_R2_BLUE (15) +#define R200_TXA_ARG_A_R3_ALPHA (16) +#define R200_TXA_ARG_A_R3_BLUE (17) +#define R200_TXA_ARG_A_R4_ALPHA (18) +#define R200_TXA_ARG_A_R4_BLUE (19) +#define R200_TXA_ARG_A_R5_ALPHA (20) +#define R200_TXA_ARG_A_R5_BLUE (21) +#define R200_TXA_ARG_A_TFACTOR1_ALPHA (26) +#define R200_TXA_ARG_A_TFACTOR1_BLUE (27) +#define R200_TXA_ARG_A_MASK (31 << 0) +#define R200_TXA_ARG_A_SHIFT 0 +#define R200_TXA_ARG_B_ZERO (0<<5) +#define R200_TXA_ARG_B_CURRENT_ALPHA (2<<5) /* guess */ +#define R200_TXA_ARG_B_CURRENT_BLUE (3<<5) /* guess */ +#define R200_TXA_ARG_B_DIFFUSE_ALPHA (4<<5) +#define R200_TXA_ARG_B_DIFFUSE_BLUE (5<<5) +#define R200_TXA_ARG_B_SPECULAR_ALPHA (6<<5) +#define R200_TXA_ARG_B_SPECULAR_BLUE (7<<5) +#define R200_TXA_ARG_B_TFACTOR_ALPHA (8<<5) +#define R200_TXA_ARG_B_TFACTOR_BLUE (9<<5) +#define R200_TXA_ARG_B_R0_ALPHA (10<<5) +#define R200_TXA_ARG_B_R0_BLUE (11<<5) +#define R200_TXA_ARG_B_R1_ALPHA (12<<5) +#define R200_TXA_ARG_B_R1_BLUE (13<<5) +#define R200_TXA_ARG_B_R2_ALPHA (14<<5) +#define R200_TXA_ARG_B_R2_BLUE (15<<5) +#define R200_TXA_ARG_B_R3_ALPHA (16<<5) +#define R200_TXA_ARG_B_R3_BLUE (17<<5) +#define R200_TXA_ARG_B_R4_ALPHA (18<<5) +#define R200_TXA_ARG_B_R4_BLUE (19<<5) +#define R200_TXA_ARG_B_R5_ALPHA (20<<5) +#define R200_TXA_ARG_B_R5_BLUE (21<<5) +#define R200_TXA_ARG_B_TFACTOR1_ALPHA (26<<5) +#define R200_TXA_ARG_B_TFACTOR1_BLUE (27<<5) +#define R200_TXA_ARG_B_MASK (31 << 5) +#define R200_TXA_ARG_B_SHIFT 5 +#define R200_TXA_ARG_C_ZERO (0<<10) +#define R200_TXA_ARG_C_CURRENT_ALPHA (2<<10) /* guess */ +#define R200_TXA_ARG_C_CURRENT_BLUE (3<<10) /* guess */ +#define R200_TXA_ARG_C_DIFFUSE_ALPHA (4<<10) +#define R200_TXA_ARG_C_DIFFUSE_BLUE (5<<10) +#define R200_TXA_ARG_C_SPECULAR_ALPHA (6<<10) +#define R200_TXA_ARG_C_SPECULAR_BLUE (7<<10) +#define R200_TXA_ARG_C_TFACTOR_ALPHA (8<<10) +#define R200_TXA_ARG_C_TFACTOR_BLUE (9<<10) +#define R200_TXA_ARG_C_R0_ALPHA (10<<10) +#define R200_TXA_ARG_C_R0_BLUE (11<<10) +#define R200_TXA_ARG_C_R1_ALPHA (12<<10) +#define R200_TXA_ARG_C_R1_BLUE (13<<10) +#define R200_TXA_ARG_C_R2_ALPHA (14<<10) +#define R200_TXA_ARG_C_R2_BLUE (15<<10) +#define R200_TXA_ARG_C_R3_ALPHA (16<<10) +#define R200_TXA_ARG_C_R3_BLUE (17<<10) +#define R200_TXA_ARG_C_R4_ALPHA (18<<10) +#define R200_TXA_ARG_C_R4_BLUE (19<<10) +#define R200_TXA_ARG_C_R5_ALPHA (20<<10) +#define R200_TXA_ARG_C_R5_BLUE (21<<10) +#define R200_TXA_ARG_C_TFACTOR1_ALPHA (26<<10) +#define R200_TXA_ARG_C_TFACTOR1_BLUE (27<<10) +#define R200_TXA_ARG_C_MASK (31 << 10) +#define R200_TXA_ARG_C_SHIFT 10 +#define R200_TXA_COMP_ARG_A (1 << 16) +#define R200_TXA_COMP_ARG_A_SHIFT (16) +#define R200_TXA_BIAS_ARG_A (1 << 17) +#define R200_TXA_SCALE_ARG_A (1 << 18) +#define R200_TXA_NEG_ARG_A (1 << 19) +#define R200_TXA_COMP_ARG_B (1 << 20) +#define R200_TXA_COMP_ARG_B_SHIFT (20) +#define R200_TXA_BIAS_ARG_B (1 << 21) +#define R200_TXA_SCALE_ARG_B (1 << 22) +#define R200_TXA_NEG_ARG_B (1 << 23) +#define R200_TXA_COMP_ARG_C (1 << 24) +#define R200_TXA_COMP_ARG_C_SHIFT (24) +#define R200_TXA_BIAS_ARG_C (1 << 25) +#define R200_TXA_SCALE_ARG_C (1 << 26) +#define R200_TXA_NEG_ARG_C (1 << 27) +#define R200_TXA_OP_MADD (0 << 28) +#define R200_TXA_OP_CND0 (2 << 28) +#define R200_TXA_OP_LERP (3 << 28) +#define R200_TXA_OP_CONDITIONAL (6 << 28) +#define R200_TXA_OP_MASK (7 << 28) +#define R200_PP_TXABLEND2_0 0x2f0c +#define R200_TXA_TFACTOR_SEL_SHIFT 0 +#define R200_TXA_TFACTOR_SEL_MASK 0x7 +#define R200_TXA_TFACTOR1_SEL_SHIFT 4 +#define R200_TXA_TFACTOR1_SEL_MASK (0x7 << 4) +#define R200_TXA_SCALE_SHIFT 8 +#define R200_TXA_SCALE_MASK (7 << 8) +#define R200_TXA_SCALE_1X (0 << 8) +#define R200_TXA_SCALE_2X (1 << 8) +#define R200_TXA_SCALE_4X (2 << 8) +#define R200_TXA_SCALE_8X (3 << 8) +#define R200_TXA_SCALE_INV2 (5 << 8) +#define R200_TXA_SCALE_INV4 (6 << 8) +#define R200_TXA_SCALE_INV8 (7 << 8) +#define R200_TXA_CLAMP_SHIFT 12 +#define R200_TXA_CLAMP_MASK (3 << 12) +#define R200_TXA_CLAMP_WRAP (0 << 12) +#define R200_TXA_CLAMP_0_1 (1 << 12) +#define R200_TXA_CLAMP_8_8 (2 << 12) +#define R200_TXA_OUTPUT_REG_SHIFT 16 +#define R200_TXA_OUTPUT_REG_MASK (7 << 16) +#define R200_TXA_OUTPUT_REG_NONE (0 << 16) +#define R200_TXA_OUTPUT_REG_R0 (1 << 16) +#define R200_TXA_OUTPUT_REG_R1 (2 << 16) +#define R200_TXA_OUTPUT_REG_R2 (3 << 16) +#define R200_TXA_OUTPUT_REG_R3 (4 << 16) +#define R200_TXA_OUTPUT_REG_R4 (5 << 16) +#define R200_TXA_OUTPUT_REG_R5 (6 << 16) +#define R200_TXA_DOT_ALPHA (1 << 20) +#define R200_TXA_REPL_NORMAL 0 +#define R200_TXA_REPL_RED 1 +#define R200_TXA_REPL_GREEN 2 +#define R200_TXA_REPL_ARG_A_SHIFT 26 +#define R200_TXA_REPL_ARG_A_MASK (3 << 26) +#define R200_TXA_REPL_ARG_B_SHIFT 28 +#define R200_TXA_REPL_ARG_B_MASK (3 << 28) +#define R200_TXA_REPL_ARG_C_SHIFT 30 +#define R200_TXA_REPL_ARG_C_MASK (3 << 30) +#define R200_PP_TXCBLEND_1 0x2f10 +#define R200_PP_TXCBLEND2_1 0x2f14 +#define R200_PP_TXABLEND_1 0x2f18 +#define R200_PP_TXABLEND2_1 0x2f1c +#define R200_PP_TXCBLEND_2 0x2f20 +#define R200_PP_TXCBLEND2_2 0x2f24 +#define R200_PP_TXABLEND_2 0x2f28 +#define R200_PP_TXABLEND2_2 0x2f2c +#define R200_PP_TXCBLEND_3 0x2f30 +#define R200_PP_TXCBLEND2_3 0x2f34 +#define R200_PP_TXABLEND_3 0x2f38 +#define R200_PP_TXABLEND2_3 0x2f3c +#define R200_PP_TXCBLEND_4 0x2f40 +#define R200_PP_TXCBLEND2_4 0x2f44 +#define R200_PP_TXABLEND_4 0x2f48 +#define R200_PP_TXABLEND2_4 0x2f4c +#define R200_PP_TXCBLEND_5 0x2f50 +#define R200_PP_TXCBLEND2_5 0x2f54 +#define R200_PP_TXABLEND_5 0x2f58 +#define R200_PP_TXABLEND2_5 0x2f5c +#define R200_PP_TXCBLEND_6 0x2f60 +#define R200_PP_TXCBLEND2_6 0x2f64 +#define R200_PP_TXABLEND_6 0x2f68 +#define R200_PP_TXABLEND2_6 0x2f6c +#define R200_PP_TXCBLEND_7 0x2f70 +#define R200_PP_TXCBLEND2_7 0x2f74 +#define R200_PP_TXABLEND_7 0x2f78 +#define R200_PP_TXABLEND2_7 0x2f7c +#define R200_PP_TXCBLEND_8 0x2f80 +#define R200_PP_TXCBLEND2_8 0x2f84 +#define R200_PP_TXABLEND_8 0x2f88 +#define R200_PP_TXABLEND2_8 0x2f8c +#define R200_PP_TXCBLEND_9 0x2f90 +#define R200_PP_TXCBLEND2_9 0x2f94 +#define R200_PP_TXABLEND_9 0x2f98 +#define R200_PP_TXABLEND2_9 0x2f9c +#define R200_PP_TXCBLEND_10 0x2fa0 +#define R200_PP_TXCBLEND2_10 0x2fa4 +#define R200_PP_TXABLEND_10 0x2fa8 +#define R200_PP_TXABLEND2_10 0x2fac +#define R200_PP_TXCBLEND_11 0x2fb0 +#define R200_PP_TXCBLEND2_11 0x2fb4 +#define R200_PP_TXABLEND_11 0x2fb8 +#define R200_PP_TXABLEND2_11 0x2fbc +#define R200_PP_TXCBLEND_12 0x2fc0 +#define R200_PP_TXCBLEND2_12 0x2fc4 +#define R200_PP_TXABLEND_12 0x2fc8 +#define R200_PP_TXABLEND2_12 0x2fcc +#define R200_PP_TXCBLEND_13 0x2fd0 +#define R200_PP_TXCBLEND2_13 0x2fd4 +#define R200_PP_TXABLEND_13 0x2fd8 +#define R200_PP_TXABLEND2_13 0x2fdc +#define R200_PP_TXCBLEND_14 0x2fe0 +#define R200_PP_TXCBLEND2_14 0x2fe4 +#define R200_PP_TXABLEND_14 0x2fe8 +#define R200_PP_TXABLEND2_14 0x2fec +#define R200_PP_TXCBLEND_15 0x2ff0 +#define R200_PP_TXCBLEND2_15 0x2ff4 +#define R200_PP_TXABLEND_15 0x2ff8 +#define R200_PP_TXABLEND2_15 0x2ffc +/* gap */ +#define R200_RB3D_BLENDCOLOR 0x3218 /* ARGB 8888 */ +#define R200_RB3D_ABLENDCNTL 0x321C /* see BLENDCTL */ +#define R200_RB3D_CBLENDCNTL 0x3220 /* see BLENDCTL */ + + +/* + * Offsets in TCL vector state. NOTE: Hardwiring matrix positions. + * Multiple contexts could collaberate to eliminate state bouncing. + */ +#define R200_VS_LIGHT_AMBIENT_ADDR 0x00000028 +#define R200_VS_LIGHT_DIFFUSE_ADDR 0x00000030 +#define R200_VS_LIGHT_SPECULAR_ADDR 0x00000038 +#define R200_VS_LIGHT_DIRPOS_ADDR 0x00000040 +#define R200_VS_LIGHT_HWVSPOT_ADDR 0x00000048 +#define R200_VS_LIGHT_ATTENUATION_ADDR 0x00000050 +#define R200_VS_SPOT_DUAL_CONE 0x00000058 +#define R200_VS_GLOBAL_AMBIENT_ADDR 0x0000005C +#define R200_VS_FOG_PARAM_ADDR 0x0000005D +#define R200_VS_EYE_VECTOR_ADDR 0x0000005E +#define R200_VS_UCP_ADDR 0x00000060 +#define R200_VS_PNT_SPRITE_VPORT_SCALE 0x00000068 +#define R200_VS_MATRIX_0_MV 0x00000080 +#define R200_VS_MATRIX_1_INV_MV 0x00000084 +#define R200_VS_MATRIX_2_MVP 0x00000088 +#define R200_VS_MATRIX_3_TEX0 0x0000008C +#define R200_VS_MATRIX_4_TEX1 0x00000090 +#define R200_VS_MATRIX_5_TEX2 0x00000094 +#define R200_VS_MATRIX_6_TEX3 0x00000098 +#define R200_VS_MATRIX_7_TEX4 0x0000009C +#define R200_VS_MATRIX_8_TEX5 0x000000A0 +#define R200_VS_MAT_0_EMISS 0x000000B0 +#define R200_VS_MAT_0_AMB 0x000000B1 +#define R200_VS_MAT_0_DIF 0x000000B2 +#define R200_VS_MAT_0_SPEC 0x000000B3 +#define R200_VS_MAT_1_EMISS 0x000000B4 +#define R200_VS_MAT_1_AMB 0x000000B5 +#define R200_VS_MAT_1_DIF 0x000000B6 +#define R200_VS_MAT_1_SPEC 0x000000B7 +#define R200_VS_EYE2CLIP_MTX 0x000000B8 +#define R200_VS_PNT_SPRITE_ATT_CONST 0x000000BC +#define R200_VS_PNT_SPRITE_EYE_IN_MODEL 0x000000BD +#define R200_VS_PNT_SPRITE_CLAMP 0x000000BE +#define R200_VS_MAX 0x000001C0 + +#define R200_PVS_PROG0 0x00000080 +#define R200_PVS_PROG1 0x00000180 +#define R200_PVS_PARAM0 0x00000000 +#define R200_PVS_PARAM1 0x00000100 + +/* + * Offsets in TCL scalar state + */ +#define R200_SS_LIGHT_DCD_ADDR 0x00000000 +#define R200_SS_LIGHT_DCM_ADDR 0x00000008 +#define R200_SS_LIGHT_SPOT_EXPONENT_ADDR 0x00000010 +#define R200_SS_LIGHT_SPOT_CUTOFF_ADDR 0x00000018 +#define R200_SS_LIGHT_SPECULAR_THRESH_ADDR 0x00000020 +#define R200_SS_LIGHT_RANGE_CUTOFF_SQRD 0x00000028 +#define R200_SS_LIGHT_RANGE_ATT_CONST 0x00000030 +#define R200_SS_VERT_GUARD_CLIP_ADJ_ADDR 0x00000080 +#define R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR 0x00000081 +#define R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR 0x00000082 +#define R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 0x00000083 +#define R200_SS_MAT_0_SHININESS 0x00000100 +#define R200_SS_MAT_1_SHININESS 0x00000101 + + +/* + * Matrix indices + */ +#define R200_MTX_MV 0 +#define R200_MTX_IMV 1 +#define R200_MTX_MVP 2 +#define R200_MTX_TEX0 3 +#define R200_MTX_TEX1 4 +#define R200_MTX_TEX2 5 +#define R200_MTX_TEX3 6 +#define R200_MTX_TEX4 7 +#define R200_MTX_TEX5 8 + +/* Color formats for 2d packets + */ +#define R200_CP_COLOR_FORMAT_CI8 2 +#define R200_CP_COLOR_FORMAT_ARGB1555 3 +#define R200_CP_COLOR_FORMAT_RGB565 4 +#define R200_CP_COLOR_FORMAT_ARGB8888 6 +#define R200_CP_COLOR_FORMAT_RGB332 7 +#define R200_CP_COLOR_FORMAT_RGB8 9 +#define R200_CP_COLOR_FORMAT_ARGB4444 15 + + +/* + * CP type-3 packets + */ +#define R200_CP_CMD_NOP 0xC0001000 +#define R200_CP_CMD_NEXT_CHAR 0xC0001900 +#define R200_CP_CMD_PLY_NEXTSCAN 0xC0001D00 +#define R200_CP_CMD_SET_SCISSORS 0xC0001E00 +#define R200_CP_CMD_LOAD_MICROCODE 0xC0002400 +#define R200_CP_CMD_WAIT_FOR_IDLE 0xC0002600 +#define R200_CP_CMD_3D_DRAW_VBUF 0xC0002800 +#define R200_CP_CMD_3D_DRAW_IMMD 0xC0002900 +#define R200_CP_CMD_3D_DRAW_INDX 0xC0002A00 +#define R200_CP_CMD_LOAD_PALETTE 0xC0002C00 +#define R200_CP_CMD_3D_LOAD_VBPNTR 0xC0002F00 +#define R200_CP_CMD_INDX_BUFFER 0xC0003300 +#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400 +#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500 +#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600 +#define R200_CP_CMD_PAINT 0xC0009100 +#define R200_CP_CMD_BITBLT 0xC0009200 +#define R200_CP_CMD_SMALLTEXT 0xC0009300 +#define R200_CP_CMD_HOSTDATA_BLT 0xC0009400 +#define R200_CP_CMD_POLYLINE 0xC0009500 +#define R200_CP_CMD_POLYSCANLINES 0xC0009800 +#define R200_CP_CMD_PAINT_MULTI 0xC0009A00 +#define R200_CP_CMD_BITBLT_MULTI 0xC0009B00 +#define R200_CP_CMD_TRANS_BITBLT 0xC0009C00 + +#endif + diff --git a/r200/r200_sanity.c b/r200/r200_sanity.c new file mode 100644 index 0000000..3f2a866 --- /dev/null +++ b/r200/r200_sanity.c @@ -0,0 +1,1458 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_sanity.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/************************************************************************** + +Copyright 2002 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc, Cedar Park, TX. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#include <errno.h> + +#include "glheader.h" +#include "imports.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_sanity.h" +#include "radeon_reg.h" +#include "r200_reg.h" + +/* Set this '1' to get more verbiage. + */ +#define MORE_VERBOSE 1 + +#if MORE_VERBOSE +#define VERBOSE (R200_DEBUG & DEBUG_VERBOSE) +#define NORMAL (1) +#else +#define VERBOSE 0 +#define NORMAL (R200_DEBUG & DEBUG_VERBOSE) +#endif + + +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, + { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, + { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, + { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, + { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, + { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, + { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, + { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, + { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, + { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, + { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, + { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, + { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, + { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, + { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, + { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, + { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, + { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, + { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, + { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, + { R200_PP_TXCBLEND_0, 4, "R200_EMIT_PP_TXCBLEND_0" }, + { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" }, + { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" }, + { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" }, + { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" }, + { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" }, + { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" }, + { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" }, + { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, + { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" }, + { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" }, + { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" }, + { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, + { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, + { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, + { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" }, + { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" }, + { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" }, + { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" }, + { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" }, + { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" }, + { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" }, + { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" }, + { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" }, + { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" }, + { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" }, + { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" }, + { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" }, + { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, + { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" }, + { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, + { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, + { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, + { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, + { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, + { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, + { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, + { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, + { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" }, + { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */ + { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */ + { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" }, + { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" }, + { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" }, + { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" }, + { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" }, + { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" }, + { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" }, + { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" }, + { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" }, + { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" }, + { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" }, + { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" }, + { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" }, + { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" }, + { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" }, + { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" }, + { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" }, + { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" }, + { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" }, + { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" }, + { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" }, + { R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF" }, + { R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ + { R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, + { R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, + { R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, + { R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, + { R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, + { R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, + { R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, + { R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + { R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, +}; + +struct reg_names { + int idx; + const char *name; +}; + +static struct reg_names reg_names[] = { + { R200_PP_MISC, "R200_PP_MISC" }, + { R200_PP_FOG_COLOR, "R200_PP_FOG_COLOR" }, + { R200_RE_SOLID_COLOR, "R200_RE_SOLID_COLOR" }, + { R200_RB3D_BLENDCNTL, "R200_RB3D_BLENDCNTL" }, + { R200_RB3D_DEPTHOFFSET, "R200_RB3D_DEPTHOFFSET" }, + { R200_RB3D_DEPTHPITCH, "R200_RB3D_DEPTHPITCH" }, + { R200_RB3D_ZSTENCILCNTL, "R200_RB3D_ZSTENCILCNTL" }, + { R200_PP_CNTL, "R200_PP_CNTL" }, + { R200_RB3D_CNTL, "R200_RB3D_CNTL" }, + { R200_RB3D_COLOROFFSET, "R200_RB3D_COLOROFFSET" }, + { R200_RE_WIDTH_HEIGHT, "R200_RE_WIDTH_HEIGHT" }, + { R200_RB3D_COLORPITCH, "R200_RB3D_COLORPITCH" }, + { R200_SE_CNTL, "R200_SE_CNTL" }, + { R200_RE_CNTL, "R200_RE_CNTL" }, + { R200_RE_MISC, "R200_RE_MISC" }, + { R200_RE_STIPPLE_ADDR, "R200_RE_STIPPLE_ADDR" }, + { R200_RE_STIPPLE_DATA, "R200_RE_STIPPLE_DATA" }, + { R200_RE_LINE_PATTERN, "R200_RE_LINE_PATTERN" }, + { R200_RE_LINE_STATE, "R200_RE_LINE_STATE" }, + { R200_RE_SCISSOR_TL_0, "R200_RE_SCISSOR_TL_0" }, + { R200_RE_SCISSOR_BR_0, "R200_RE_SCISSOR_BR_0" }, + { R200_RE_SCISSOR_TL_1, "R200_RE_SCISSOR_TL_1" }, + { R200_RE_SCISSOR_BR_1, "R200_RE_SCISSOR_BR_1" }, + { R200_RE_SCISSOR_TL_2, "R200_RE_SCISSOR_TL_2" }, + { R200_RE_SCISSOR_BR_2, "R200_RE_SCISSOR_BR_2" }, + { R200_RB3D_DEPTHXY_OFFSET, "R200_RB3D_DEPTHXY_OFFSET" }, + { R200_RB3D_STENCILREFMASK, "R200_RB3D_STENCILREFMASK" }, + { R200_RB3D_ROPCNTL, "R200_RB3D_ROPCNTL" }, + { R200_RB3D_PLANEMASK, "R200_RB3D_PLANEMASK" }, + { R200_SE_VPORT_XSCALE, "R200_SE_VPORT_XSCALE" }, + { R200_SE_VPORT_XOFFSET, "R200_SE_VPORT_XOFFSET" }, + { R200_SE_VPORT_YSCALE, "R200_SE_VPORT_YSCALE" }, + { R200_SE_VPORT_YOFFSET, "R200_SE_VPORT_YOFFSET" }, + { R200_SE_VPORT_ZSCALE, "R200_SE_VPORT_ZSCALE" }, + { R200_SE_VPORT_ZOFFSET, "R200_SE_VPORT_ZOFFSET" }, + { R200_SE_ZBIAS_FACTOR, "R200_SE_ZBIAS_FACTOR" }, + { R200_SE_ZBIAS_CONSTANT, "R200_SE_ZBIAS_CONSTANT" }, + { R200_SE_LINE_WIDTH, "R200_SE_LINE_WIDTH" }, + { R200_SE_VAP_CNTL, "R200_SE_VAP_CNTL" }, + { R200_SE_VF_CNTL, "R200_SE_VF_CNTL" }, + { R200_SE_VTX_FMT_0, "R200_SE_VTX_FMT_0" }, + { R200_SE_VTX_FMT_1, "R200_SE_VTX_FMT_1" }, + { R200_SE_TCL_OUTPUT_VTX_FMT_0, "R200_SE_TCL_OUTPUT_VTX_FMT_0" }, + { R200_SE_TCL_OUTPUT_VTX_FMT_1, "R200_SE_TCL_OUTPUT_VTX_FMT_1" }, + { R200_SE_VTE_CNTL, "R200_SE_VTE_CNTL" }, + { R200_SE_VTX_NUM_ARRAYS, "R200_SE_VTX_NUM_ARRAYS" }, + { R200_SE_VTX_AOS_ATTR01, "R200_SE_VTX_AOS_ATTR01" }, + { R200_SE_VTX_AOS_ADDR0, "R200_SE_VTX_AOS_ADDR0" }, + { R200_SE_VTX_AOS_ADDR1, "R200_SE_VTX_AOS_ADDR1" }, + { R200_SE_VTX_AOS_ATTR23, "R200_SE_VTX_AOS_ATTR23" }, + { R200_SE_VTX_AOS_ADDR2, "R200_SE_VTX_AOS_ADDR2" }, + { R200_SE_VTX_AOS_ADDR3, "R200_SE_VTX_AOS_ADDR3" }, + { R200_SE_VTX_AOS_ATTR45, "R200_SE_VTX_AOS_ATTR45" }, + { R200_SE_VTX_AOS_ADDR4, "R200_SE_VTX_AOS_ADDR4" }, + { R200_SE_VTX_AOS_ADDR5, "R200_SE_VTX_AOS_ADDR5" }, + { R200_SE_VTX_AOS_ATTR67, "R200_SE_VTX_AOS_ATTR67" }, + { R200_SE_VTX_AOS_ADDR6, "R200_SE_VTX_AOS_ADDR6" }, + { R200_SE_VTX_AOS_ADDR7, "R200_SE_VTX_AOS_ADDR7" }, + { R200_SE_VTX_AOS_ATTR89, "R200_SE_VTX_AOS_ATTR89" }, + { R200_SE_VTX_AOS_ADDR8, "R200_SE_VTX_AOS_ADDR8" }, + { R200_SE_VTX_AOS_ADDR9, "R200_SE_VTX_AOS_ADDR9" }, + { R200_SE_VTX_AOS_ATTR1011, "R200_SE_VTX_AOS_ATTR1011" }, + { R200_SE_VTX_AOS_ADDR10, "R200_SE_VTX_AOS_ADDR10" }, + { R200_SE_VTX_AOS_ADDR11, "R200_SE_VTX_AOS_ADDR11" }, + { R200_SE_VF_MAX_VTX_INDX, "R200_SE_VF_MAX_VTX_INDX" }, + { R200_SE_VF_MIN_VTX_INDX, "R200_SE_VF_MIN_VTX_INDX" }, + { R200_SE_VTX_STATE_CNTL, "R200_SE_VTX_STATE_CNTL" }, + { R200_SE_TCL_VECTOR_INDX_REG, "R200_SE_TCL_VECTOR_INDX_REG" }, + { R200_SE_TCL_VECTOR_DATA_REG, "R200_SE_TCL_VECTOR_DATA_REG" }, + { R200_SE_TCL_SCALAR_INDX_REG, "R200_SE_TCL_SCALAR_INDX_REG" }, + { R200_SE_TCL_SCALAR_DATA_REG, "R200_SE_TCL_SCALAR_DATA_REG" }, + { R200_SE_TCL_MATRIX_SEL_0, "R200_SE_TCL_MATRIX_SEL_0" }, + { R200_SE_TCL_MATRIX_SEL_1, "R200_SE_TCL_MATRIX_SEL_1" }, + { R200_SE_TCL_MATRIX_SEL_2, "R200_SE_TCL_MATRIX_SEL_2" }, + { R200_SE_TCL_MATRIX_SEL_3, "R200_SE_TCL_MATRIX_SEL_3" }, + { R200_SE_TCL_MATRIX_SEL_4, "R200_SE_TCL_MATRIX_SEL_4" }, + { R200_SE_TCL_LIGHT_MODEL_CTL_0, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, + { R200_SE_TCL_LIGHT_MODEL_CTL_1, "R200_SE_TCL_LIGHT_MODEL_CTL_1" }, + { R200_SE_TCL_PER_LIGHT_CTL_0, "R200_SE_TCL_PER_LIGHT_CTL_0" }, + { R200_SE_TCL_PER_LIGHT_CTL_1, "R200_SE_TCL_PER_LIGHT_CTL_1" }, + { R200_SE_TCL_PER_LIGHT_CTL_2, "R200_SE_TCL_PER_LIGHT_CTL_2" }, + { R200_SE_TCL_PER_LIGHT_CTL_3, "R200_SE_TCL_PER_LIGHT_CTL_3" }, + { R200_SE_TCL_TEX_PROC_CTL_2, "R200_SE_TCL_TEX_PROC_CTL_2" }, + { R200_SE_TCL_TEX_PROC_CTL_3, "R200_SE_TCL_TEX_PROC_CTL_3" }, + { R200_SE_TCL_TEX_PROC_CTL_0, "R200_SE_TCL_TEX_PROC_CTL_0" }, + { R200_SE_TCL_TEX_PROC_CTL_1, "R200_SE_TCL_TEX_PROC_CTL_1" }, + { R200_SE_TC_TEX_CYL_WRAP_CTL, "R200_SE_TC_TEX_CYL_WRAP_CTL" }, + { R200_SE_TCL_UCP_VERT_BLEND_CTL, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, + { R200_SE_TCL_POINT_SPRITE_CNTL, "R200_SE_TCL_POINT_SPRITE_CNTL" }, + { R200_SE_VTX_ST_POS_0_X_4, "R200_SE_VTX_ST_POS_0_X_4" }, + { R200_SE_VTX_ST_POS_0_Y_4, "R200_SE_VTX_ST_POS_0_Y_4" }, + { R200_SE_VTX_ST_POS_0_Z_4, "R200_SE_VTX_ST_POS_0_Z_4" }, + { R200_SE_VTX_ST_POS_0_W_4, "R200_SE_VTX_ST_POS_0_W_4" }, + { R200_SE_VTX_ST_NORM_0_X, "R200_SE_VTX_ST_NORM_0_X" }, + { R200_SE_VTX_ST_NORM_0_Y, "R200_SE_VTX_ST_NORM_0_Y" }, + { R200_SE_VTX_ST_NORM_0_Z, "R200_SE_VTX_ST_NORM_0_Z" }, + { R200_SE_VTX_ST_PVMS, "R200_SE_VTX_ST_PVMS" }, + { R200_SE_VTX_ST_CLR_0_R, "R200_SE_VTX_ST_CLR_0_R" }, + { R200_SE_VTX_ST_CLR_0_G, "R200_SE_VTX_ST_CLR_0_G" }, + { R200_SE_VTX_ST_CLR_0_B, "R200_SE_VTX_ST_CLR_0_B" }, + { R200_SE_VTX_ST_CLR_0_A, "R200_SE_VTX_ST_CLR_0_A" }, + { R200_SE_VTX_ST_CLR_1_R, "R200_SE_VTX_ST_CLR_1_R" }, + { R200_SE_VTX_ST_CLR_1_G, "R200_SE_VTX_ST_CLR_1_G" }, + { R200_SE_VTX_ST_CLR_1_B, "R200_SE_VTX_ST_CLR_1_B" }, + { R200_SE_VTX_ST_CLR_1_A, "R200_SE_VTX_ST_CLR_1_A" }, + { R200_SE_VTX_ST_CLR_2_R, "R200_SE_VTX_ST_CLR_2_R" }, + { R200_SE_VTX_ST_CLR_2_G, "R200_SE_VTX_ST_CLR_2_G" }, + { R200_SE_VTX_ST_CLR_2_B, "R200_SE_VTX_ST_CLR_2_B" }, + { R200_SE_VTX_ST_CLR_2_A, "R200_SE_VTX_ST_CLR_2_A" }, + { R200_SE_VTX_ST_CLR_3_R, "R200_SE_VTX_ST_CLR_3_R" }, + { R200_SE_VTX_ST_CLR_3_G, "R200_SE_VTX_ST_CLR_3_G" }, + { R200_SE_VTX_ST_CLR_3_B, "R200_SE_VTX_ST_CLR_3_B" }, + { R200_SE_VTX_ST_CLR_3_A, "R200_SE_VTX_ST_CLR_3_A" }, + { R200_SE_VTX_ST_CLR_4_R, "R200_SE_VTX_ST_CLR_4_R" }, + { R200_SE_VTX_ST_CLR_4_G, "R200_SE_VTX_ST_CLR_4_G" }, + { R200_SE_VTX_ST_CLR_4_B, "R200_SE_VTX_ST_CLR_4_B" }, + { R200_SE_VTX_ST_CLR_4_A, "R200_SE_VTX_ST_CLR_4_A" }, + { R200_SE_VTX_ST_CLR_5_R, "R200_SE_VTX_ST_CLR_5_R" }, + { R200_SE_VTX_ST_CLR_5_G, "R200_SE_VTX_ST_CLR_5_G" }, + { R200_SE_VTX_ST_CLR_5_B, "R200_SE_VTX_ST_CLR_5_B" }, + { R200_SE_VTX_ST_CLR_5_A, "R200_SE_VTX_ST_CLR_5_A" }, + { R200_SE_VTX_ST_CLR_6_R, "R200_SE_VTX_ST_CLR_6_R" }, + { R200_SE_VTX_ST_CLR_6_G, "R200_SE_VTX_ST_CLR_6_G" }, + { R200_SE_VTX_ST_CLR_6_B, "R200_SE_VTX_ST_CLR_6_B" }, + { R200_SE_VTX_ST_CLR_6_A, "R200_SE_VTX_ST_CLR_6_A" }, + { R200_SE_VTX_ST_CLR_7_R, "R200_SE_VTX_ST_CLR_7_R" }, + { R200_SE_VTX_ST_CLR_7_G, "R200_SE_VTX_ST_CLR_7_G" }, + { R200_SE_VTX_ST_CLR_7_B, "R200_SE_VTX_ST_CLR_7_B" }, + { R200_SE_VTX_ST_CLR_7_A, "R200_SE_VTX_ST_CLR_7_A" }, + { R200_SE_VTX_ST_TEX_0_S, "R200_SE_VTX_ST_TEX_0_S" }, + { R200_SE_VTX_ST_TEX_0_T, "R200_SE_VTX_ST_TEX_0_T" }, + { R200_SE_VTX_ST_TEX_0_R, "R200_SE_VTX_ST_TEX_0_R" }, + { R200_SE_VTX_ST_TEX_0_Q, "R200_SE_VTX_ST_TEX_0_Q" }, + { R200_SE_VTX_ST_TEX_1_S, "R200_SE_VTX_ST_TEX_1_S" }, + { R200_SE_VTX_ST_TEX_1_T, "R200_SE_VTX_ST_TEX_1_T" }, + { R200_SE_VTX_ST_TEX_1_R, "R200_SE_VTX_ST_TEX_1_R" }, + { R200_SE_VTX_ST_TEX_1_Q, "R200_SE_VTX_ST_TEX_1_Q" }, + { R200_SE_VTX_ST_TEX_2_S, "R200_SE_VTX_ST_TEX_2_S" }, + { R200_SE_VTX_ST_TEX_2_T, "R200_SE_VTX_ST_TEX_2_T" }, + { R200_SE_VTX_ST_TEX_2_R, "R200_SE_VTX_ST_TEX_2_R" }, + { R200_SE_VTX_ST_TEX_2_Q, "R200_SE_VTX_ST_TEX_2_Q" }, + { R200_SE_VTX_ST_TEX_3_S, "R200_SE_VTX_ST_TEX_3_S" }, + { R200_SE_VTX_ST_TEX_3_T, "R200_SE_VTX_ST_TEX_3_T" }, + { R200_SE_VTX_ST_TEX_3_R, "R200_SE_VTX_ST_TEX_3_R" }, + { R200_SE_VTX_ST_TEX_3_Q, "R200_SE_VTX_ST_TEX_3_Q" }, + { R200_SE_VTX_ST_TEX_4_S, "R200_SE_VTX_ST_TEX_4_S" }, + { R200_SE_VTX_ST_TEX_4_T, "R200_SE_VTX_ST_TEX_4_T" }, + { R200_SE_VTX_ST_TEX_4_R, "R200_SE_VTX_ST_TEX_4_R" }, + { R200_SE_VTX_ST_TEX_4_Q, "R200_SE_VTX_ST_TEX_4_Q" }, + { R200_SE_VTX_ST_TEX_5_S, "R200_SE_VTX_ST_TEX_5_S" }, + { R200_SE_VTX_ST_TEX_5_T, "R200_SE_VTX_ST_TEX_5_T" }, + { R200_SE_VTX_ST_TEX_5_R, "R200_SE_VTX_ST_TEX_5_R" }, + { R200_SE_VTX_ST_TEX_5_Q, "R200_SE_VTX_ST_TEX_5_Q" }, + { R200_SE_VTX_ST_PNT_SPRT_SZ, "R200_SE_VTX_ST_PNT_SPRT_SZ" }, + { R200_SE_VTX_ST_DISC_FOG, "R200_SE_VTX_ST_DISC_FOG" }, + { R200_SE_VTX_ST_SHININESS_0, "R200_SE_VTX_ST_SHININESS_0" }, + { R200_SE_VTX_ST_SHININESS_1, "R200_SE_VTX_ST_SHININESS_1" }, + { R200_SE_VTX_ST_BLND_WT_0, "R200_SE_VTX_ST_BLND_WT_0" }, + { R200_SE_VTX_ST_BLND_WT_1, "R200_SE_VTX_ST_BLND_WT_1" }, + { R200_SE_VTX_ST_BLND_WT_2, "R200_SE_VTX_ST_BLND_WT_2" }, + { R200_SE_VTX_ST_BLND_WT_3, "R200_SE_VTX_ST_BLND_WT_3" }, + { R200_SE_VTX_ST_POS_1_X, "R200_SE_VTX_ST_POS_1_X" }, + { R200_SE_VTX_ST_POS_1_Y, "R200_SE_VTX_ST_POS_1_Y" }, + { R200_SE_VTX_ST_POS_1_Z, "R200_SE_VTX_ST_POS_1_Z" }, + { R200_SE_VTX_ST_POS_1_W, "R200_SE_VTX_ST_POS_1_W" }, + { R200_SE_VTX_ST_NORM_1_X, "R200_SE_VTX_ST_NORM_1_X" }, + { R200_SE_VTX_ST_NORM_1_Y, "R200_SE_VTX_ST_NORM_1_Y" }, + { R200_SE_VTX_ST_NORM_1_Z, "R200_SE_VTX_ST_NORM_1_Z" }, + { R200_SE_VTX_ST_USR_CLR_0_R, "R200_SE_VTX_ST_USR_CLR_0_R" }, + { R200_SE_VTX_ST_USR_CLR_0_G, "R200_SE_VTX_ST_USR_CLR_0_G" }, + { R200_SE_VTX_ST_USR_CLR_0_B, "R200_SE_VTX_ST_USR_CLR_0_B" }, + { R200_SE_VTX_ST_USR_CLR_0_A, "R200_SE_VTX_ST_USR_CLR_0_A" }, + { R200_SE_VTX_ST_USR_CLR_1_R, "R200_SE_VTX_ST_USR_CLR_1_R" }, + { R200_SE_VTX_ST_USR_CLR_1_G, "R200_SE_VTX_ST_USR_CLR_1_G" }, + { R200_SE_VTX_ST_USR_CLR_1_B, "R200_SE_VTX_ST_USR_CLR_1_B" }, + { R200_SE_VTX_ST_USR_CLR_1_A, "R200_SE_VTX_ST_USR_CLR_1_A" }, + { R200_SE_VTX_ST_CLR_0_PKD, "R200_SE_VTX_ST_CLR_0_PKD" }, + { R200_SE_VTX_ST_CLR_1_PKD, "R200_SE_VTX_ST_CLR_1_PKD" }, + { R200_SE_VTX_ST_CLR_2_PKD, "R200_SE_VTX_ST_CLR_2_PKD" }, + { R200_SE_VTX_ST_CLR_3_PKD, "R200_SE_VTX_ST_CLR_3_PKD" }, + { R200_SE_VTX_ST_CLR_4_PKD, "R200_SE_VTX_ST_CLR_4_PKD" }, + { R200_SE_VTX_ST_CLR_5_PKD, "R200_SE_VTX_ST_CLR_5_PKD" }, + { R200_SE_VTX_ST_CLR_6_PKD, "R200_SE_VTX_ST_CLR_6_PKD" }, + { R200_SE_VTX_ST_CLR_7_PKD, "R200_SE_VTX_ST_CLR_7_PKD" }, + { R200_SE_VTX_ST_POS_0_X_2, "R200_SE_VTX_ST_POS_0_X_2" }, + { R200_SE_VTX_ST_POS_0_Y_2, "R200_SE_VTX_ST_POS_0_Y_2" }, + { R200_SE_VTX_ST_PAR_CLR_LD, "R200_SE_VTX_ST_PAR_CLR_LD" }, + { R200_SE_VTX_ST_USR_CLR_PKD, "R200_SE_VTX_ST_USR_CLR_PKD" }, + { R200_SE_VTX_ST_POS_0_X_3, "R200_SE_VTX_ST_POS_0_X_3" }, + { R200_SE_VTX_ST_POS_0_Y_3, "R200_SE_VTX_ST_POS_0_Y_3" }, + { R200_SE_VTX_ST_POS_0_Z_3, "R200_SE_VTX_ST_POS_0_Z_3" }, + { R200_SE_VTX_ST_END_OF_PKT, "R200_SE_VTX_ST_END_OF_PKT" }, + { R200_RE_POINTSIZE, "R200_RE_POINTSIZE" }, + { R200_RE_TOP_LEFT, "R200_RE_TOP_LEFT" }, + { R200_RE_AUX_SCISSOR_CNTL, "R200_RE_AUX_SCISSOR_CNTL" }, + { R200_PP_TXFILTER_0, "R200_PP_TXFILTER_0" }, + { R200_PP_TXFORMAT_0, "R200_PP_TXFORMAT_0" }, + { R200_PP_TXSIZE_0, "R200_PP_TXSIZE_0" }, + { R200_PP_TXFORMAT_X_0, "R200_PP_TXFORMAT_X_0" }, + { R200_PP_TXPITCH_0, "R200_PP_TXPITCH_0" }, + { R200_PP_BORDER_COLOR_0, "R200_PP_BORDER_COLOR_0" }, + { R200_PP_CUBIC_FACES_0, "R200_PP_CUBIC_FACES_0" }, + { R200_PP_TXMULTI_CTL_0, "R200_PP_TXMULTI_CTL_0" }, + { R200_PP_TXFILTER_1, "R200_PP_TXFILTER_1" }, + { R200_PP_TXFORMAT_1, "R200_PP_TXFORMAT_1" }, + { R200_PP_TXSIZE_1, "R200_PP_TXSIZE_1" }, + { R200_PP_TXFORMAT_X_1, "R200_PP_TXFORMAT_X_1" }, + { R200_PP_TXPITCH_1, "R200_PP_TXPITCH_1" }, + { R200_PP_BORDER_COLOR_1, "R200_PP_BORDER_COLOR_1" }, + { R200_PP_CUBIC_FACES_1, "R200_PP_CUBIC_FACES_1" }, + { R200_PP_TXMULTI_CTL_1, "R200_PP_TXMULTI_CTL_1" }, + { R200_PP_TXFILTER_2, "R200_PP_TXFILTER_2" }, + { R200_PP_TXFORMAT_2, "R200_PP_TXFORMAT_2" }, + { R200_PP_TXSIZE_2, "R200_PP_TXSIZE_2" }, + { R200_PP_TXFORMAT_X_2, "R200_PP_TXFORMAT_X_2" }, + { R200_PP_TXPITCH_2, "R200_PP_TXPITCH_2" }, + { R200_PP_BORDER_COLOR_2, "R200_PP_BORDER_COLOR_2" }, + { R200_PP_CUBIC_FACES_2, "R200_PP_CUBIC_FACES_2" }, + { R200_PP_TXMULTI_CTL_2, "R200_PP_TXMULTI_CTL_2" }, + { R200_PP_TXFILTER_3, "R200_PP_TXFILTER_3" }, + { R200_PP_TXFORMAT_3, "R200_PP_TXFORMAT_3" }, + { R200_PP_TXSIZE_3, "R200_PP_TXSIZE_3" }, + { R200_PP_TXFORMAT_X_3, "R200_PP_TXFORMAT_X_3" }, + { R200_PP_TXPITCH_3, "R200_PP_TXPITCH_3" }, + { R200_PP_BORDER_COLOR_3, "R200_PP_BORDER_COLOR_3" }, + { R200_PP_CUBIC_FACES_3, "R200_PP_CUBIC_FACES_3" }, + { R200_PP_TXMULTI_CTL_3, "R200_PP_TXMULTI_CTL_3" }, + { R200_PP_TXFILTER_4, "R200_PP_TXFILTER_4" }, + { R200_PP_TXFORMAT_4, "R200_PP_TXFORMAT_4" }, + { R200_PP_TXSIZE_4, "R200_PP_TXSIZE_4" }, + { R200_PP_TXFORMAT_X_4, "R200_PP_TXFORMAT_X_4" }, + { R200_PP_TXPITCH_4, "R200_PP_TXPITCH_4" }, + { R200_PP_BORDER_COLOR_4, "R200_PP_BORDER_COLOR_4" }, + { R200_PP_CUBIC_FACES_4, "R200_PP_CUBIC_FACES_4" }, + { R200_PP_TXMULTI_CTL_4, "R200_PP_TXMULTI_CTL_4" }, + { R200_PP_TXFILTER_5, "R200_PP_TXFILTER_5" }, + { R200_PP_TXFORMAT_5, "R200_PP_TXFORMAT_5" }, + { R200_PP_TXSIZE_5, "R200_PP_TXSIZE_5" }, + { R200_PP_TXFORMAT_X_5, "R200_PP_TXFORMAT_X_5" }, + { R200_PP_TXPITCH_5, "R200_PP_TXPITCH_5" }, + { R200_PP_BORDER_COLOR_5, "R200_PP_BORDER_COLOR_5" }, + { R200_PP_CUBIC_FACES_5, "R200_PP_CUBIC_FACES_5" }, + { R200_PP_TXMULTI_CTL_5, "R200_PP_TXMULTI_CTL_5" }, + { R200_PP_TXOFFSET_0, "R200_PP_TXOFFSET_0" }, + { R200_PP_CUBIC_OFFSET_F1_0, "R200_PP_CUBIC_OFFSET_F1_0" }, + { R200_PP_CUBIC_OFFSET_F2_0, "R200_PP_CUBIC_OFFSET_F2_0" }, + { R200_PP_CUBIC_OFFSET_F3_0, "R200_PP_CUBIC_OFFSET_F3_0" }, + { R200_PP_CUBIC_OFFSET_F4_0, "R200_PP_CUBIC_OFFSET_F4_0" }, + { R200_PP_CUBIC_OFFSET_F5_0, "R200_PP_CUBIC_OFFSET_F5_0" }, + { R200_PP_TXOFFSET_1, "R200_PP_TXOFFSET_1" }, + { R200_PP_CUBIC_OFFSET_F1_1, "R200_PP_CUBIC_OFFSET_F1_1" }, + { R200_PP_CUBIC_OFFSET_F2_1, "R200_PP_CUBIC_OFFSET_F2_1" }, + { R200_PP_CUBIC_OFFSET_F3_1, "R200_PP_CUBIC_OFFSET_F3_1" }, + { R200_PP_CUBIC_OFFSET_F4_1, "R200_PP_CUBIC_OFFSET_F4_1" }, + { R200_PP_CUBIC_OFFSET_F5_1, "R200_PP_CUBIC_OFFSET_F5_1" }, + { R200_PP_TXOFFSET_2, "R200_PP_TXOFFSET_2" }, + { R200_PP_CUBIC_OFFSET_F1_2, "R200_PP_CUBIC_OFFSET_F1_2" }, + { R200_PP_CUBIC_OFFSET_F2_2, "R200_PP_CUBIC_OFFSET_F2_2" }, + { R200_PP_CUBIC_OFFSET_F3_2, "R200_PP_CUBIC_OFFSET_F3_2" }, + { R200_PP_CUBIC_OFFSET_F4_2, "R200_PP_CUBIC_OFFSET_F4_2" }, + { R200_PP_CUBIC_OFFSET_F5_2, "R200_PP_CUBIC_OFFSET_F5_2" }, + { R200_PP_TXOFFSET_3, "R200_PP_TXOFFSET_3" }, + { R200_PP_CUBIC_OFFSET_F1_3, "R200_PP_CUBIC_OFFSET_F1_3" }, + { R200_PP_CUBIC_OFFSET_F2_3, "R200_PP_CUBIC_OFFSET_F2_3" }, + { R200_PP_CUBIC_OFFSET_F3_3, "R200_PP_CUBIC_OFFSET_F3_3" }, + { R200_PP_CUBIC_OFFSET_F4_3, "R200_PP_CUBIC_OFFSET_F4_3" }, + { R200_PP_CUBIC_OFFSET_F5_3, "R200_PP_CUBIC_OFFSET_F5_3" }, + { R200_PP_TXOFFSET_4, "R200_PP_TXOFFSET_4" }, + { R200_PP_CUBIC_OFFSET_F1_4, "R200_PP_CUBIC_OFFSET_F1_4" }, + { R200_PP_CUBIC_OFFSET_F2_4, "R200_PP_CUBIC_OFFSET_F2_4" }, + { R200_PP_CUBIC_OFFSET_F3_4, "R200_PP_CUBIC_OFFSET_F3_4" }, + { R200_PP_CUBIC_OFFSET_F4_4, "R200_PP_CUBIC_OFFSET_F4_4" }, + { R200_PP_CUBIC_OFFSET_F5_4, "R200_PP_CUBIC_OFFSET_F5_4" }, + { R200_PP_TXOFFSET_5, "R200_PP_TXOFFSET_5" }, + { R200_PP_CUBIC_OFFSET_F1_5, "R200_PP_CUBIC_OFFSET_F1_5" }, + { R200_PP_CUBIC_OFFSET_F2_5, "R200_PP_CUBIC_OFFSET_F2_5" }, + { R200_PP_CUBIC_OFFSET_F3_5, "R200_PP_CUBIC_OFFSET_F3_5" }, + { R200_PP_CUBIC_OFFSET_F4_5, "R200_PP_CUBIC_OFFSET_F4_5" }, + { R200_PP_CUBIC_OFFSET_F5_5, "R200_PP_CUBIC_OFFSET_F5_5" }, + { R200_PP_TAM_DEBUG3, "R200_PP_TAM_DEBUG3" }, + { R200_PP_TFACTOR_0, "R200_PP_TFACTOR_0" }, + { R200_PP_TFACTOR_1, "R200_PP_TFACTOR_1" }, + { R200_PP_TFACTOR_2, "R200_PP_TFACTOR_2" }, + { R200_PP_TFACTOR_3, "R200_PP_TFACTOR_3" }, + { R200_PP_TFACTOR_4, "R200_PP_TFACTOR_4" }, + { R200_PP_TFACTOR_5, "R200_PP_TFACTOR_5" }, + { R200_PP_TFACTOR_6, "R200_PP_TFACTOR_6" }, + { R200_PP_TFACTOR_7, "R200_PP_TFACTOR_7" }, + { R200_PP_TXCBLEND_0, "R200_PP_TXCBLEND_0" }, + { R200_PP_TXCBLEND2_0, "R200_PP_TXCBLEND2_0" }, + { R200_PP_TXABLEND_0, "R200_PP_TXABLEND_0" }, + { R200_PP_TXABLEND2_0, "R200_PP_TXABLEND2_0" }, + { R200_PP_TXCBLEND_1, "R200_PP_TXCBLEND_1" }, + { R200_PP_TXCBLEND2_1, "R200_PP_TXCBLEND2_1" }, + { R200_PP_TXABLEND_1, "R200_PP_TXABLEND_1" }, + { R200_PP_TXABLEND2_1, "R200_PP_TXABLEND2_1" }, + { R200_PP_TXCBLEND_2, "R200_PP_TXCBLEND_2" }, + { R200_PP_TXCBLEND2_2, "R200_PP_TXCBLEND2_2" }, + { R200_PP_TXABLEND_2, "R200_PP_TXABLEND_2" }, + { R200_PP_TXABLEND2_2, "R200_PP_TXABLEND2_2" }, + { R200_PP_TXCBLEND_3, "R200_PP_TXCBLEND_3" }, + { R200_PP_TXCBLEND2_3, "R200_PP_TXCBLEND2_3" }, + { R200_PP_TXABLEND_3, "R200_PP_TXABLEND_3" }, + { R200_PP_TXABLEND2_3, "R200_PP_TXABLEND2_3" }, + { R200_PP_TXCBLEND_4, "R200_PP_TXCBLEND_4" }, + { R200_PP_TXCBLEND2_4, "R200_PP_TXCBLEND2_4" }, + { R200_PP_TXABLEND_4, "R200_PP_TXABLEND_4" }, + { R200_PP_TXABLEND2_4, "R200_PP_TXABLEND2_4" }, + { R200_PP_TXCBLEND_5, "R200_PP_TXCBLEND_5" }, + { R200_PP_TXCBLEND2_5, "R200_PP_TXCBLEND2_5" }, + { R200_PP_TXABLEND_5, "R200_PP_TXABLEND_5" }, + { R200_PP_TXABLEND2_5, "R200_PP_TXABLEND2_5" }, + { R200_PP_TXCBLEND_6, "R200_PP_TXCBLEND_6" }, + { R200_PP_TXCBLEND2_6, "R200_PP_TXCBLEND2_6" }, + { R200_PP_TXABLEND_6, "R200_PP_TXABLEND_6" }, + { R200_PP_TXABLEND2_6, "R200_PP_TXABLEND2_6" }, + { R200_PP_TXCBLEND_7, "R200_PP_TXCBLEND_7" }, + { R200_PP_TXCBLEND2_7, "R200_PP_TXCBLEND2_7" }, + { R200_PP_TXABLEND_7, "R200_PP_TXABLEND_7" }, + { R200_PP_TXABLEND2_7, "R200_PP_TXABLEND2_7" }, + { R200_RB3D_BLENDCOLOR, "R200_RB3D_BLENDCOLOR" }, + { R200_RB3D_ABLENDCNTL, "R200_RB3D_ABLENDCNTL" }, + { R200_RB3D_CBLENDCNTL, "R200_RB3D_CBLENDCNTL" }, + { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, + { R200_PP_CNTL_X, "R200_PP_CNTL_X" }, + { R200_SE_VAP_CNTL_STATUS, "R200_SE_VAP_CNTL_STATUS" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3" }, + { R200_PP_TRI_PERF, "R200_PP_TRI_PERF" }, + { R200_PP_PERF_CNTL, "R200_PP_PERF_CNTL" }, + { R200_PP_TXCBLEND_8, "R200_PP_TXCBLEND_8" }, + { R200_PP_TXCBLEND2_8, "R200_PP_TXCBLEND2_8" }, + { R200_PP_TXABLEND_8, "R200_PP_TXABLEND_8" }, + { R200_PP_TXABLEND2_8, "R200_PP_TXABLEND2_8" }, + { R200_PP_TXCBLEND_9, "R200_PP_TXCBLEND_9" }, + { R200_PP_TXCBLEND2_9, "R200_PP_TXCBLEND2_9" }, + { R200_PP_TXABLEND_9, "R200_PP_TXABLEND_9" }, + { R200_PP_TXABLEND2_9, "R200_PP_TXABLEND2_9" }, + { R200_PP_TXCBLEND_10, "R200_PP_TXCBLEND_10" }, + { R200_PP_TXCBLEND2_10, "R200_PP_TXCBLEND2_10" }, + { R200_PP_TXABLEND_10, "R200_PP_TXABLEND_10" }, + { R200_PP_TXABLEND2_10, "R200_PP_TXABLEND2_10" }, + { R200_PP_TXCBLEND_11, "R200_PP_TXCBLEND_11" }, + { R200_PP_TXCBLEND2_11, "R200_PP_TXCBLEND2_11" }, + { R200_PP_TXABLEND_11, "R200_PP_TXABLEND_11" }, + { R200_PP_TXABLEND2_11, "R200_PP_TXABLEND2_11" }, + { R200_PP_TXCBLEND_12, "R200_PP_TXCBLEND_12" }, + { R200_PP_TXCBLEND2_12, "R200_PP_TXCBLEND2_12" }, + { R200_PP_TXABLEND_12, "R200_PP_TXABLEND_12" }, + { R200_PP_TXABLEND2_12, "R200_PP_TXABLEND2_12" }, + { R200_PP_TXCBLEND_13, "R200_PP_TXCBLEND_13" }, + { R200_PP_TXCBLEND2_13, "R200_PP_TXCBLEND2_13" }, + { R200_PP_TXABLEND_13, "R200_PP_TXABLEND_13" }, + { R200_PP_TXABLEND2_13, "R200_PP_TXABLEND2_13" }, + { R200_PP_TXCBLEND_14, "R200_PP_TXCBLEND_14" }, + { R200_PP_TXCBLEND2_14, "R200_PP_TXCBLEND2_14" }, + { R200_PP_TXABLEND_14, "R200_PP_TXABLEND_14" }, + { R200_PP_TXABLEND2_14, "R200_PP_TXABLEND2_14" }, + { R200_PP_TXCBLEND_15, "R200_PP_TXCBLEND_15" }, + { R200_PP_TXCBLEND2_15, "R200_PP_TXCBLEND2_15" }, + { R200_PP_TXABLEND_15, "R200_PP_TXABLEND_15" }, + { R200_PP_TXABLEND2_15, "R200_PP_TXABLEND2_15" }, + { R200_VAP_PVS_CNTL_1, "R200_VAP_PVS_CNTL_1" }, + { R200_VAP_PVS_CNTL_2, "R200_VAP_PVS_CNTL_2" }, +}; + +static struct reg_names scalar_names[] = { + { R200_SS_LIGHT_DCD_ADDR, "R200_SS_LIGHT_DCD_ADDR" }, + { R200_SS_LIGHT_DCM_ADDR, "R200_SS_LIGHT_DCM_ADDR" }, + { R200_SS_LIGHT_SPOT_EXPONENT_ADDR, "R200_SS_LIGHT_SPOT_EXPONENT_ADDR" }, + { R200_SS_LIGHT_SPOT_CUTOFF_ADDR, "R200_SS_LIGHT_SPOT_CUTOFF_ADDR" }, + { R200_SS_LIGHT_SPECULAR_THRESH_ADDR, "R200_SS_LIGHT_SPECULAR_THRESH_ADDR" }, + { R200_SS_LIGHT_RANGE_CUTOFF_SQRD, "R200_SS_LIGHT_RANGE_CUTOFF_SQRD" }, + { R200_SS_LIGHT_RANGE_ATT_CONST, "R200_SS_LIGHT_RANGE_ATT_CONST" }, + { R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, "R200_SS_VERT_GUARD_CLIP_ADJ_ADDR" }, + { R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR" }, + { R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR" }, + { R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR" }, + { R200_SS_MAT_0_SHININESS, "R200_SS_MAT_0_SHININESS" }, + { R200_SS_MAT_1_SHININESS, "R200_SS_MAT_1_SHININESS" }, + { 1000, "" }, +}; + +/* Puff these out to make them look like normal (dword) registers. + */ +static struct reg_names vector_names[] = { + { 0, "start" }, + { R200_VS_LIGHT_AMBIENT_ADDR, "R200_VS_LIGHT_AMBIENT_ADDR" }, + { R200_VS_LIGHT_DIFFUSE_ADDR, "R200_VS_LIGHT_DIFFUSE_ADDR" }, + { R200_VS_LIGHT_SPECULAR_ADDR, "R200_VS_LIGHT_SPECULAR_ADDR" }, + { R200_VS_LIGHT_DIRPOS_ADDR, "R200_VS_LIGHT_DIRPOS_ADDR" }, + { R200_VS_LIGHT_HWVSPOT_ADDR, "R200_VS_LIGHT_HWVSPOT_ADDR" }, + { R200_VS_LIGHT_ATTENUATION_ADDR, "R200_VS_LIGHT_ATTENUATION_ADDR" }, + { R200_VS_SPOT_DUAL_CONE, "R200_VS_SPOT_DUAL_CONE" }, + { R200_VS_GLOBAL_AMBIENT_ADDR, "R200_VS_GLOBAL_AMBIENT_ADDR" }, + { R200_VS_FOG_PARAM_ADDR, "R200_VS_FOG_PARAM_ADDR" }, + { R200_VS_EYE_VECTOR_ADDR, "R200_VS_EYE_VECTOR_ADDR" }, + { R200_VS_UCP_ADDR, "R200_VS_UCP_ADDR" }, + { R200_VS_PNT_SPRITE_VPORT_SCALE, "R200_VS_PNT_SPRITE_VPORT_SCALE" }, + { R200_VS_MATRIX_0_MV, "R200_VS_MATRIX_0_MV" }, + { R200_VS_MATRIX_1_INV_MV, "R200_VS_MATRIX_1_INV_MV" }, + { R200_VS_MATRIX_2_MVP, "R200_VS_MATRIX_2_MVP" }, + { R200_VS_MATRIX_3_TEX0, "R200_VS_MATRIX_3_TEX0" }, + { R200_VS_MATRIX_4_TEX1, "R200_VS_MATRIX_4_TEX1" }, + { R200_VS_MATRIX_5_TEX2, "R200_VS_MATRIX_5_TEX2" }, + { R200_VS_MATRIX_6_TEX3, "R200_VS_MATRIX_6_TEX3" }, + { R200_VS_MATRIX_7_TEX4, "R200_VS_MATRIX_7_TEX4" }, + { R200_VS_MATRIX_8_TEX5, "R200_VS_MATRIX_8_TEX5" }, + { R200_VS_MAT_0_EMISS, "R200_VS_MAT_0_EMISS" }, + { R200_VS_MAT_0_AMB, "R200_VS_MAT_0_AMB" }, + { R200_VS_MAT_0_DIF, "R200_VS_MAT_0_DIF" }, + { R200_VS_MAT_0_SPEC, "R200_VS_MAT_0_SPEC" }, + { R200_VS_MAT_1_EMISS, "R200_VS_MAT_1_EMISS" }, + { R200_VS_MAT_1_AMB, "R200_VS_MAT_1_AMB" }, + { R200_VS_MAT_1_DIF, "R200_VS_MAT_1_DIF" }, + { R200_VS_MAT_1_SPEC, "R200_VS_MAT_1_SPEC" }, + { R200_VS_EYE2CLIP_MTX, "R200_VS_EYE2CLIP_MTX" }, + { R200_VS_PNT_SPRITE_ATT_CONST, "R200_VS_PNT_SPRITE_ATT_CONST" }, + { R200_VS_PNT_SPRITE_EYE_IN_MODEL, "R200_VS_PNT_SPRITE_EYE_IN_MODEL" }, + { R200_VS_PNT_SPRITE_CLAMP, "R200_VS_PNT_SPRITE_CLAMP" }, + { R200_VS_MAX, "R200_VS_MAX" }, + { 1000, "" }, +}; + +union fi { float f; int i; }; + +#define ISVEC 1 +#define ISFLOAT 2 +#define TOUCHED 4 + +struct reg { + int idx; + struct reg_names *closest; + int flags; + union fi current; + union fi *values; + int nvalues; + int nalloc; + float vmin, vmax; +}; + + +static struct reg regs[Elements(reg_names)+1]; +static struct reg scalars[512+1]; +static struct reg vectors[512*4+1]; + +static int total, total_changed, bufs; + +static void init_regs( void ) +{ + struct reg_names *tmp; + int i; + + for (i = 0 ; i < Elements(regs) ; i++) { + regs[i].idx = reg_names[i].idx; + regs[i].closest = ®_names[i]; + regs[i].flags = 0; + } + + for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) { + if (tmp[1].idx == i) tmp++; + scalars[i].idx = i; + scalars[i].closest = tmp; + scalars[i].flags = ISFLOAT; + } + + for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) { + if (tmp[1].idx*4 == i) tmp++; + vectors[i].idx = i; + vectors[i].closest = tmp; + vectors[i].flags = ISFLOAT|ISVEC; + } + + regs[Elements(regs)-1].idx = -1; + scalars[Elements(scalars)-1].idx = -1; + vectors[Elements(vectors)-1].idx = -1; +} + +static int find_or_add_value( struct reg *reg, int val ) +{ + int j; + + for ( j = 0 ; j < reg->nvalues ; j++) + if ( val == reg->values[j].i ) + return 1; + + if (j == reg->nalloc) { + reg->nalloc += 5; + reg->nalloc *= 2; + reg->values = (union fi *) realloc( reg->values, + reg->nalloc * sizeof(union fi) ); + } + + reg->values[reg->nvalues++].i = val; + return 0; +} + +static struct reg *lookup_reg( struct reg *tab, int reg ) +{ + int i; + + for (i = 0 ; tab[i].idx != -1 ; i++) { + if (tab[i].idx == reg) + return &tab[i]; + } + + fprintf(stderr, "*** unknown reg 0x%x\n", reg); + return NULL; +} + + +static const char *get_reg_name( struct reg *reg ) +{ + static char tmp[80]; + + if (reg->idx == reg->closest->idx) + return reg->closest->name; + + + if (reg->flags & ISVEC) { + if (reg->idx/4 != reg->closest->idx) + sprintf(tmp, "%s+%d[%d]", + reg->closest->name, + (reg->idx/4) - reg->closest->idx, + reg->idx%4); + else + sprintf(tmp, "%s[%d]", reg->closest->name, reg->idx%4); + } + else { + if (reg->idx != reg->closest->idx) + sprintf(tmp, "%s+%d", reg->closest->name, reg->idx - reg->closest->idx); + else + sprintf(tmp, "%s", reg->closest->name); + } + + return tmp; +} + +static int print_int_reg_assignment( struct reg *reg, int data ) +{ + int changed = (reg->current.i != data); + int ever_seen = find_or_add_value( reg, data ); + + if (VERBOSE || (NORMAL && (changed || !ever_seen))) + fprintf(stderr, " %s <-- 0x%x", get_reg_name(reg), data); + + if (NORMAL) { + if (!ever_seen) + fprintf(stderr, " *** BRAND NEW VALUE"); + else if (changed) + fprintf(stderr, " *** CHANGED"); + } + + reg->current.i = data; + + if (VERBOSE || (NORMAL && (changed || !ever_seen))) + fprintf(stderr, "\n"); + + return changed; +} + + +static int print_float_reg_assignment( struct reg *reg, float data ) +{ + int changed = (reg->current.f != data); + int newmin = (data < reg->vmin); + int newmax = (data > reg->vmax); + + if (VERBOSE || (NORMAL && (newmin || newmax || changed))) + fprintf(stderr, " %s <-- %.3f", get_reg_name(reg), data); + + if (NORMAL) { + if (newmin) { + fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin); + reg->vmin = data; + } + else if (newmax) { + fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax); + reg->vmax = data; + } + else if (changed) { + fprintf(stderr, " *** CHANGED"); + } + } + + reg->current.f = data; + + if (VERBOSE || (NORMAL && (newmin || newmax || changed))) + fprintf(stderr, "\n"); + + return changed; +} + +static int print_reg_assignment( struct reg *reg, int data ) +{ + float_ui32_type datau; + datau.ui32 = data; + reg->flags |= TOUCHED; + if (reg->flags & ISFLOAT) + return print_float_reg_assignment( reg, datau.f ); + else + return print_int_reg_assignment( reg, data ); +} + +static void print_reg( struct reg *reg ) +{ + if (reg->flags & TOUCHED) { + if (reg->flags & ISFLOAT) { + fprintf(stderr, " %s == %f\n", get_reg_name(reg), reg->current.f); + } else { + fprintf(stderr, " %s == 0x%x\n", get_reg_name(reg), reg->current.i); + } + } +} + + +static void dump_state( void ) +{ + int i; + + for (i = 0 ; i < Elements(regs) ; i++) + print_reg( ®s[i] ); + + for (i = 0 ; i < Elements(scalars) ; i++) + print_reg( &scalars[i] ); + + for (i = 0 ; i < Elements(vectors) ; i++) + print_reg( &vectors[i] ); +} + + + +static int radeon_emit_packets( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int id = (int)header.packet.packet_id; + int sz = packet[id].len; + int *data = (int *)cmdbuf->buf; + int i; + + if (sz * sizeof(int) > cmdbuf->bufsz) { + fprintf(stderr, "Packet overflows cmdbuf\n"); + return -EINVAL; + } + + if (!packet[id].name) { + fprintf(stderr, "*** Unknown packet 0 nr %d\n", id ); + return -EINVAL; + } + + + if (VERBOSE) + fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz ); + + for ( i = 0 ; i < sz ; i++) { + struct reg *reg = lookup_reg( regs, packet[id].start + i*4 ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int radeon_emit_scalars( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = header.scalars.offset; + int stride = header.scalars.stride; + int i; + + if (VERBOSE) + fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n", + start, stride, sz, start + stride * sz); + + + for (i = 0 ; i < sz ; i++, start += stride) { + struct reg *reg = lookup_reg( scalars, start ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int radeon_emit_scalars2( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = header.scalars.offset + 0x100; + int stride = header.scalars.stride; + int i; + + if (VERBOSE) + fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n", + start, stride, sz, start + stride * sz); + + if (start + stride * sz > 258) { + fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz); + return -1; + } + + for (i = 0 ; i < sz ; i++, start += stride) { + struct reg *reg = lookup_reg( scalars, start ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +/* Check: inf/nan/extreme-size? + * Check: table start, end, nr, etc. + */ +static int radeon_emit_vectors( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.vectors.count; + int *data = (int *)cmdbuf->buf; + int start = header.vectors.offset; + int stride = header.vectors.stride; + int i,j; + + if (VERBOSE) + fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n", + start, stride, sz, start + stride * sz, header.i); + +/* if (start + stride * (sz/4) > 128) { */ +/* fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */ +/* return -1; */ +/* } */ + + for (i = 0 ; i < sz ; start += stride) { + int changed = 0; + for (j = 0 ; j < 4 ; i++,j++) { + struct reg *reg = lookup_reg( vectors, start*4+j ); + if (print_reg_assignment( reg, data[i] )) + changed = 1; + } + if (changed) + total_changed += 4; + total += 4; + } + + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static int radeon_emit_veclinear( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.veclinear.count * 4; + int *data = (int *)cmdbuf->buf; + float *fdata =(float *)cmdbuf->buf; + int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8); + int i; + + if (1||VERBOSE) + fprintf(stderr, "emit vectors linear, start %d nr %d (end %d) (0x%x)\n", + start, sz >> 2, start + (sz >> 2), header.i); + + + if (start < 0x60) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start, fdata[i]); + fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start, fdata[i+1]); + fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start, fdata[i+2]); + fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start, fdata[i+3]); + } + } + else if ((start >= 0x100) && (start < 0x160)) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i]); + fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+1]); + fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+2]); + fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+3]); + } + } + else if ((start >= 0x80) && (start < 0xc0)) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x80, data[i]); + fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x80, data[i+1]); + fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x80, data[i+2]); + fprintf(stderr, "R200_VS_PROG %d SRC3 %08x\n", (i >> 2) + start - 0x80, data[i+3]); + } + } + else if ((start >= 0x180) && (start < 0x1c0)) { + for (i = 0 ; i < sz ; i += 4) { + fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i]); + fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+1]); + fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+2]); + fprintf(stderr, "R200_VS_PROG %d SRC3 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+3]); + } + } + else { + fprintf(stderr, "write to unknown vector area\n"); + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +#if 0 +static int print_vertex_format( int vfmt ) +{ + if (NORMAL) { + fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "vertex format", + vfmt, + "xy,", + (vfmt & R200_VTX_Z0) ? "z," : "", + (vfmt & R200_VTX_W0) ? "w0," : "", + (vfmt & R200_VTX_FPCOLOR) ? "fpcolor," : "", + (vfmt & R200_VTX_FPALPHA) ? "fpalpha," : "", + (vfmt & R200_VTX_PKCOLOR) ? "pkcolor," : "", + (vfmt & R200_VTX_FPSPEC) ? "fpspec," : "", + (vfmt & R200_VTX_FPFOG) ? "fpfog," : "", + (vfmt & R200_VTX_PKSPEC) ? "pkspec," : "", + (vfmt & R200_VTX_ST0) ? "st0," : "", + (vfmt & R200_VTX_ST1) ? "st1," : "", + (vfmt & R200_VTX_Q1) ? "q1," : "", + (vfmt & R200_VTX_ST2) ? "st2," : "", + (vfmt & R200_VTX_Q2) ? "q2," : "", + (vfmt & R200_VTX_ST3) ? "st3," : "", + (vfmt & R200_VTX_Q3) ? "q3," : "", + (vfmt & R200_VTX_Q0) ? "q0," : "", + (vfmt & R200_VTX_N0) ? "n0," : "", + (vfmt & R200_VTX_XY1) ? "xy1," : "", + (vfmt & R200_VTX_Z1) ? "z1," : "", + (vfmt & R200_VTX_W1) ? "w1," : "", + (vfmt & R200_VTX_N1) ? "n1," : ""); + + + if (!find_or_add_value( &others[V_VTXFMT], vfmt )) + fprintf(stderr, " *** NEW VALUE"); + + fprintf(stderr, "\n"); + } + + return 0; +} +#endif + +static char *primname[0x10] = { + "NONE", + "POINTS", + "LINES", + "LINE_STRIP", + "TRIANGLES", + "TRIANGLE_FAN", + "TRIANGLE_STRIP", + "RECT_LIST", + NULL, + "3VRT_POINTS", + "3VRT_LINES", + "POINT_SPRITES", + "LINE_LOOP", + "QUADS", + "QUAD_STRIP", + "POLYGON", +}; + +static int print_prim_and_flags( int prim ) +{ + int numverts; + + if (NORMAL) + fprintf(stderr, " %s(%x): %s%s%s%s%s%s\n", + "prim flags", + prim, + ((prim & 0x30) == R200_VF_PRIM_WALK_IND) ? "IND," : "", + ((prim & 0x30) == R200_VF_PRIM_WALK_LIST) ? "LIST," : "", + ((prim & 0x30) == R200_VF_PRIM_WALK_RING) ? "RING," : "", + (prim & R200_VF_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ", + (prim & R200_VF_INDEX_SZ_4) ? "INDX-32," : "", + (prim & R200_VF_TCL_OUTPUT_VTX_ENABLE) ? "TCL_OUT_VTX," : ""); + + numverts = prim>>16; + + if (NORMAL) + fprintf(stderr, " prim: %s numverts %d\n", primname[prim&0xf], numverts); + + switch (prim & 0xf) { + case R200_VF_PRIM_NONE: + case R200_VF_PRIM_POINTS: + if (numverts < 1) { + fprintf(stderr, "Bad nr verts for line %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_LINES: + case R200_VF_PRIM_POINT_SPRITES: + if ((numverts & 1) || numverts == 0) { + fprintf(stderr, "Bad nr verts for line %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_LINE_STRIP: + case R200_VF_PRIM_LINE_LOOP: + if (numverts < 2) { + fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_TRIANGLES: + case R200_VF_PRIM_3VRT_POINTS: + case R200_VF_PRIM_3VRT_LINES: + case R200_VF_PRIM_RECT_LIST: + if (numverts % 3 || numverts == 0) { + fprintf(stderr, "Bad nr verts for tri %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_TRIANGLE_FAN: + case R200_VF_PRIM_TRIANGLE_STRIP: + case R200_VF_PRIM_POLYGON: + if (numverts < 3) { + fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_QUADS: + if (numverts % 4 || numverts == 0) { + fprintf(stderr, "Bad nr verts for quad %d\n", numverts); + return -1; + } + break; + case R200_VF_PRIM_QUAD_STRIP: + if (numverts % 2 || numverts < 4) { + fprintf(stderr, "Bad nr verts for quadstrip %d\n", numverts); + return -1; + } + break; + default: + fprintf(stderr, "Bad primitive\n"); + return -1; + } + return 0; +} + +/* build in knowledge about each packet type + */ +static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int cmdsz; + int *cmd = (int *)cmdbuf->buf; + int *tmp; + int i, stride, size, start; + + cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16); + + if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 || + cmdsz * 4 > cmdbuf->bufsz || + cmdsz > RADEON_CP_PACKET_MAX_DWORDS) { + fprintf(stderr, "Bad packet\n"); + return -EINVAL; + } + + switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) { + case R200_CP_CMD_NOP: + if (NORMAL) + fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_NEXT_CHAR: + if (NORMAL) + fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_PLY_NEXTSCAN: + if (NORMAL) + fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_SET_SCISSORS: + if (NORMAL) + fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_LOAD_MICROCODE: + if (NORMAL) + fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_WAIT_FOR_IDLE: + if (NORMAL) + fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz); + break; + + case R200_CP_CMD_3D_DRAW_VBUF: + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz); +/* print_vertex_format(cmd[1]); */ + if (print_prim_and_flags(cmd[2])) + return -EINVAL; + break; + + case R200_CP_CMD_3D_DRAW_IMMD: + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_3D_DRAW_INDX: { + int neltdwords; + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz); +/* print_vertex_format(cmd[1]); */ + if (print_prim_and_flags(cmd[2])) + return -EINVAL; + neltdwords = cmd[2]>>16; + neltdwords += neltdwords & 1; + neltdwords /= 2; + if (neltdwords + 3 != cmdsz) + fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n", + neltdwords, cmdsz); + break; + } + case R200_CP_CMD_LOAD_PALETTE: + if (NORMAL) + fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_3D_LOAD_VBPNTR: + if (NORMAL) { + fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz); + fprintf(stderr, " nr arrays: %d\n", cmd[1]); + } + + if (((cmd[1]/2)*3) + ((cmd[1]%2)*2) != cmdsz - 2) { + fprintf(stderr, " ****** MISMATCH %d/%d *******\n", + ((cmd[1]/2)*3) + ((cmd[1]%2)*2) + 2, cmdsz); + return -EINVAL; + } + + if (NORMAL) { + tmp = cmd+2; + for (i = 0 ; i < cmd[1] ; i++) { + if (i & 1) { + stride = (tmp[0]>>24) & 0xff; + size = (tmp[0]>>16) & 0xff; + start = tmp[2]; + tmp += 3; + } + else { + stride = (tmp[0]>>8) & 0xff; + size = (tmp[0]) & 0xff; + start = tmp[1]; + } + fprintf(stderr, " array %d: start 0x%x vsize %d vstride %d\n", + i, start, size, stride ); + } + } + break; + case R200_CP_CMD_PAINT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_BITBLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_SMALLTEXT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_HOSTDATA_BLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", + cmdsz); + break; + case R200_CP_CMD_POLYLINE: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz); + break; + case R200_CP_CMD_POLYSCANLINES: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", + cmdsz); + break; + case R200_CP_CMD_PAINT_MULTI: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", + cmdsz); + break; + case R200_CP_CMD_BITBLT_MULTI: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", + cmdsz); + break; + case R200_CP_CMD_TRANS_BITBLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", + cmdsz); + break; + case R200_CP_CMD_3D_DRAW_VBUF_2: + if (NORMAL) + fprintf(stderr, "R200_CP_CMD_3D_DRAW_VBUF_2, %d dwords\n", + cmdsz); + if (print_prim_and_flags(cmd[1])) + return -EINVAL; + break; + case R200_CP_CMD_3D_DRAW_IMMD_2: + if (NORMAL) + fprintf(stderr, "R200_CP_CMD_3D_DRAW_IMMD_2, %d dwords\n", + cmdsz); + if (print_prim_and_flags(cmd[1])) + return -EINVAL; + break; + case R200_CP_CMD_3D_DRAW_INDX_2: + if (NORMAL) + fprintf(stderr, "R200_CP_CMD_3D_DRAW_INDX_2, %d dwords\n", + cmdsz); + if (print_prim_and_flags(cmd[1])) + return -EINVAL; + break; + default: + fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz); + break; + } + + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + + +/* Check cliprects for bounds, then pass on to above: + */ +static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf ) +{ + drm_clip_rect_t *boxes = (drm_clip_rect_t *)cmdbuf->boxes; + int i = 0; + + if (VERBOSE && total_changed) { + dump_state(); + total_changed = 0; + } + + if (NORMAL) { + do { + if ( i < cmdbuf->nbox ) { + fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n", + i, cmdbuf->nbox, + boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2); + } + } while ( ++i < cmdbuf->nbox ); + } + + if (cmdbuf->nbox == 1) + cmdbuf->nbox = 0; + + return radeon_emit_packet3( cmdbuf ); +} + + +int r200SanityCmdBuffer( r200ContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ) +{ + int idx; + drm_radeon_cmd_buffer_t cmdbuf; + drm_radeon_cmd_header_t header; + static int inited = 0; + + if (!inited) { + init_regs(); + inited = 1; + } + + + cmdbuf.buf = rmesa->store.cmd_buf; + cmdbuf.bufsz = rmesa->store.cmd_used; + cmdbuf.boxes = (drm_clip_rect_t *)boxes; + cmdbuf.nbox = nbox; + + while ( cmdbuf.bufsz >= sizeof(header) ) { + + header.i = *(int *)cmdbuf.buf; + cmdbuf.buf += sizeof(header); + cmdbuf.bufsz -= sizeof(header); + + switch (header.header.cmd_type) { + case RADEON_CMD_PACKET: + if (radeon_emit_packets( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_packets failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_SCALARS: + if (radeon_emit_scalars( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_scalars failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_SCALARS2: + if (radeon_emit_scalars2( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_scalars failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_VECTORS: + if (radeon_emit_vectors( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_vectors failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_DMA_DISCARD: + idx = header.dma.buf_idx; + if (NORMAL) + fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx); + bufs++; + break; + + case RADEON_CMD_PACKET3: + if (radeon_emit_packet3( &cmdbuf )) { + fprintf(stderr,"radeon_emit_packet3 failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_PACKET3_CLIP: + if (radeon_emit_packet3_cliprect( &cmdbuf )) { + fprintf(stderr,"radeon_emit_packet3_clip failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_WAIT: + break; + + case RADEON_CMD_VECLINEAR: + if (radeon_emit_veclinear( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_veclinear failed\n"); + return -EINVAL; + } + break; + + default: + fprintf(stderr,"bad cmd_type %d at %p\n", + header.header.cmd_type, + cmdbuf.buf - sizeof(header)); + return -EINVAL; + } + } + + if (0) + { + static int n = 0; + n++; + if (n == 10) { + fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n", + bufs, + total, total_changed, + ((float)total_changed/(float)total*100.0)); + fprintf(stderr, "Total emitted per buf: %.2f\n", + (float)total/(float)bufs); + fprintf(stderr, "Real changes per buf: %.2f\n", + (float)total_changed/(float)bufs); + + bufs = n = total = total_changed = 0; + } + } + + fprintf(stderr, "leaving %s\n\n\n", __FUNCTION__); + + return 0; +} diff --git a/r200/r200_sanity.h b/r200/r200_sanity.h new file mode 100644 index 0000000..f4c110d --- /dev/null +++ b/r200/r200_sanity.h @@ -0,0 +1,8 @@ +#ifndef R200_SANITY_H +#define R200_SANITY_H + +extern int r200SanityCmdBuffer( r200ContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ); + +#endif diff --git a/r200/r200_span.c b/r200/r200_span.c new file mode 100644 index 0000000..6e99dfe --- /dev/null +++ b/r200/r200_span.c @@ -0,0 +1,306 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.c,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "swrast/swrast.h" +#include "colormac.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_span.h" +#include "r200_tex.h" + +#define DBG 0 + +/* + * Note that all information needed to access pixels in a renderbuffer + * should be obtained through the gl_renderbuffer parameter, not per-context + * information. + */ +#define LOCAL_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLubyte *buf = (GLubyte *) drb->flippedData \ + + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ + GLuint p; \ + (void) p; + +#define LOCAL_DEPTH_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLuint xo = dPriv->x; \ + GLuint yo = dPriv->y; \ + GLubyte *buf = (GLubyte *) drb->Base.Data; + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +#define Y_FLIP(Y) (bottom - (Y)) + +#define HW_LOCK() + +#define HW_UNLOCK() + + + +/* ================================================================ + * Color buffer + */ + +/* 16 bit, RGB565 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) r200##x##_RGB565 +#define TAG2(x,y) r200##x##_RGB565##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#include "spantmp2.h" + +/* 32 bit, ARGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) r200##x##_ARGB8888 +#define TAG2(x,y) r200##x##_ARGB8888##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#include "spantmp2.h" + + +/* ================================================================ + * Depth buffer + */ + +/* The Radeon family has depth tiling on all the time, so we have to convert + * the x,y coordinates into the memory bus address (mba) in the same + * manner as the engine. In each case, the linear block address (ba) + * is calculated, and then wired with x and y to produce the final + * memory address. + * The chip will do address translation on its own if the surface registers + * are set up correctly. It is not quite enough to get it working with hyperz too... + */ + +/* extract bit 'b' of x, result is zero or one */ +#define BIT(x,b) ((x & (1<<b))>>b) + +static GLuint +r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y ) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 4 * (x + y * pitch); + } + else { + GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5); + GLuint a = + (BIT(x,0) << 2) | + (BIT(y,0) << 3) | + (BIT(x,1) << 4) | + (BIT(y,1) << 5) | + (BIT(x,3) << 6) | + (BIT(x,4) << 7) | + (BIT(x,2) << 8) | + (BIT(y,2) << 9) | + (BIT(y,3) << 10) | + (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | + ((b >> 1) << 12); + return a; + } +} + +static GLuint +r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y ) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 2 * (x + y * pitch); + } + else { + GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6); + GLuint a = + (BIT(x,0) << 1) | + (BIT(y,0) << 2) | + (BIT(x,1) << 3) | + (BIT(y,1) << 4) | + (BIT(x,2) << 5) | + (BIT(x,4) << 6) | + (BIT(x,5) << 7) | + (BIT(x,3) << 8) | + (BIT(y,2) << 9) | + (BIT(y,3) << 10) | + (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | + ((b >> 1) << 12); + return a; + } +} + + +/* 16-bit depth buffer functions + */ + +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )); + +#define TAG(x) r200##x##_z16 +#include "depthtmp.h" + + +/* 24 bit depth, 8 bit stencil depthbuffer functions + */ + +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \ + _y + yo )) & 0x00ffffff; + +#define TAG(x) r200##x##_z24_s8 +#include "depthtmp.h" + + +/* ================================================================ + * Stencil buffer + */ + +/* 24 bit depth, 8 bit stencil depthbuffer functions + */ +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) + +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xff000000; \ + d = tmp >> 24; \ +} while (0) + +#define TAG(x) r200##x##_z24_s8 +#include "stenciltmp.h" + + +/* Move locking out to get reasonable span performance (10x better + * than doing this in HW_LOCK above). WaitForIdle() is the main + * culprit. + */ + +static void r200SpanRenderStart( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + + R200_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + r200WaitForIdleLocked( rmesa ); + + /* Read & rewrite the first pixel in the frame buffer. This should + * be a noop, right? In fact without this conform fails as reading + * from the framebuffer sometimes produces old results -- the + * on-card read cache gets mixed up and doesn't notice that the + * framebuffer has been updated. + * + * In the worst case this is buggy too as p might get the wrong + * value first time, so really need a hidden pixel somewhere for this. + */ + { + int p; + driRenderbuffer *drb = + (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0][0]; + volatile int *buf = + (volatile int *)(rmesa->dri.screen->pFB + drb->offset); + p = *buf; + *buf = p; + } +} + +static void r200SpanRenderFinish( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + _swrast_flush( ctx ); + UNLOCK_HARDWARE( rmesa ); +} + +void r200InitSpanFuncs( GLcontext *ctx ) +{ + struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); + swdd->SpanRenderStart = r200SpanRenderStart; + swdd->SpanRenderFinish = r200SpanRenderFinish; +} + + + +/** + * Plug in the Get/Put routines for the given driRenderbuffer. + */ +void +radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) +{ + if (drb->Base.InternalFormat == GL_RGBA) { + if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { + r200InitPointers_RGB565(&drb->Base); + } + else { + r200InitPointers_ARGB8888(&drb->Base); + } + } + else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { + r200InitDepthPointers_z16(&drb->Base); + } + else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { + r200InitDepthPointers_z24_s8(&drb->Base); + } + else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + r200InitStencilPointers_z24_s8(&drb->Base); + } +} diff --git a/r200/r200_span.h b/r200/r200_span.h new file mode 100644 index 0000000..5e7d3e4 --- /dev/null +++ b/r200/r200_span.h @@ -0,0 +1,46 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_span.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_SPAN_H__ +#define __R200_SPAN_H__ + +#include "drirenderbuffer.h" + +extern void r200InitSpanFuncs( GLcontext *ctx ); + +extern void +radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); + +#endif diff --git a/r200/r200_state.c b/r200/r200_state.c new file mode 100644 index 0000000..16726d7 --- /dev/null +++ b/r200/r200_state.c @@ -0,0 +1,2651 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "api_arrayelt.h" +#include "enums.h" +#include "colormac.h" +#include "light.h" +#include "framebuffer.h" + +#include "swrast/swrast.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_tcl.h" +#include "r200_tex.h" +#include "r200_swtcl.h" +#include "r200_vertprog.h" + +#include "drirenderbuffer.h" + + +/* ============================================================= + * Alpha blending + */ + +static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC]; + GLubyte refByte; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ref); + + R200_STATECHANGE( rmesa, ctx ); + + pp_misc &= ~(R200_ALPHA_TEST_OP_MASK | R200_REF_ALPHA_MASK); + pp_misc |= (refByte & R200_REF_ALPHA_MASK); + + switch ( func ) { + case GL_NEVER: + pp_misc |= R200_ALPHA_TEST_FAIL; + break; + case GL_LESS: + pp_misc |= R200_ALPHA_TEST_LESS; + break; + case GL_EQUAL: + pp_misc |= R200_ALPHA_TEST_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= R200_ALPHA_TEST_LEQUAL; + break; + case GL_GREATER: + pp_misc |= R200_ALPHA_TEST_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= R200_ALPHA_TEST_NEQUAL; + break; + case GL_GEQUAL: + pp_misc |= R200_ALPHA_TEST_GEQUAL; + break; + case GL_ALWAYS: + pp_misc |= R200_ALPHA_TEST_PASS; + break; + } + + rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc; +} + +static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] ) +{ + GLubyte color[4]; + r200ContextPtr rmesa = R200_CONTEXT(ctx); + R200_STATECHANGE( rmesa, ctx ); + CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + if (rmesa->r200Screen->drmSupportsBlendColor) + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] ); +} + +/** + * Calculate the hardware blend factor setting. This same function is used + * for source and destination of both alpha and RGB. + * + * \returns + * The hardware register value for the specified blend factor. This value + * will need to be shifted into the correct position for either source or + * destination factor. + * + * \todo + * Since the two cases where source and destination are handled differently + * are essentially error cases, they should never happen. Determine if these + * cases can be removed. + */ +static int blend_factor( GLenum factor, GLboolean is_src ) +{ + int func; + + switch ( factor ) { + case GL_ZERO: + func = R200_BLEND_GL_ZERO; + break; + case GL_ONE: + func = R200_BLEND_GL_ONE; + break; + case GL_DST_COLOR: + func = R200_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + func = R200_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + func = R200_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + func = R200_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + func = R200_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + func = (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE : R200_BLEND_GL_ZERO; + break; + case GL_CONSTANT_COLOR: + func = R200_BLEND_GL_CONST_COLOR; + break; + case GL_ONE_MINUS_CONSTANT_COLOR: + func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR; + break; + case GL_CONSTANT_ALPHA: + func = R200_BLEND_GL_CONST_ALPHA; + break; + case GL_ONE_MINUS_CONSTANT_ALPHA: + func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA; + break; + default: + func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO; + } + return func; +} + +/** + * Sets both the blend equation and the blend function. + * This is done in a single + * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) + * change the interpretation of the blend function. + * Also, make sure that blend function and blend equation are set to their default + * value if color blending is not enabled, since at least blend equations GL_MIN + * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for + * unknown reasons. + */ +static void r200_set_blend_state( GLcontext * ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & + ~(R200_ROP_ENABLE | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE); + + int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT); + int eqn = R200_COMB_FCN_ADD_CLAMP; + int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT); + int eqnA = R200_COMB_FCN_ADD_CLAMP; + + R200_STATECHANGE( rmesa, ctx ); + + if (rmesa->r200Screen->drmSupportsBlendColor) { + if (ctx->Color.ColorLogicOpEnabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func; + rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func; + return; + } else if (ctx->Color.BlendEnabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE; + } + else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl; + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func; + rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func; + return; + } + } + else { + if (ctx->Color.ColorLogicOpEnabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; + return; + } else if (ctx->Color.BlendEnabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ALPHA_BLEND_ENABLE; + } + else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl; + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; + return; + } + } + + func = (blend_factor( ctx->Color.BlendSrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | + (blend_factor( ctx->Color.BlendDstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT); + + switch(ctx->Color.BlendEquationRGB) { + case GL_FUNC_ADD: + eqn = R200_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqn = R200_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqn = R200_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqn = R200_COMB_FCN_MIN; + func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqn = R200_COMB_FCN_MAX; + func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + break; + + default: + fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB ); + return; + } + + if (!rmesa->r200Screen->drmSupportsBlendColor) { + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; + return; + } + + funcA = (blend_factor( ctx->Color.BlendSrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | + (blend_factor( ctx->Color.BlendDstA, GL_FALSE ) << R200_DST_BLEND_SHIFT); + + switch(ctx->Color.BlendEquationA) { + case GL_FUNC_ADD: + eqnA = R200_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqnA = R200_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqnA = R200_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqnA = R200_COMB_FCN_MIN; + funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqnA = R200_COMB_FCN_MAX; + funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT); + break; + + default: + fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA ); + return; + } + + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqnA | funcA; + rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func; + +} + +static void r200BlendEquationSeparate( GLcontext *ctx, + GLenum modeRGB, GLenum modeA ) +{ + r200_set_blend_state( ctx ); +} + +static void r200BlendFuncSeparate( GLcontext *ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA ) +{ + r200_set_blend_state( ctx ); +} + + +/* ============================================================= + * Depth testing + */ + +static void r200DepthFunc( GLcontext *ctx, GLenum func ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_TEST_MASK; + + switch ( ctx->Depth.Func ) { + case GL_NEVER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_NEVER; + break; + case GL_LESS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_LESS; + break; + case GL_EQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_EQUAL; + break; + case GL_LEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_LEQUAL; + break; + case GL_GREATER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_GREATER; + break; + case GL_NOTEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_NEQUAL; + break; + case GL_GEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_GEQUAL; + break; + case GL_ALWAYS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_ALWAYS; + break; + } +} + +static void r200ClearDepth( GLcontext *ctx, GLclampd d ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & + R200_DEPTH_FORMAT_MASK); + + switch ( format ) { + case R200_DEPTH_FORMAT_16BIT_INT_Z: + rmesa->state.depth.clear = d * 0x0000ffff; + break; + case R200_DEPTH_FORMAT_24BIT_INT_Z: + rmesa->state.depth.clear = d * 0x00ffffff; + break; + } +} + +static void r200DepthMask( GLcontext *ctx, GLboolean flag ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + R200_STATECHANGE( rmesa, ctx ); + + if ( ctx->Depth.Mask ) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_WRITE_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_WRITE_ENABLE; + } +} + + +/* ============================================================= + * Fog + */ + + +static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + union { int i; float f; } c, d; + GLchan col[4]; + GLuint i; + + c.i = rmesa->hw.fog.cmd[FOG_C]; + d.i = rmesa->hw.fog.cmd[FOG_D]; + + switch (pname) { + case GL_FOG_MODE: + if (!ctx->Fog.Enabled) + return; + R200_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK; + switch (ctx->Fog.Mode) { + case GL_LINEAR: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_LINEAR; + if (ctx->Fog.Start == ctx->Fog.End) { + c.f = 1.0F; + d.f = 1.0F; + } + else { + c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); + d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start); + } + break; + case GL_EXP: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP; + c.f = 0.0; + d.f = -ctx->Fog.Density; + break; + case GL_EXP2: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP2; + c.f = 0.0; + d.f = -(ctx->Fog.Density * ctx->Fog.Density); + break; + default: + return; + } + break; + case GL_FOG_DENSITY: + switch (ctx->Fog.Mode) { + case GL_EXP: + c.f = 0.0; + d.f = -ctx->Fog.Density; + break; + case GL_EXP2: + c.f = 0.0; + d.f = -(ctx->Fog.Density * ctx->Fog.Density); + break; + default: + break; + } + break; + case GL_FOG_START: + case GL_FOG_END: + if (ctx->Fog.Mode == GL_LINEAR) { + if (ctx->Fog.Start == ctx->Fog.End) { + c.f = 1.0F; + d.f = 1.0F; + } else { + c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); + d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start); + } + } + break; + case GL_FOG_COLOR: + R200_STATECHANGE( rmesa, ctx ); + UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); + i = r200PackColor( 4, col[0], col[1], col[2], 0 ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK; + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i; + break; + case GL_FOG_COORD_SRC: { + GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]; + GLuint fog = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR]; + + fog &= ~R200_FOG_USE_MASK; + if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD || ctx->VertexProgram.Enabled) { + fog |= R200_FOG_USE_VTX_FOG; + out_0 |= R200_VTX_DISCRETE_FOG; + } + else { + fog |= R200_FOG_USE_SPEC_ALPHA; + out_0 &= ~R200_VTX_DISCRETE_FOG; + } + + if ( fog != rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog; + } + + if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) { + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0; + } + + break; + } + default: + return; + } + + if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) { + R200_STATECHANGE( rmesa, fog ); + rmesa->hw.fog.cmd[FOG_C] = c.i; + rmesa->hw.fog.cmd[FOG_D] = d.i; + } +} + + +/* ============================================================= + * Scissoring + */ + + +static GLboolean intersect_rect( drm_clip_rect_t *out, + drm_clip_rect_t *a, + drm_clip_rect_t *b ) +{ + *out = *a; + if ( b->x1 > out->x1 ) out->x1 = b->x1; + if ( b->y1 > out->y1 ) out->y1 = b->y1; + if ( b->x2 < out->x2 ) out->x2 = b->x2; + if ( b->y2 < out->y2 ) out->y2 = b->y2; + if ( out->x1 >= out->x2 ) return GL_FALSE; + if ( out->y1 >= out->y2 ) return GL_FALSE; + return GL_TRUE; +} + + +void r200RecalcScissorRects( r200ContextPtr rmesa ) +{ + drm_clip_rect_t *out; + int i; + + /* Grow cliprect store? + */ + if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { + while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { + rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ + rmesa->state.scissor.numAllocedClipRects *= 2; + } + + if (rmesa->state.scissor.pClipRects) + FREE(rmesa->state.scissor.pClipRects); + + rmesa->state.scissor.pClipRects = + MALLOC( rmesa->state.scissor.numAllocedClipRects * + sizeof(drm_clip_rect_t) ); + + if ( rmesa->state.scissor.pClipRects == NULL ) { + rmesa->state.scissor.numAllocedClipRects = 0; + return; + } + } + + out = rmesa->state.scissor.pClipRects; + rmesa->state.scissor.numClipRects = 0; + + for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { + if ( intersect_rect( out, + &rmesa->pClipRects[i], + &rmesa->state.scissor.rect ) ) { + rmesa->state.scissor.numClipRects++; + out++; + } + } +} + + +static void r200UpdateScissor( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if ( rmesa->dri.drawable ) { + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + + int x = ctx->Scissor.X; + int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; + int w = ctx->Scissor.X + ctx->Scissor.Width - 1; + int h = dPriv->h - ctx->Scissor.Y - 1; + + rmesa->state.scissor.rect.x1 = x + dPriv->x; + rmesa->state.scissor.rect.y1 = y + dPriv->y; + rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; + rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; + + r200RecalcScissorRects( rmesa ); + } +} + + +static void r200Scissor( GLcontext *ctx, + GLint x, GLint y, GLsizei w, GLsizei h ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if ( ctx->Scissor.Enabled ) { + R200_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */ + r200UpdateScissor( ctx ); + } + +} + + +/* ============================================================= + * Culling + */ + +static void r200CullFace( GLcontext *ctx, GLenum unused ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL]; + + s |= R200_FFACE_SOLID | R200_BFACE_SOLID; + t &= ~(R200_CULL_FRONT | R200_CULL_BACK); + + if ( ctx->Polygon.CullFlag ) { + switch ( ctx->Polygon.CullFaceMode ) { + case GL_FRONT: + s &= ~R200_FFACE_SOLID; + t |= R200_CULL_FRONT; + break; + case GL_BACK: + s &= ~R200_BFACE_SOLID; + t |= R200_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + s &= ~(R200_FFACE_SOLID | R200_BFACE_SOLID); + t |= (R200_CULL_FRONT | R200_CULL_BACK); + break; + } + } + + if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) { + R200_STATECHANGE(rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = s; + } + + if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) { + R200_STATECHANGE(rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t; + } +} + +static void r200FrontFace( GLcontext *ctx, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_FFACE_CULL_DIR_MASK; + + R200_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW; + + switch ( mode ) { + case GL_CW: + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CW; + break; + case GL_CCW: + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CCW; + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_CULL_FRONT_IS_CCW; + break; + } +} + +/* ============================================================= + * Point state + */ +static void r200PointSize( GLcontext *ctx, GLfloat size ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; + + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, ptp ); + rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff; + rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= ((GLuint)(ctx->Point.Size * 16.0)); +/* this is the size param of the point size calculation (point size reg value + is not used when calculation is active). */ + fcmd[PTP_VPORT_SCALE_PTSIZE] = ctx->Point.Size; +} + +static void r200PointParameter( GLcontext *ctx, GLenum pname, const GLfloat *params) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; + + switch (pname) { + case GL_POINT_SIZE_MIN: + /* Can clamp both in tcl and setup - just set both (as does fglrx) */ + R200_STATECHANGE( rmesa, lin ); + R200_STATECHANGE( rmesa, ptp ); + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= 0xffff; + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Point.MinSize * 16.0) << 16; + fcmd[PTP_CLAMP_MIN] = ctx->Point.MinSize; + break; + case GL_POINT_SIZE_MAX: + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, ptp ); + rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= 0xffff; + rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= (GLuint)(ctx->Point.MaxSize * 16.0) << 16; + fcmd[PTP_CLAMP_MAX] = ctx->Point.MaxSize; + break; + case GL_POINT_DISTANCE_ATTENUATION: + R200_STATECHANGE( rmesa, vtx ); + R200_STATECHANGE( rmesa, spr ); + R200_STATECHANGE( rmesa, ptp ); + GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd; + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= + ~(R200_PS_MULT_MASK | R200_PS_LIN_ATT_ZERO | R200_PS_SE_SEL_STATE); + /* can't rely on ctx->Point._Attenuated here and test for NEW_POINT in + r200ValidateState looks like overkill */ + if (ctx->Point.Params[0] != 1.0 || + ctx->Point.Params[1] != 0.0 || + ctx->Point.Params[2] != 0.0 || + (ctx->VertexProgram.Enabled && ctx->VertexProgram.PointSizeEnabled)) { + /* all we care for vp would be the ps_se_sel_state setting */ + fcmd[PTP_ATT_CONST_QUAD] = ctx->Point.Params[2]; + fcmd[PTP_ATT_CONST_LIN] = ctx->Point.Params[1]; + fcmd[PTP_ATT_CONST_CON] = ctx->Point.Params[0]; + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_MULT_ATTENCONST; + if (ctx->Point.Params[1] == 0.0) + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_LIN_ATT_ZERO; +/* FIXME: setting this here doesn't look quite ok - we only want to do + that if we're actually drawing points probably */ + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_PT_SIZE; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= R200_VTX_POINT_SIZE; + } + else { + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= + R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_PT_SIZE; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~R200_VTX_POINT_SIZE; + } + break; + case GL_POINT_FADE_THRESHOLD_SIZE: + /* don't support multisampling, so doesn't matter. */ + break; + /* can't do these but don't need them. + case GL_POINT_SPRITE_R_MODE_NV: + case GL_POINT_SPRITE_COORD_ORIGIN: */ + default: + fprintf(stderr, "bad pname parameter in r200PointParameter\n"); + return; + } +} + +/* ============================================================= + * Line state + */ +static void r200LineWidth( GLcontext *ctx, GLfloat widthf ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, lin ); + R200_STATECHANGE( rmesa, set ); + + /* Line width is stored in U6.4 format. + */ + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff; + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Line._Width * 16.0); + + if ( widthf > 1.0 ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_WIDELINE_ENABLE; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_WIDELINE_ENABLE; + } +} + +static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, lin ); + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); +} + + +/* ============================================================= + * Masks + */ +static void r200ColorMask( GLcontext *ctx, + GLboolean r, GLboolean g, + GLboolean b, GLboolean a ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint mask = r200PackColor( rmesa->r200Screen->cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], + ctx->Color.ColorMask[ACOMP] ); + + GLuint flag = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE; + + if (!(r && g && b && a)) + flag |= R200_PLANE_MASK_ENABLE; + + if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag; + } + + if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { + R200_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask; + } +} + + +/* ============================================================= + * Polygon state + */ + +static void r200PolygonOffset( GLcontext *ctx, + GLfloat factor, GLfloat units ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + float_ui32_type constant = { units * rmesa->state.depth.scale }; + float_ui32_type factoru = { factor }; + +/* factor *= 2; */ +/* constant *= 2; */ + +/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */ + + R200_STATECHANGE( rmesa, zbs ); + rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR] = factoru.ui32; + rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; +} + +static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + + /* Must flip pattern upside down. + */ + for ( i = 0 ; i < 32 ; i++ ) { + rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; + } + + /* TODO: push this into cmd mechanism + */ + R200_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; + drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(stipple) ); + UNLOCK_HARDWARE( rmesa ); +} + +static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; + + /* Can't generally do unfilled via tcl, but some good special + * cases work. + */ + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag); + if (rmesa->TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } +} + + +/* ============================================================= + * Rendering attributes + * + * We really don't want to recalculate all this every time we bind a + * texture. These things shouldn't change all that often, so it makes + * sense to break them out of the core texture state update routines. + */ + +/* Examine lighting and texture state to determine if separate specular + * should be enabled. + */ +static void r200UpdateSpecular( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; + + R200_STATECHANGE( rmesa, tcl ); + R200_STATECHANGE( rmesa, vtx ); + + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_0_SHIFT); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_1_SHIFT); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_1; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHTING_ENABLE; + + p &= ~R200_SPECULAR_ENABLE; + + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_DIFFUSE_SPECULAR_COMBINE; + + + if (ctx->Light.Enabled && + ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) { + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; + p |= R200_SPECULAR_ENABLE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= + ~R200_DIFFUSE_SPECULAR_COMBINE; + } + else if (ctx->Light.Enabled) { + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; + } else if (ctx->Fog.ColorSumEnabled ) { + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + p |= R200_SPECULAR_ENABLE; + } else { + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + } + + if (ctx->Fog.Enabled) { + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; + } + + if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p; + } + + /* Update vertex/render formats + */ + if (rmesa->TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } +} + + +/* ============================================================= + * Materials + */ + + +/* Update on colormaterial, material emmissive/ambient, + * lightmodel.globalambient + */ +static void update_global_ambient( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + float *fcmd = (float *)R200_DB_STATE( glt ); + + /* Need to do more if both emmissive & ambient are PREMULT: + * I believe this is not nessary when using source_material. This condition thus + * will never happen currently, and the function has no dependencies on materials now + */ + if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & + ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | + (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) + { + COPY_3V( &fcmd[GLT_RED], + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); + ACC_SCALE_3V( &fcmd[GLT_RED], + ctx->Light.Model.Ambient, + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); + } + else + { + COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); + } + + R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt); +} + +/* Update on change to + * - light[p].colors + * - light[p].enabled + */ +static void update_light_colors( GLcontext *ctx, GLuint p ) +{ + struct gl_light *l = &ctx->Light.Light[p]; + +/* fprintf(stderr, "%s\n", __FUNCTION__); */ + + if (l->Enabled) { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + float *fcmd = (float *)R200_DB_STATE( lit[p] ); + + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); + COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); + + R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } +} + +static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]; + light_model_ctl1 &= ~((0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | + (0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT) | + (0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT) | + (0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT) | + (0xf << R200_BACK_EMISSIVE_SOURCE_SHIFT) | + (0xf << R200_BACK_AMBIENT_SOURCE_SHIFT) | + (0xf << R200_BACK_DIFFUSE_SOURCE_SHIFT) | + (0xf << R200_BACK_SPECULAR_SOURCE_SHIFT)); + + if (ctx->Light.ColorMaterialEnabled) { + GLuint mask = ctx->Light.ColorMaterialBitmask; + + if (mask & MAT_BIT_FRONT_EMISSION) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_FRONT_EMISSIVE_SOURCE_SHIFT); + } + else + light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << + R200_FRONT_EMISSIVE_SOURCE_SHIFT); + + if (mask & MAT_BIT_FRONT_AMBIENT) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_FRONT_AMBIENT_SOURCE_SHIFT); + } + else + light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << + R200_FRONT_AMBIENT_SOURCE_SHIFT); + + if (mask & MAT_BIT_FRONT_DIFFUSE) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_FRONT_DIFFUSE_SOURCE_SHIFT); + } + else + light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << + R200_FRONT_DIFFUSE_SOURCE_SHIFT); + + if (mask & MAT_BIT_FRONT_SPECULAR) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_FRONT_SPECULAR_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << + R200_FRONT_SPECULAR_SOURCE_SHIFT); + } + + if (mask & MAT_BIT_BACK_EMISSION) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_BACK_EMISSIVE_SOURCE_SHIFT); + } + + else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 << + R200_BACK_EMISSIVE_SOURCE_SHIFT); + + if (mask & MAT_BIT_BACK_AMBIENT) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_BACK_AMBIENT_SOURCE_SHIFT); + } + else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 << + R200_BACK_AMBIENT_SOURCE_SHIFT); + + if (mask & MAT_BIT_BACK_DIFFUSE) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_BACK_DIFFUSE_SOURCE_SHIFT); + } + else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 << + R200_BACK_DIFFUSE_SOURCE_SHIFT); + + if (mask & MAT_BIT_BACK_SPECULAR) { + light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << + R200_BACK_SPECULAR_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 << + R200_BACK_SPECULAR_SOURCE_SHIFT); + } + } + else { + /* Default to SOURCE_MATERIAL: + */ + light_model_ctl1 |= + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT); + } + + if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) { + R200_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1; + } + + +} + +void r200UpdateMaterial( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLfloat (*mat)[4] = ctx->Light.Material.Attrib; + GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] ); + GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] ); + GLuint mask = ~0; + + /* Might be possible and faster to update everything unconditionally? */ + if (ctx->Light.ColorMaterialEnabled) + mask &= ~ctx->Light.ColorMaterialBitmask; + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (mask & MAT_BIT_FRONT_EMISSION) { + fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0]; + fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1]; + fcmd[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_FRONT_EMISSION][2]; + fcmd[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_FRONT_EMISSION][3]; + } + if (mask & MAT_BIT_FRONT_AMBIENT) { + fcmd[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_FRONT_AMBIENT][0]; + fcmd[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_FRONT_AMBIENT][1]; + fcmd[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_FRONT_AMBIENT][2]; + fcmd[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_FRONT_AMBIENT][3]; + } + if (mask & MAT_BIT_FRONT_DIFFUSE) { + fcmd[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_FRONT_DIFFUSE][0]; + fcmd[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_FRONT_DIFFUSE][1]; + fcmd[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_FRONT_DIFFUSE][2]; + fcmd[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_FRONT_DIFFUSE][3]; + } + if (mask & MAT_BIT_FRONT_SPECULAR) { + fcmd[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_FRONT_SPECULAR][0]; + fcmd[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_FRONT_SPECULAR][1]; + fcmd[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_FRONT_SPECULAR][2]; + fcmd[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_FRONT_SPECULAR][3]; + } + if (mask & MAT_BIT_FRONT_SHININESS) { + fcmd[MTL_SHININESS] = mat[MAT_ATTRIB_FRONT_SHININESS][0]; + } + + if (mask & MAT_BIT_BACK_EMISSION) { + fcmd2[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_BACK_EMISSION][0]; + fcmd2[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_BACK_EMISSION][1]; + fcmd2[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_BACK_EMISSION][2]; + fcmd2[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_BACK_EMISSION][3]; + } + if (mask & MAT_BIT_BACK_AMBIENT) { + fcmd2[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_BACK_AMBIENT][0]; + fcmd2[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_BACK_AMBIENT][1]; + fcmd2[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_BACK_AMBIENT][2]; + fcmd2[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_BACK_AMBIENT][3]; + } + if (mask & MAT_BIT_BACK_DIFFUSE) { + fcmd2[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_BACK_DIFFUSE][0]; + fcmd2[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_BACK_DIFFUSE][1]; + fcmd2[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_BACK_DIFFUSE][2]; + fcmd2[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_BACK_DIFFUSE][3]; + } + if (mask & MAT_BIT_BACK_SPECULAR) { + fcmd2[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_BACK_SPECULAR][0]; + fcmd2[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_BACK_SPECULAR][1]; + fcmd2[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_BACK_SPECULAR][2]; + fcmd2[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_BACK_SPECULAR][3]; + } + if (mask & MAT_BIT_BACK_SHININESS) { + fcmd2[MTL_SHININESS] = mat[MAT_ATTRIB_BACK_SHININESS][0]; + } + + R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] ); + R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[1] ); + + /* currently material changes cannot trigger a global ambient change, I believe this is correct + update_global_ambient( ctx ); */ +} + +/* _NEW_LIGHT + * _NEW_MODELVIEW + * _MESA_NEW_NEED_EYE_COORDS + * + * Uses derived state from mesa: + * _VP_inf_norm + * _h_inf_norm + * _Position + * _NormDirection + * _ModelViewInvScale + * _NeedEyeCoords + * _EyeZDir + * + * which are calculated in light.c and are correct for the current + * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW + * and _MESA_NEW_NEED_EYE_COORDS. + */ +static void update_light( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + /* Have to check these, or have an automatic shortcircuit mechanism + * to remove noop statechanges. (Or just do a better job on the + * front end). + */ + { + GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]; + + if (ctx->_NeedEyeCoords) + tmp &= ~R200_LIGHT_IN_MODELSPACE; + else + tmp |= R200_LIGHT_IN_MODELSPACE; + + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]) + { + R200_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp; + } + } + + { + GLfloat *fcmd = (GLfloat *)R200_DB_STATE( eye ); + fcmd[EYE_X] = ctx->_EyeZDir[0]; + fcmd[EYE_Y] = ctx->_EyeZDir[1]; + fcmd[EYE_Z] = - ctx->_EyeZDir[2]; + fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale; + R200_DB_STATECHANGE( rmesa, &rmesa->hw.eye ); + } + + + + if (ctx->Light.Enabled) { + GLint p; + for (p = 0 ; p < MAX_LIGHTS; p++) { + if (ctx->Light.Light[p].Enabled) { + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] ); + + if (l->EyePosition[3] == 0.0) { + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + fcmd[LIT_POSITION_W] = 0; + fcmd[LIT_DIRECTION_W] = 0; + } else { + COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); + fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_W] = 0; + } + + R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } + } + } +} + +static void r200Lightfv( GLcontext *ctx, GLenum light, + GLenum pname, const GLfloat *params ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint p = light - GL_LIGHT0; + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; + + + switch (pname) { + case GL_AMBIENT: + case GL_DIFFUSE: + case GL_SPECULAR: + update_light_colors( ctx, p ); + break; + + case GL_SPOT_DIRECTION: + /* picked up in update_light */ + break; + + case GL_POSITION: { + /* positions picked up in update_light, but can do flag here */ + GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + R200_STATECHANGE(rmesa, tcl); + if (l->EyePosition[3] != 0.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + break; + } + + case GL_SPOT_EXPONENT: + R200_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_EXPONENT] = params[0]; + break; + + case GL_SPOT_CUTOFF: { + GLuint flag = (p&1) ? R200_LIGHT_1_IS_SPOT : R200_LIGHT_0_IS_SPOT; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + R200_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff; + + R200_STATECHANGE(rmesa, tcl); + if (l->SpotCutoff != 180.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + + break; + } + + case GL_CONSTANT_ATTENUATION: + R200_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_CONST] = params[0]; + if ( params[0] == 0.0 ) + fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX; + else + fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0]; + break; + case GL_LINEAR_ATTENUATION: + R200_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_LINEAR] = params[0]; + break; + case GL_QUADRATIC_ATTENUATION: + R200_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_QUADRATIC] = params[0]; + break; + default: + return; + } + + /* Set RANGE_ATTEN only when needed */ + switch (pname) { + case GL_POSITION: + case GL_CONSTANT_ATTENUATION: + case GL_LINEAR_ATTENUATION: + case GL_QUADRATIC_ATTENUATION: { + GLuint *icmd = (GLuint *)R200_DB_STATE( tcl ); + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + GLuint atten_flag = ( p&1 ) ? R200_LIGHT_1_ENABLE_RANGE_ATTEN + : R200_LIGHT_0_ENABLE_RANGE_ATTEN; + GLuint atten_const_flag = ( p&1 ) ? R200_LIGHT_1_CONSTANT_RANGE_ATTEN + : R200_LIGHT_0_CONSTANT_RANGE_ATTEN; + + if ( l->EyePosition[3] == 0.0F || + ( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) && + fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) { + /* Disable attenuation */ + icmd[idx] &= ~atten_flag; + } else { + if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) { + /* Enable only constant portion of attenuation calculation */ + icmd[idx] |= ( atten_flag | atten_const_flag ); + } else { + /* Enable full attenuation calculation */ + icmd[idx] &= ~atten_const_flag; + icmd[idx] |= atten_flag; + } + } + + R200_DB_STATECHANGE( rmesa, &rmesa->hw.tcl ); + break; + } + default: + break; + } +} + +static void r200UpdateLocalViewer ( GLcontext *ctx ) +{ +/* It looks like for the texgen modes GL_SPHERE_MAP, GL_NORMAL_MAP and + GL_REFLECTION_MAP we need R200_LOCAL_VIEWER set (fglrx does exactly that + for these and only these modes). This means specular highlights may turn out + wrong in some cases when lighting is enabled but GL_LIGHT_MODEL_LOCAL_VIEWER + is not set, though it seems to happen rarely and the effect seems quite + subtle. May need TCL fallback to fix it completely, though I'm not sure + how you'd identify the cases where the specular highlights indeed will + be wrong. Don't know if fglrx does something special in that case. +*/ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + R200_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.LocalViewer || + ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS) + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER; +} + +static void r200LightModelfv( GLcontext *ctx, GLenum pname, + const GLfloat *param ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + switch (pname) { + case GL_LIGHT_MODEL_AMBIENT: + update_global_ambient( ctx ); + break; + + case GL_LIGHT_MODEL_LOCAL_VIEWER: + r200UpdateLocalViewer( ctx ); + break; + + case GL_LIGHT_MODEL_TWO_SIDE: + R200_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.TwoSide) + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE); + if (rmesa->TclFallback) { + r200ChooseRenderState( ctx ); + r200ChooseVertexState( ctx ); + } + break; + + case GL_LIGHT_MODEL_COLOR_CONTROL: + r200UpdateSpecular(ctx); + break; + + default: + break; + } +} + +static void r200ShadeModel( GLcontext *ctx, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + + s &= ~(R200_DIFFUSE_SHADE_MASK | + R200_ALPHA_SHADE_MASK | + R200_SPECULAR_SHADE_MASK | + R200_FOG_SHADE_MASK | + R200_DISC_FOG_SHADE_MASK); + + switch ( mode ) { + case GL_FLAT: + s |= (R200_DIFFUSE_SHADE_FLAT | + R200_ALPHA_SHADE_FLAT | + R200_SPECULAR_SHADE_FLAT | + R200_FOG_SHADE_FLAT | + R200_DISC_FOG_SHADE_FLAT); + break; + case GL_SMOOTH: + s |= (R200_DIFFUSE_SHADE_GOURAUD | + R200_ALPHA_SHADE_GOURAUD | + R200_SPECULAR_SHADE_GOURAUD | + R200_FOG_SHADE_GOURAUD | + R200_DISC_FOG_SHADE_GOURAUD); + break; + default: + return; + } + + if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = s; + } +} + + +/* ============================================================= + * User clip planes + */ + +static void r200ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R200_STATECHANGE( rmesa, ucp[p] ); + rmesa->hw.ucp[p].cmd[UCP_X] = ip[0]; + rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1]; + rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2]; + rmesa->hw.ucp[p].cmd[UCP_W] = ip[3]; +} + +static void r200UpdateClipPlanes( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R200_STATECHANGE( rmesa, ucp[p] ); + rmesa->hw.ucp[p].cmd[UCP_X] = ip[0]; + rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1]; + rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2]; + rmesa->hw.ucp[p].cmd[UCP_W] = ip[3]; + } + } +} + + +/* ============================================================= + * Stencil + */ + +static void +r200StencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, + GLint ref, GLuint mask ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R200_STENCIL_REF_SHIFT) | + ((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT)); + + R200_STATECHANGE( rmesa, ctx ); + R200_STATECHANGE( rmesa, msk ); + + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_STENCIL_TEST_MASK; + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(R200_STENCIL_REF_MASK| + R200_STENCIL_VALUE_MASK); + + switch ( ctx->Stencil.Function[0] ) { + case GL_NEVER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEVER; + break; + case GL_LESS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LESS; + break; + case GL_EQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_EQUAL; + break; + case GL_LEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LEQUAL; + break; + case GL_GREATER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GREATER; + break; + case GL_NOTEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEQUAL; + break; + case GL_GEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GEQUAL; + break; + case GL_ALWAYS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_ALWAYS; + break; + } + + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask; +} + +static void +r200StencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~R200_STENCIL_WRITE_MASK; + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= + ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT); +} + +static void +r200StencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, + GLenum zfail, GLenum zpass ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(R200_STENCIL_FAIL_MASK | + R200_STENCIL_ZFAIL_MASK | + R200_STENCIL_ZPASS_MASK); + + switch ( ctx->Stencil.FailFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC; + break; + case GL_INCR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC_WRAP; + break; + case GL_DECR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INVERT; + break; + } + + switch ( ctx->Stencil.ZFailFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC; + break; + case GL_INCR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC_WRAP; + break; + case GL_DECR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INVERT; + break; + } + + switch ( ctx->Stencil.ZPassFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC; + break; + case GL_INCR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC_WRAP; + break; + case GL_DECR_WRAP_EXT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INVERT; + break; + } +} + +static void r200ClearStencil( GLcontext *ctx, GLint s ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + rmesa->state.stencil.clear = + ((GLuint) (ctx->Stencil.Clear & 0xff) | + (0xff << R200_STENCIL_MASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT)); +} + + +/* ============================================================= + * Window position and viewport transformation + */ + +/* + * To correctly position primitives: + */ +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + + +/** + * Called when window size or position changes or viewport or depth range + * state is changed. We update the hardware viewport state here. + */ +void r200UpdateWindow( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type sx = { v[MAT_SX] }; + float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; + float_ui32_type sy = { - v[MAT_SY] }; + float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; + float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; + float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; + + R200_FIREVERTICES( rmesa ); + R200_STATECHANGE( rmesa, vpt ); + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = sy.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = sz.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32; +} + + + +static void r200Viewport( GLcontext *ctx, GLint x, GLint y, + GLsizei width, GLsizei height ) +{ + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ + r200UpdateWindow( ctx ); +} + +static void r200DepthRange( GLcontext *ctx, GLclampd nearval, + GLclampd farval ) +{ + r200UpdateWindow( ctx ); +} + +void r200UpdateViewportOffset( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type tx; + float_ui32_type ty; + + tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X; + ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y; + + if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 || + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 ) + { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + R200_STATECHANGE( rmesa, vpt ); + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32; + + /* update polygon stipple x/y screen offset */ + { + GLuint stx, sty; + GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC]; + + m &= ~(R200_STIPPLE_X_OFFSET_MASK | + R200_STIPPLE_Y_OFFSET_MASK); + + /* add magic offsets, then invert */ + stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK); + sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) + & R200_STIPPLE_COORD_MASK); + + m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) | + (sty << R200_STIPPLE_Y_OFFSET_SHIFT)); + + if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) { + R200_STATECHANGE( rmesa, msc ); + rmesa->hw.msc.cmd[MSC_RE_MISC] = m; + } + } + } + + r200UpdateScissor( ctx ); +} + + + +/* ============================================================= + * Miscellaneous + */ + +static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLubyte color[4]; + CLAMPED_FLOAT_TO_UBYTE(color[0], c[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]); + rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp, + color[0], color[1], + color[2], color[3] ); +} + + +static void r200RenderMode( GLcontext *ctx, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); +} + + +static GLuint r200_rop_tab[] = { + R200_ROP_CLEAR, + R200_ROP_AND, + R200_ROP_AND_REVERSE, + R200_ROP_COPY, + R200_ROP_AND_INVERTED, + R200_ROP_NOOP, + R200_ROP_XOR, + R200_ROP_OR, + R200_ROP_NOR, + R200_ROP_EQUIV, + R200_ROP_INVERT, + R200_ROP_OR_REVERSE, + R200_ROP_COPY_INVERTED, + R200_ROP_OR_INVERTED, + R200_ROP_NAND, + R200_ROP_SET, +}; + +static void r200LogicOpCode( GLcontext *ctx, GLenum opcode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint rop = (GLuint)opcode - GL_CLEAR; + + ASSERT( rop < 16 ); + + R200_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[rop]; +} + + +/* + * Set up the cliprects for either front or back-buffer drawing. + */ +void r200SetCliprects( r200ContextPtr rmesa ) +{ + __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; + __DRIdrawablePrivate *const readable = rmesa->dri.readable; + GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; + GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; + + if (draw_fb->_ColorDrawBufferMask[0] + == BUFFER_BIT_BACK_LEFT) { + /* Can't ignore 2d windows if we are page flipping. + */ + if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } + else { + rmesa->numClipRects = drawable->numBackClipRects; + rmesa->pClipRects = drawable->pBackClipRects; + } + } + else { + /* front buffer (or none, or multiple buffers) */ + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } + + if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { + _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, + drawable->w, drawable->h); + draw_fb->Initialized = GL_TRUE; + } + + if (drawable != readable) { + if ((read_fb->Width != readable->w) || + (read_fb->Height != readable->h)) { + _mesa_resize_framebuffer(rmesa->glCtx, read_fb, + readable->w, readable->h); + read_fb->Initialized = GL_TRUE; + } + } + + if (rmesa->state.scissor.enabled) + r200RecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; +} + + +static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (R200_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( mode )); + + R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ + + /* + * _ColorDrawBufferMask is easier to cope with than <mode>. + * Check for software fallback, update cliprects. + */ + switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { + case BUFFER_BIT_FRONT_LEFT: + case BUFFER_BIT_BACK_LEFT: + FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); + break; + default: + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ + FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); + return; + } + + r200SetCliprects( rmesa ); + + /* We'll set the drawing engine's offset/pitch parameters later + * when we update other state. + */ +} + + +static void r200ReadBuffer( GLcontext *ctx, GLenum mode ) +{ + /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ +} + +/* ============================================================= + * State enable/disable + */ + +static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint p, flag; + + if ( R200_DEBUG & DEBUG_STATE ) + fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( cap ), + state ? "GL_TRUE" : "GL_FALSE" ); + + switch ( cap ) { + /* Fast track this one... + */ + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + break; + + case GL_ALPHA_TEST: + R200_STATECHANGE( rmesa, ctx ); + if (state) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ALPHA_TEST_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ALPHA_TEST_ENABLE; + } + break; + + case GL_BLEND: + case GL_COLOR_LOGIC_OP: + r200_set_blend_state( ctx ); + break; + + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + p = cap-GL_CLIP_PLANE0; + R200_STATECHANGE( rmesa, tcl ); + if (state) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0<<p); + r200ClipPlane( ctx, cap, NULL ); + } + else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0<<p); + } + break; + + case GL_COLOR_MATERIAL: + r200ColorMaterial( ctx, 0, 0 ); + r200UpdateMaterial( ctx ); + break; + + case GL_CULL_FACE: + r200CullFace( ctx, 0 ); + break; + + case GL_DEPTH_TEST: + R200_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_Z_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_Z_ENABLE; + } + break; + + case GL_DITHER: + R200_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + } + break; + + case GL_FOG: + R200_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE; + r200Fogfv( ctx, GL_FOG_MODE, NULL ); + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE; + R200_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK; + } + r200UpdateSpecular( ctx ); /* for PK_SPEC */ + if (rmesa->TclFallback) + r200ChooseVertexState( ctx ); + _mesa_allow_light_in_model( ctx, !state ); + break; + + case GL_LIGHT0: + case GL_LIGHT1: + case GL_LIGHT2: + case GL_LIGHT3: + case GL_LIGHT4: + case GL_LIGHT5: + case GL_LIGHT6: + case GL_LIGHT7: + R200_STATECHANGE(rmesa, tcl); + p = cap - GL_LIGHT0; + if (p&1) + flag = (R200_LIGHT_1_ENABLE | + R200_LIGHT_1_ENABLE_AMBIENT | + R200_LIGHT_1_ENABLE_SPECULAR); + else + flag = (R200_LIGHT_0_ENABLE | + R200_LIGHT_0_ENABLE_AMBIENT | + R200_LIGHT_0_ENABLE_SPECULAR); + + if (state) + rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag; + else + rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; + + /* + */ + update_light_colors( ctx, p ); + break; + + case GL_LIGHTING: + r200UpdateSpecular(ctx); + /* for reflection map fixup - might set recheck_texgen for all units too */ + rmesa->NewGLState |= _NEW_TEXTURE; + break; + + case GL_LINE_SMOOTH: + R200_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ANTI_ALIAS_LINE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_LINE; + } + break; + + case GL_LINE_STIPPLE: + R200_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PATTERN_ENABLE; + } else { + rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PATTERN_ENABLE; + } + break; + + case GL_NORMALIZE: + R200_STATECHANGE( rmesa, tcl ); + if ( state ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_NORMALIZE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_NORMALIZE_NORMALS; + } + break; + + /* Pointsize registers on r200 only work for point sprites, and point smooth + * doesn't work for point sprites (and isn't needed for 1.0 sized aa points). + * In any case, setting pointmin == pointsizemax == 1.0 for aa points + * is enough to satisfy conform. + */ + case GL_POINT_SMOOTH: + break; + + /* These don't really do anything, as we don't use the 3vtx + * primitives yet. + */ +#if 0 + case GL_POLYGON_OFFSET_POINT: + R200_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_POINT; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_POINT; + } + break; + + case GL_POLYGON_OFFSET_LINE: + R200_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_LINE; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_LINE; + } + break; +#endif + + case GL_POINT_SPRITE_ARB: + R200_STATECHANGE( rmesa, spr ); + if ( state ) { + int i; + for (i = 0; i < 6; i++) { + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= + ctx->Point.CoordReplace[i] << (R200_PS_GEN_TEX_0_SHIFT + i); + } + } else { + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~R200_PS_GEN_TEX_MASK; + } + break; + + case GL_POLYGON_OFFSET_FILL: + R200_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_TRI; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_TRI; + } + break; + + case GL_POLYGON_SMOOTH: + R200_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ANTI_ALIAS_POLY; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_POLY; + } + break; + + case GL_POLYGON_STIPPLE: + R200_STATECHANGE(rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_STIPPLE_ENABLE; + } else { + rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_STIPPLE_ENABLE; + } + break; + + case GL_RESCALE_NORMAL_EXT: { + GLboolean tmp = ctx->_NeedEyeCoords ? state : !state; + R200_STATECHANGE( rmesa, tcl ); + if ( tmp ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_RESCALE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS; + } + break; + } + + case GL_SCISSOR_TEST: + R200_FIREVERTICES( rmesa ); + rmesa->state.scissor.enabled = state; + r200UpdateScissor( ctx ); + break; + + case GL_STENCIL_TEST: + if ( rmesa->state.stencil.hwBuffer ) { + R200_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE; + } + } else { + FALLBACK( rmesa, R200_FALLBACK_STENCIL, state ); + } + break; + + case GL_TEXTURE_GEN_Q: + case GL_TEXTURE_GEN_R: + case GL_TEXTURE_GEN_S: + case GL_TEXTURE_GEN_T: + /* Picked up in r200UpdateTextureState. + */ + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + break; + + case GL_COLOR_SUM_EXT: + r200UpdateSpecular ( ctx ); + break; + + case GL_VERTEX_PROGRAM_ARB: + if (!state) { + GLuint i; + rmesa->curr_vp_hw = NULL; + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE; + /* mark all tcl atoms (tcl vector state got overwritten) dirty + not sure about tcl scalar state - we need at least grd + with vert progs too. + ucp looks like it doesn't get overwritten (may even work + with vp for pos-invariant progs if we're lucky) */ + R200_STATECHANGE( rmesa, mtl[0] ); + R200_STATECHANGE( rmesa, mtl[1] ); + R200_STATECHANGE( rmesa, fog ); + R200_STATECHANGE( rmesa, glt ); + R200_STATECHANGE( rmesa, eye ); + for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) { + R200_STATECHANGE( rmesa, mat[i] ); + } + for (i = 0 ; i < 8; i++) { + R200_STATECHANGE( rmesa, lit[i] ); + } + R200_STATECHANGE( rmesa, tcl ); + for (i = 0; i <= ctx->Const.MaxClipPlanes; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i); + } +/* else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i); + }*/ + } + /* ugly. Need to call everything which might change compsel. */ + r200UpdateSpecular( ctx ); +#if 0 + /* shouldn't be necessary, as it's picked up anyway in r200ValidateState (_NEW_PROGRAM), + but without it doom3 locks up at always the same places. Why? */ + /* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before + accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... */ + r200UpdateTextureState( ctx ); + /* if we call r200UpdateTextureState we need the code below because we are calling it with + non-current derived enabled values which may revert the state atoms for frag progs even when + they already got disabled... ugh + Should really figure out why we need to call r200UpdateTextureState in the first place */ + GLuint unit; + for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) { + R200_STATECHANGE( rmesa, pix[unit] ); + R200_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= + ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* need to guard this with drmSupportsFragmentShader? Should never get here if + we don't announce ATI_fs, right? */ + rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, tf ); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; +#endif + } + else { + /* picked up later */ + } + /* call functions which change hw state based on ARB_vp enabled or not. */ + r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL ); + r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL ); + break; + + case GL_VERTEX_PROGRAM_POINT_SIZE_ARB: + r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL ); + break; + + case GL_FRAGMENT_SHADER_ATI: + if ( !state ) { + /* restore normal tex env colors and make sure tex env combine will get updated + mark env atoms dirty (as their data was overwritten by afs even + if they didn't change) and restore tex coord routing */ + GLuint unit; + for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) { + R200_STATECHANGE( rmesa, pix[unit] ); + R200_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= + ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; + /* need to guard this with drmSupportsFragmentShader? Should never get here if + we don't announce ATI_fs, right? */ + rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + R200_STATECHANGE( rmesa, cst ); + R200_STATECHANGE( rmesa, tf ); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; + } + else { + /* need to mark this dirty as pix/tf atoms have overwritten the data + even if the data in the atoms didn't change */ + R200_STATECHANGE( rmesa, atf ); + R200_STATECHANGE( rmesa, afs[1] ); + /* everything else picked up in r200UpdateTextureState hopefully */ + } + break; + default: + return; + } +} + + +void r200LightingSpaceChange( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLboolean tmp; + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); + + if (ctx->_NeedEyeCoords) + tmp = ctx->Transform.RescaleNormals; + else + tmp = !ctx->Transform.RescaleNormals; + + R200_STATECHANGE( rmesa, tcl ); + if ( tmp ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_RESCALE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS; + } + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); +} + +/* ============================================================= + * Deferred state management - matrices, textures, other? + */ + + + + +static void upload_matrix( r200ContextPtr rmesa, GLfloat *src, int idx ) +{ + float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0; + int i; + + + for (i = 0 ; i < 4 ; i++) { + *dest++ = src[i]; + *dest++ = src[i+4]; + *dest++ = src[i+8]; + *dest++ = src[i+12]; + } + + R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); +} + +static void upload_matrix_t( r200ContextPtr rmesa, const GLfloat *src, int idx ) +{ + float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0; + memcpy(dest, src, 16*sizeof(float)); + R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); +} + + +static void update_texturematrix( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + GLuint tpc = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]; + GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]; + int unit; + + if (R200_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__, + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]); + + rmesa->TexMatEnabled = 0; + rmesa->TexMatCompSel = 0; + + for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { + if (!ctx->Texture.Unit[unit]._ReallyEnabled) + continue; + + if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) { + rmesa->TexMatEnabled |= (R200_TEXGEN_TEXMAT_0_ENABLE| + R200_TEXMAT_0_ENABLE) << unit; + + rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit; + + if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { + /* Need to preconcatenate any active texgen + * obj/eyeplane matrices: + */ + _math_matrix_mul_matrix( &rmesa->tmpmat, + ctx->TextureMatrixStack[unit].Top, + &rmesa->TexGenMatrix[unit] ); + upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit ); + } + else { + upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, + R200_MTX_TEX0+unit ); + } + } + else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { + upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, + R200_MTX_TEX0+unit ); + } + } + + tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled); + if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]) { + R200_STATECHANGE(rmesa, tcg); + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc; + } + + compsel &= ~R200_OUTPUT_TEX_MASK; + compsel |= rmesa->TexMatCompSel | rmesa->TexGenCompSel; + if (compsel != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]) { + R200_STATECHANGE(rmesa, vtx); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = compsel; + } +} + + + +/** + * Tell the card where to render (offset, pitch). + * Effected by glDrawBuffer, etc + */ +void +r200UpdateDrawBuffer(GLcontext *ctx) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + driRenderbuffer *drb; + + if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + /* draw to front */ + drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + } + else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + /* draw to back */ + drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } + else { + /* drawing to multiple buffers, or none */ + return; + } + + assert(drb); + assert(drb->flippedPitch); + + R200_STATECHANGE( rmesa, ctx ); + + /* Note: we used the (possibly) page-flipped values */ + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] + = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) + & R200_COLOROFFSET_MASK); + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } +} + + + +void r200ValidateState( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint new_state = rmesa->NewGLState; + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + r200UpdateDrawBuffer(ctx); + } + + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + r200UpdateTextureState( ctx ); + new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ + r200UpdateLocalViewer( ctx ); + } + +/* FIXME: don't really need most of these when vertex progs are enabled */ + + /* Need an event driven matrix update? + */ + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP ); + + /* Need these for lighting (shouldn't upload otherwise) + */ + if (new_state & (_NEW_MODELVIEW)) { + upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, R200_MTX_MV ); + upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, R200_MTX_IMV ); + } + + /* Does this need to be triggered on eg. modelview for + * texgen-derived objplane/eyeplane matrices? + */ + if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) { + update_texturematrix( ctx ); + } + + if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) { + update_light( ctx ); + } + + /* emit all active clip planes if projection matrix changes. + */ + if (new_state & (_NEW_PROJECTION)) { + if (ctx->Transform.ClipPlanesEnabled) + r200UpdateClipPlanes( ctx ); + } + + if (new_state & (_NEW_PROGRAM| + /* need to test for pretty much anything due to possible parameter bindings */ + _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| + _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| + _NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) { + if (ctx->VertexProgram._Enabled) { + r200SetupVertexProg( ctx ); + } + else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0); + } + + rmesa->NewGLState = 0; +} + + +static void r200InvalidateState( GLcontext *ctx, GLuint new_state ) +{ + _swrast_InvalidateState( ctx, new_state ); + _swsetup_InvalidateState( ctx, new_state ); + _vbo_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); + _ae_invalidate_state( ctx, new_state ); + R200_CONTEXT(ctx)->NewGLState |= new_state; +} + +/* A hack. The r200 can actually cope just fine with materials + * between begin/ends, so fix this. + * Should map to inputs just like the generic vertex arrays for vertex progs. + * In theory there could still be too many and we'd still need a fallback. + */ +static GLboolean check_material( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLint i; + + for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; + i < _TNL_ATTRIB_MAT_BACK_INDEXES; + i++) + if (tnl->vb.AttribPtr[i] && + tnl->vb.AttribPtr[i]->stride) + return GL_TRUE; + + return GL_FALSE; +} + +static void r200WrapRunPipeline( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLboolean has_material; + + if (0) + fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); + + /* Validate state: + */ + if (rmesa->NewGLState) + r200ValidateState( ctx ); + + has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx ); + + if (has_material) { + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE ); + } + + /* Run the pipeline. + */ + _tnl_run_pipeline( ctx ); + + if (has_material) { + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_FALSE ); + } +} + + +/* Initialize the driver's state functions. + */ +void r200InitStateFuncs( struct dd_function_table *functions ) +{ + functions->UpdateState = r200InvalidateState; + functions->LightingSpaceChange = r200LightingSpaceChange; + + functions->DrawBuffer = r200DrawBuffer; + functions->ReadBuffer = r200ReadBuffer; + + functions->AlphaFunc = r200AlphaFunc; + functions->BlendColor = r200BlendColor; + functions->BlendEquationSeparate = r200BlendEquationSeparate; + functions->BlendFuncSeparate = r200BlendFuncSeparate; + functions->ClearColor = r200ClearColor; + functions->ClearDepth = r200ClearDepth; + functions->ClearIndex = NULL; + functions->ClearStencil = r200ClearStencil; + functions->ClipPlane = r200ClipPlane; + functions->ColorMask = r200ColorMask; + functions->CullFace = r200CullFace; + functions->DepthFunc = r200DepthFunc; + functions->DepthMask = r200DepthMask; + functions->DepthRange = r200DepthRange; + functions->Enable = r200Enable; + functions->Fogfv = r200Fogfv; + functions->FrontFace = r200FrontFace; + functions->Hint = NULL; + functions->IndexMask = NULL; + functions->LightModelfv = r200LightModelfv; + functions->Lightfv = r200Lightfv; + functions->LineStipple = r200LineStipple; + functions->LineWidth = r200LineWidth; + functions->LogicOpcode = r200LogicOpCode; + functions->PolygonMode = r200PolygonMode; + functions->PolygonOffset = r200PolygonOffset; + functions->PolygonStipple = r200PolygonStipple; + functions->PointParameterfv = r200PointParameter; + functions->PointSize = r200PointSize; + functions->RenderMode = r200RenderMode; + functions->Scissor = r200Scissor; + functions->ShadeModel = r200ShadeModel; + functions->StencilFuncSeparate = r200StencilFuncSeparate; + functions->StencilMaskSeparate = r200StencilMaskSeparate; + functions->StencilOpSeparate = r200StencilOpSeparate; + functions->Viewport = r200Viewport; +} + + +void r200InitTnlFuncs( GLcontext *ctx ) +{ + TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial; + TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline; +} diff --git a/r200/r200_state.h b/r200/r200_state.h new file mode 100644 index 0000000..f34090b --- /dev/null +++ b/r200/r200_state.h @@ -0,0 +1,68 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state.h,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_STATE_H__ +#define __R200_STATE_H__ + +#include "r200_context.h" + +extern void r200InitState( r200ContextPtr rmesa ); +extern void r200InitStateFuncs( struct dd_function_table *functions ); +extern void r200InitTnlFuncs( GLcontext *ctx ); + +extern void r200UpdateMaterial( GLcontext *ctx ); + +extern void r200SetCliprects( r200ContextPtr rmesa ); +extern void r200RecalcScissorRects( r200ContextPtr rmesa ); +extern void r200UpdateViewportOffset( GLcontext *ctx ); +extern void r200UpdateWindow( GLcontext *ctx ); +extern void r200UpdateDrawBuffer(GLcontext *ctx); + +extern void r200ValidateState( GLcontext *ctx ); + +extern void r200PrintDirty( r200ContextPtr rmesa, + const char *msg ); + + +extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ); +#define FALLBACK( rmesa, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ + r200Fallback( rmesa->glCtx, bit, mode ); \ +} while (0) + +extern void r200LightingSpaceChange( GLcontext *ctx ); + +#endif diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c new file mode 100644 index 0000000..b40d0bd --- /dev/null +++ b/r200/r200_state_init.c @@ -0,0 +1,972 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_state_init.c,v 1.4 2003/02/22 06:21:11 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "enums.h" +#include "colormac.h" +#include "api_arrayelt.h" + +#include "swrast/swrast.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_tcl.h" +#include "r200_tex.h" +#include "r200_swtcl.h" + +#include "xmlpool.h" + +/* ============================================================= + * State initialization + */ + +void r200PrintDirty( r200ContextPtr rmesa, const char *msg ) +{ + struct r200_state_atom *l; + + fprintf(stderr, msg); + fprintf(stderr, ": "); + + foreach(l, &rmesa->hw.atomlist) { + if (l->dirty || rmesa->hw.all_dirty) + fprintf(stderr, "%s, ", l->name); + } + + fprintf(stderr, "\n"); +} + +static int cmdpkt( int id ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.packet.cmd_type = RADEON_CMD_PACKET; + h.packet.packet_id = id; + return h.i; +} + +static int cmdvec( int offset, int stride, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.vectors.cmd_type = RADEON_CMD_VECTORS; + h.vectors.offset = offset; + h.vectors.stride = stride; + h.vectors.count = count; + return h.i; +} + +/* warning: the count here is divided by 4 compared to other cmds + (so it doesn't exceed the char size)! */ +static int cmdveclinear( int offset, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.veclinear.cmd_type = RADEON_CMD_VECLINEAR; + h.veclinear.addr_lo = offset & 0xff; + h.veclinear.addr_hi = (offset & 0xff00) >> 8; + h.veclinear.count = count; + return h.i; +} + +static int cmdscl( int offset, int stride, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.scalars.cmd_type = RADEON_CMD_SCALARS; + h.scalars.offset = offset; + h.scalars.stride = stride; + h.scalars.count = count; + return h.i; +} + +static int cmdscl2( int offset, int stride, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.scalars.cmd_type = RADEON_CMD_SCALARS2; + h.scalars.offset = offset - 0x100; + h.scalars.stride = stride; + h.scalars.count = count; + return h.i; +} + +#define CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ + (void) rmesa; \ + return FLAG; \ +} + +#define TCL_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ + return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \ +} + +#define TCL_OR_VP_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ + return !rmesa->TclFallback && (FLAG); \ +} + +#define VP_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) idx; \ + return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \ +} + + +CHECK( always, GL_TRUE ) +CHECK( never, GL_FALSE ) +CHECK( tex_any, ctx->Texture._EnabledUnits ) +CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); +CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) ) +CHECK( tex, rmesa->state.texture.unit[idx].unitneeded ) +CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) +CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) ) +CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) +CHECK( afs, ctx->ATIFragmentShader._Enabled ) +CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT ) +TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) +TCL_CHECK( tcl, GL_TRUE ) +TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) +TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled ) +TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) ) +TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) +VP_CHECK( tcl_vp, GL_TRUE ) +VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) +VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) + + +/* Initialize the context's hardware state. + */ +void r200InitState( r200ContextPtr rmesa ) +{ + GLcontext *ctx = rmesa->glCtx; + GLuint color_fmt, depth_fmt, i; + GLint drawPitch, drawOffset; + + switch ( rmesa->r200Screen->cpp ) { + case 2: + color_fmt = R200_COLOR_FORMAT_RGB565; + break; + case 4: + color_fmt = R200_COLOR_FORMAT_ARGB8888; + break; + default: + fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" ); + exit( -1 ); + } + + rmesa->state.color.clear = 0x00000000; + + switch ( ctx->Visual.depthBits ) { + case 16: + rmesa->state.depth.clear = 0x0000ffff; + rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; + depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z; + rmesa->state.stencil.clear = 0x00000000; + break; + case 24: + rmesa->state.depth.clear = 0x00ffffff; + rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; + depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z; + rmesa->state.stencil.clear = 0xffff0000; + break; + default: + fprintf( stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits ); + exit( -1 ); + } + + /* Only have hw stencil when depth buffer is 24 bits deep */ + rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && + ctx->Visual.depthBits == 24 ); + + rmesa->Fallback = 0; + + if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { + drawOffset = rmesa->r200Screen->backOffset; + drawPitch = rmesa->r200Screen->backPitch; + } else { + drawOffset = rmesa->r200Screen->frontOffset; + drawPitch = rmesa->r200Screen->frontPitch; + } +#if 000 + if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { + rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; + rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; + } else { + rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; + rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; + } + + rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset; + rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch; +#endif + + rmesa->hw.max_state_size = 0; + +#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \ + do { \ + rmesa->hw.ATOM.cmd_size = SZ; \ + rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.name = NM; \ + rmesa->hw.ATOM.idx = IDX; \ + rmesa->hw.ATOM.check = check_##CHK; \ + rmesa->hw.ATOM.dirty = GL_FALSE; \ + rmesa->hw.max_state_size += SZ * sizeof(int); \ + } while (0) + + + /* Allocate state buffers: + */ + if (rmesa->r200Screen->drmSupportsBlendColor) + ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); + else + ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); + ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); + ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); + ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); + ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 ); + ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 ); + ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 ); + ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 ); + ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 ); + ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); + ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); + ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); + if (rmesa->r200Screen->drmSupportsFragShader) { + if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 ); + ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 ); + ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } + else { + if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 ); + ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 ); + ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 ); + ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } + if (rmesa->r200Screen->drmSupportsCubeMapsR200) { + ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); + ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 ); + ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 ); + ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); + ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); + ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); + } + else { + ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); + ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/tex-1", 1 ); + ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/tex-2", 2 ); + ALLOC_STATE( cube[3], never, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); + ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); + ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); + } + if (rmesa->r200Screen->drmSupportsVertexProgram) { + ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); + ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } + else { + ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); + ALLOC_STATE( vpi[0], never, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], never, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], never, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], never, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } + /* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */ + ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 ); + ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 ); + ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 ); + ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); + ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); + ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); + ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 ); + ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 ); + ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 ); + ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 ); + ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); + ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); + ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); + ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); + ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); + ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 ); + ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 ); + ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 ); + ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 ); + ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); + ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); + ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); + ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 ); + ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 ); + ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 ); + ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 ); + ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 ); + ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 ); + if (rmesa->r200Screen->drmSupportsTriPerf) { + ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); + } + else { + ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); + } + if (rmesa->r200Screen->drmSupportsPointSprites) { + ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); + ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + } + else { + ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); + ALLOC_STATE (ptp, never, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + } + + r200SetUpAtomList( rmesa ); + + /* Fill in the packet headers: + */ + rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); + rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); + rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); + if (rmesa->r200Screen->drmSupportsBlendColor) + rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR); + rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); + rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); + rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); + rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); + rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); + rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); + rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X); + rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET); + rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL); + rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0); + rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS); + rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE); + rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); + rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3); + rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0); + if (rmesa->r200Screen->drmSupportsFragShader) { + rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3); + rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + } else { + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3); + rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + } + rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0); + rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1); + rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL); + rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0); + rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0); + rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1); + rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1); + rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2); + rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2); + rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3); + rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3); + rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4); + rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4); + rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5); + rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5); + rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0); + rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1); + rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2); + rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3); + rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4); + rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5); + rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); + rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0); + rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL); + rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2); + rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0); + rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL); + rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0); + rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL); + rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL); + rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL); + rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL); + rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL); + rmesa->hw.mtl[0].cmd[MTL_CMD_0] = + cmdvec( R200_VS_MAT_0_EMISS, 1, 16 ); + rmesa->hw.mtl[0].cmd[MTL_CMD_1] = + cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 ); + rmesa->hw.mtl[1].cmd[MTL_CMD_0] = + cmdvec( R200_VS_MAT_1_EMISS, 1, 16 ); + rmesa->hw.mtl[1].cmd[MTL_CMD_1] = + cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 ); + + rmesa->hw.vpi[0].cmd[VPI_CMD_0] = + cmdveclinear( R200_PVS_PROG0, 64 ); + rmesa->hw.vpi[1].cmd[VPI_CMD_0] = + cmdveclinear( R200_PVS_PROG1, 64 ); + rmesa->hw.vpp[0].cmd[VPP_CMD_0] = + cmdveclinear( R200_PVS_PARAM0, 96 ); + rmesa->hw.vpp[1].cmd[VPP_CMD_0] = + cmdveclinear( R200_PVS_PARAM1, 96 ); + + rmesa->hw.grd.cmd[GRD_CMD_0] = + cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); + rmesa->hw.fog.cmd[FOG_CMD_0] = + cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 ); + rmesa->hw.glt.cmd[GLT_CMD_0] = + cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 ); + rmesa->hw.eye.cmd[EYE_CMD_0] = + cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 ); + + rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_0_MV, 1, 16); + rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16); + rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_2_MVP, 1, 16); + rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16); + rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16); + rmesa->hw.mat[R200_MTX_TEX2].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_5_TEX2, 1, 16); + rmesa->hw.mat[R200_MTX_TEX3].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_6_TEX3, 1, 16); + rmesa->hw.mat[R200_MTX_TEX4].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_7_TEX4, 1, 16); + rmesa->hw.mat[R200_MTX_TEX5].cmd[MAT_CMD_0] = + cmdvec( R200_VS_MATRIX_8_TEX5, 1, 16); + + for (i = 0 ; i < 8; i++) { + rmesa->hw.lit[i].cmd[LIT_CMD_0] = + cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 ); + rmesa->hw.lit[i].cmd[LIT_CMD_1] = + cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 ); + } + + for (i = 0 ; i < 6; i++) { + rmesa->hw.ucp[i].cmd[UCP_CMD_0] = + cmdvec( R200_VS_UCP_ADDR + i, 1, 4 ); + } + + rmesa->hw.ptp.cmd[PTP_CMD_0] = + cmdvec( R200_VS_PNT_SPRITE_VPORT_SCALE, 1, 4 ); + rmesa->hw.ptp.cmd[PTP_CMD_1] = + cmdvec( R200_VS_PNT_SPRITE_ATT_CONST, 1, 12 ); + + /* Initial Harware state: + */ + rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS + /* | R200_RIGHT_HAND_CUBE_OGL*/); + + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX | + R200_FOG_USE_SPEC_ALPHA); + + rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000; + + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP | + (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT)); + + if (rmesa->r200Screen->drmSupportsBlendColor) { + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000; + rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP | + (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT)); + rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP | + (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | + (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT)); + } + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = + rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation; + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = + ((rmesa->r200Screen->depthPitch & + R200_DEPTHPITCH_MASK) | + R200_DEPTH_ENDIAN_NO_SWAP); + + if (rmesa->using_hyperz) + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ; + + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | + R200_Z_TEST_LESS | + R200_STENCIL_TEST_ALWAYS | + R200_STENCIL_FAIL_KEEP | + R200_STENCIL_ZPASS_KEEP | + R200_STENCIL_ZFAIL_KEEP | + R200_Z_WRITE_ENABLE); + + if (rmesa->using_hyperz) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE | + R200_Z_DECOMPRESSION_ENABLE; +/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ + } + + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE + | R200_TEX_BLEND_0_ENABLE); + + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt; + switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { + case DRI_CONF_DITHER_XERRORDIFFRESET: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT; + break; + case DRI_CONF_DITHER_ORDERED: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE; + break; + } + if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == + DRI_CONF_ROUND_ROUND ) + rmesa->state.color.roundEnable = R200_ROUND_ENABLE; + else + rmesa->state.color.roundEnable = 0; + if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == + DRI_CONF_COLOR_REDUCTION_DITHER ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; + else + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + +#if 000 + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset + + rmesa->r200Screen->fbLocation) + & R200_COLOROFFSET_MASK); + + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch & + R200_COLORPITCH_MASK) | + R200_COLOR_ENDIAN_NO_SWAP); +#else + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + + rmesa->r200Screen->fbLocation) + & R200_COLOROFFSET_MASK); + + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & + R200_COLORPITCH_MASK) | + R200_COLOR_ENDIAN_NO_SWAP); +#endif + /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */ + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } + + rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * + driQueryOptionf (&rmesa->optionCache,"texture_blend_quality"); + rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0; + + rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW | + R200_BFACE_SOLID | + R200_FFACE_SOLID | + R200_FLAT_SHADE_VTX_LAST | + R200_DIFFUSE_SHADE_GOURAUD | + R200_ALPHA_SHADE_GOURAUD | + R200_SPECULAR_SHADE_GOURAUD | + R200_FOG_SHADE_GOURAUD | + R200_DISC_FOG_SHADE_GOURAUD | + R200_VTX_PIX_CENTER_OGL | + R200_ROUND_MODE_TRUNC | + R200_ROUND_PREC_8TH_PIX); + + rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE | + R200_SCISSOR_ENABLE); + + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff); + + rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = + ((0 << R200_LINE_CURRENT_PTR_SHIFT) | + (1 << R200_LINE_CURRENT_COUNT_SHIFT)); + + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4); + + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = + ((0x00 << R200_STENCIL_REF_SHIFT) | + (0xff << R200_STENCIL_MASK_SHIFT) | + (0xff << R200_STENCIL_WRITEMASK_SHIFT)); + + rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY; + rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff; + + rmesa->hw.tam.cmd[TAM_DEBUG3] = 0; + + rmesa->hw.msc.cmd[MSC_RE_MISC] = + ((0 << R200_STIPPLE_X_OFFSET_SHIFT) | + (0 << R200_STIPPLE_Y_OFFSET_SHIFT) | + R200_STIPPLE_BIG_BIT_ORDER); + + + rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0; + rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0; + rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0; + rmesa->hw.cst.cmd[CST_RE_SCISSOR_TL_0] = 0; + rmesa->hw.cst.cmd[CST_RE_SCISSOR_BR_0] = 0; + rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] = +#ifdef MESA_BIG_ENDIAN + R200_VC_32BIT_SWAP; +#else + R200_VC_NO_SWAP; +#endif + + if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { + /* Bypass TCL */ + rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8); + } + + rmesa->hw.cst.cmd[CST_RE_POINTSIZE] = + (((GLuint)(ctx->Const.MaxPointSize * 16.0)) << R200_MAXPOINTSIZE_SHIFT) | 0x10; + rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] = + (0x0 << R200_VERTEX_POSITION_ADDR__SHIFT); + rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] = + (0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) | + (0x03 << R200_VTX_COLOR_1_ADDR__SHIFT); + rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] = + (0x06 << R200_VTX_TEX_0_ADDR__SHIFT) | + (0x07 << R200_VTX_TEX_1_ADDR__SHIFT) | + (0x08 << R200_VTX_TEX_2_ADDR__SHIFT) | + (0x09 << R200_VTX_TEX_3_ADDR__SHIFT); + rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] = + (0x0A << R200_VTX_TEX_4_ADDR__SHIFT) | + (0x0B << R200_VTX_TEX_5_ADDR__SHIFT); + + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000; + + for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) { + rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL; + rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = + ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) | /* <-- note i */ + (2 << R200_TXFORMAT_WIDTH_SHIFT) | + (2 << R200_TXFORMAT_HEIGHT_SHIFT)); + rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; + rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] = + (/* R200_TEXCOORD_PROJ | */ + 0x100000); /* Small default bias */ + if (rmesa->r200Screen->drmSupportsFragShader) { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0; + rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0; + } + else { + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + } + + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] = + rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + + rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] = + (R200_TXC_ARG_A_ZERO | + R200_TXC_ARG_B_ZERO | + R200_TXC_ARG_C_DIFFUSE_COLOR | + R200_TXC_OP_MADD); + + rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND2] = + ((i << R200_TXC_TFACTOR_SEL_SHIFT) | + R200_TXC_SCALE_1X | + R200_TXC_CLAMP_0_1 | + R200_TXC_OUTPUT_REG_R0); + + rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND] = + (R200_TXA_ARG_A_ZERO | + R200_TXA_ARG_B_ZERO | + R200_TXA_ARG_C_DIFFUSE_ALPHA | + R200_TXA_OP_MADD); + + rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND2] = + ((i << R200_TXA_TFACTOR_SEL_SHIFT) | + R200_TXA_SCALE_1X | + R200_TXA_CLAMP_0_1 | + R200_TXA_OUTPUT_REG_R0); + } + + rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0; + rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0; + rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0; + rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0; + rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0; + rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0; + + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = + (R200_VAP_TCL_ENABLE | + (0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT)); + + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = + (R200_VPORT_X_SCALE_ENA | + R200_VPORT_Y_SCALE_ENA | + R200_VPORT_Z_SCALE_ENA | + R200_VPORT_X_OFFSET_ENA | + R200_VPORT_Y_OFFSET_ENA | + R200_VPORT_Z_OFFSET_ENA | +/* FIXME: Turn on for tex rect only */ + R200_VTX_ST_DENORMALIZED | + R200_VTX_W0_FMT); + + + rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0; + rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = + ((R200_VTX_Z0 | R200_VTX_W0 | + (R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT))); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW); + rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE; + + + /* Matrix selection */ + rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] = + (R200_MTX_MV << R200_MODELVIEW_0_SHIFT); + + rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] = + (R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT); + + rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] = + (R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT); + + rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] = + ((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) | + (R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) | + (R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) | + (R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT)); + + rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] = + ((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) | + (R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT)); + + + /* General TCL state */ + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = + (R200_SPECULAR_LIGHTS | + R200_DIFFUSE_SPECULAR_COMBINE | + R200_LOCAL_LIGHT_VEC_GL | + R200_LM0_SOURCE_MATERIAL_0 << R200_FRONT_SHININESS_SOURCE_SHIFT | + R200_LM0_SOURCE_MATERIAL_1 << R200_BACK_SHININESS_SOURCE_SHIFT); + + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = + ((R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) | + (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT)); + + rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */ + rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0; + rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0; + rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0; + + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = + (R200_UCP_IN_CLIP_SPACE | + R200_CULL_FRONT_IS_CCW); + + /* Texgen/Texmat state */ + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x00ffffff; + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] = + ((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) | + (1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) | + (2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) | + (3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) | + (4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) | + (5 << R200_TEXGEN_5_INPUT_TEX_SHIFT)); + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0; + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = + ((0 << R200_TEXGEN_0_INPUT_SHIFT) | + (1 << R200_TEXGEN_1_INPUT_SHIFT) | + (2 << R200_TEXGEN_2_INPUT_SHIFT) | + (3 << R200_TEXGEN_3_INPUT_SHIFT) | + (4 << R200_TEXGEN_4_INPUT_SHIFT) | + (5 << R200_TEXGEN_5_INPUT_SHIFT)); + rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0; + + + for (i = 0 ; i < 8; i++) { + struct gl_light *l = &ctx->Light.Light[i]; + GLenum p = GL_LIGHT0 + i; + *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX; + + ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient ); + ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse ); + ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular ); + ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff ); + ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION, + &l->ConstantAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, + &l->LinearAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, + &l->QuadraticAttenuation ); + *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0; + } + + ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, + ctx->Light.Model.Ambient ); + + TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx ); + + for (i = 0 ; i < 6; i++) { + ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL ); + } + + ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL ); + ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); + ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); + ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); + ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); + ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL ); + + rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE; + + rmesa->hw.eye.cmd[EYE_X] = 0; + rmesa->hw.eye.cmd[EYE_Y] = 0; + rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; + rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; + + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] = + R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST; + + /* ptp_eye is presumably used to calculate the attenuation wrt a different + location? In any case, since point attenuation triggers _needeyecoords, + it is constant. Probably ignored as long as R200_PS_USE_MODEL_EYE_VEC + isn't set */ + rmesa->hw.ptp.cmd[PTP_EYE_X] = 0; + rmesa->hw.ptp.cmd[PTP_EYE_Y] = 0; + rmesa->hw.ptp.cmd[PTP_EYE_Z] = IEEE_ONE | 0x80000000; /* -1.0 */ + rmesa->hw.ptp.cmd[PTP_EYE_3] = 0; + /* no idea what the ptp_vport_scale values are good for, except the + PTSIZE one - hopefully doesn't matter */ + rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_0] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_1] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_PTSIZE] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_3] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_ATT_CONST_QUAD] = 0; + rmesa->hw.ptp.cmd[PTP_ATT_CONST_LIN] = 0; + rmesa->hw.ptp.cmd[PTP_ATT_CONST_CON] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_ATT_CONST_3] = 0; + rmesa->hw.ptp.cmd[PTP_CLAMP_MIN] = IEEE_ONE; + rmesa->hw.ptp.cmd[PTP_CLAMP_MAX] = 0x44ffe000; /* 2047 */ + rmesa->hw.ptp.cmd[PTP_CLAMP_2] = 0; + rmesa->hw.ptp.cmd[PTP_CLAMP_3] = 0; + + r200LightingSpaceChange( ctx ); + + rmesa->hw.all_dirty = GL_TRUE; +} diff --git a/r200/r200_swtcl.c b/r200/r200_swtcl.c new file mode 100644 index 0000000..25d229d --- /dev/null +++ b/r200/r200_swtcl.c @@ -0,0 +1,979 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.c,v 1.5 2003/05/06 23:52:08 daenzer Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "enums.h" +#include "image.h" +#include "imports.h" +#include "macros.h" + +#include "swrast/s_context.h" +#include "swrast/s_fog.h" +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_state.h" +#include "r200_swtcl.h" +#include "r200_tcl.h" + + +static void flush_last_swtcl_prim( r200ContextPtr rmesa ); + + +/*********************************************************************** + * Initialization + ***********************************************************************/ + +#define EMIT_ATTR( ATTR, STYLE, F0 ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->swtcl.vertex_attr_count++; \ + fmt_0 |= F0; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +static void r200SetVertexFormat( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + int fmt_0 = 0; + int fmt_1 = 0; + int offset = 0; + + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + rmesa->swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if ( !rmesa->swtcl.needproj || + RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { /* need w coord for projected textures */ + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F, R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0 ); + offset = 4; + } + else { + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F, R200_VTX_XY | R200_VTX_Z0 ); + offset = 3; + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, R200_VTX_POINT_SIZE ); + offset += 1; + } + + rmesa->swtcl.coloroffset = offset; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); +#endif + offset += 1; + + rmesa->swtcl.specoffset = 0; + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || + RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + +#if MESA_LITTLE_ENDIAN + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); + } + else { + EMIT_PAD( 3 ); + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); + } + else { + EMIT_PAD( 1 ); + } +#else + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); + } + else { + EMIT_PAD( 1 ); + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); + } + else { + EMIT_PAD( 3 ); + } +#endif + } + + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + GLuint sz = VB->TexCoordPtr[i]->size; + + fmt_1 |= sz << (3 * i); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1, 0 ); + } + } + } + + if ( (rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK) + != R200_FOG_USE_SPEC_ALPHA ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK; + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA; + } + + if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || + (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) || + (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { + R200_NEWPRIM(rmesa); + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0; + rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1; + + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); + rmesa->swtcl.vertex_size /= 4; + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + } +} + + +static void r200RenderStart( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + + r200SetVertexFormat( ctx ); + + if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); +} + + +/** + * Set vertex state for SW TCL. The primary purpose of this function is to + * determine in advance whether or not the hardware can / should do the + * projection divide or Mesa should do it. + */ +void r200ChooseVertexState( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint vte; + GLuint vap; + + /* We must ensure that we don't do _tnl_need_projected_coords while in a + * rasterization fallback. As this function will be called again when we + * leave a rasterization fallback, we can just skip it for now. + */ + if (rmesa->Fallback != 0) + return; + + vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]; + vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]; + + /* HW perspective divide is a win, but tiny vertex formats are a + * bigger one. + */ + if (!RENDERINPUTS_TEST_RANGE( tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) + || (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) { + rmesa->swtcl.needproj = GL_TRUE; + vte |= R200_VTX_XY_FMT | R200_VTX_Z_FMT; + vte &= ~R200_VTX_W0_FMT; + if (RENDERINPUTS_TEST_RANGE( tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + vap &= ~R200_VAP_FORCE_W_TO_ONE; + } + else { + vap |= R200_VAP_FORCE_W_TO_ONE; + } + } + else { + rmesa->swtcl.needproj = GL_FALSE; + vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT); + vte |= R200_VTX_W0_FMT; + vap &= ~R200_VAP_FORCE_W_TO_ONE; + } + + _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj ); + + if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) { + R200_STATECHANGE( rmesa, vte ); + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte; + } + + if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) { + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap; + } +} + + +/* Flush vertices in the current dma region. + */ +static void flush_last_swtcl_prim( r200ContextPtr rmesa ) +{ + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->dma.flush = NULL; + + if (rmesa->dma.current.buf) { + struct r200_dma_region *current = &rmesa->dma.current; + GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset + + current->buf->buf->idx * RADEON_BUFFER_SIZE + + current->start); + + assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND)); + + assert (current->start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + current->ptr); + + if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + + rmesa->hw.max_state_size + VBUF_BUFSZ ); + r200EmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, + current_offset); + + r200EmitVbufPrim( rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + } + + rmesa->swtcl.numverts = 0; + current->start = current->ptr; + } +} + + +/* Alloc space in the current dma region. + */ +static INLINE void * +r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + r200RefillCurrentDmaRegion( rmesa ); + + if (!rmesa->dma.flush) { + rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); + ASSERT( rmesa->dma.current.start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current.ptr ); + + + { + GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); + rmesa->dma.current.ptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } + +} + + +/**************************************************************************/ + + +static INLINE GLuint reduced_hw_prim( GLcontext *ctx, GLuint prim) +{ + switch (prim) { + case GL_POINTS: + return (ctx->Point.PointSprite || + ((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && + !(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? + R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS; + case GL_LINES: + /* fallthrough */ + case GL_LINE_LOOP: + /* fallthrough */ + case GL_LINE_STRIP: + return R200_VF_PRIM_LINES; + default: + /* all others reduced to triangles */ + return R200_VF_PRIM_TRIANGLES; + } +} + + +static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ); +static void r200RenderPrimitive( GLcontext *ctx, GLenum prim ); +static void r200ResetLineStipple( GLcontext *ctx ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 0 + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r200ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 ) +#define LOCAL_VARS \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + const char *r200verts = (char *)rmesa->swtcl.verts; +#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r200Vertex +#define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS)) + +#undef TAG +#define TAG(x) r200_##x +#include "tnl_dd/t_dd_triemit.h" + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r200_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r200_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r200_line( rmesa, a, b ) +#define POINT( a ) r200_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R200_TWOSIDE_BIT 0x01 +#define R200_UNFILLED_BIT 0x02 +#define R200_MAX_TRIFUNC 0x04 + + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R200_MAX_TRIFUNC]; + + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R200_UNFILLED_BIT) +#define DO_TWOSIDE (IND & R200_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) + +#define VERT_SET_RGBA( v, c ) \ +do { \ + r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ +} while (0) + +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +#define VERT_SET_SPEC( v, c ) \ +do { \ + if (specoffset) { \ + r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \ + } \ +} while (0) +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]); \ + r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]); \ + spec0->red = spec1->red; \ + spec0->green = spec1->green; \ + spec0->blue = spec1->blue; \ + } \ +} while (0) + +/* These don't need LE32_TO_CPU() as they used to save and restore + * colors which are already in the correct format. + */ +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) ) +#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R200_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R200_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R200_TWOSIDE_BIT|R200_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r200_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r200_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r200_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r200_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r200RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->swtcl.vertex_size; \ + const char *r200verts = (char *)rmesa->swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r200_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r200_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ + +void r200ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (!rmesa->TclFallback || rmesa->Fallback) + return; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R200_UNFILLED_BIT; + + if (index != rmesa->swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r200_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r200_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r200_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->swtcl.RenderIndex = index; + } +} + + +/**********************************************************************/ +/* High level hooks for t_vb_render.c */ +/**********************************************************************/ + + +static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (rmesa->swtcl.hw_primitive != hwprim) { + /* need to disable perspective-correct texturing for point sprites */ + if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { + if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE; + } + } + else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE; + } + R200_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; + } +} + +static void r200RenderPrimitive( GLcontext *ctx, GLenum prim ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + rmesa->swtcl.render_primitive = prim; + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) ); +} + +static void r200RenderFinish( GLcontext *ctx ) +{ +} + +static void r200ResetLineStipple( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + R200_STATECHANGE( rmesa, lin ); +} + + +/**********************************************************************/ +/* Transition to/from hardware rasterization. */ +/**********************************************************************/ + +static const char * const fallbackStrings[] = { + "Texture mode", + "glDrawBuffer(GL_FRONT_AND_BACK)", + "glEnable(GL_STENCIL) without hw stencil buffer", + "glRenderMode(selection or feedback)", + "R200_NO_RAST", + "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)" +}; + + +static const char *getFallbackString(GLuint bit) +{ + int i = 0; + while (bit > 1) { + i++; + bit >>= 1; + } + return fallbackStrings[i]; +} + + +void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint oldfallback = rmesa->Fallback; + + if (mode) { + rmesa->Fallback |= bit; + if (oldfallback == 0) { + R200_FIREVERTICES( rmesa ); + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE ); + _swsetup_Wakeup( ctx ); + rmesa->swtcl.RenderIndex = ~0; + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); + } + } + } + else { + rmesa->Fallback &= ~bit; + if (oldfallback == bit) { + + _swrast_flush( ctx ); + tnl->Driver.Render.Start = r200RenderStart; + tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive; + tnl->Driver.Render.Finish = r200RenderFinish; + + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple; + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE ); + if (rmesa->TclFallback) { + /* These are already done if rmesa->TclFallback goes to + * zero above. But not if it doesn't (R200_NO_TCL for + * example?) + */ + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + r200ChooseVertexState( ctx ); + r200ChooseRenderState( ctx ); + } + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); + } + } + } +} + + + + +/** + * Cope with depth operations by drawing individual pixels as points. + * + * \todo + * The way the vertex state is set in this routine is hokey. It seems to + * work, but it's very hackish. This whole routine is pretty hackish. If + * the bitmap is small enough, it seems like it would be faster to copy it + * to AGP memory and use it as a non-power-of-two texture (i.e., + * NV_texture_rectangle). + */ +void +r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const GLfloat *rc = ctx->Current.RasterColor; + GLint row, col; + r200Vertex vert; + GLuint orig_vte; + GLuint h; + + + /* Turn off tcl. + */ + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 ); + + /* Choose tiny vertex format + */ + { + const GLuint fmt_0 = R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0 + | (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT); + const GLuint fmt_1 = 0; + GLuint vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]; + GLuint vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]; + + vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT); + vte |= R200_VTX_W0_FMT; + vap &= ~R200_VAP_FORCE_W_TO_ONE; + + rmesa->swtcl.vertex_size = 5; + + if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) + || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { + R200_NEWPRIM(rmesa); + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0; + rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1; + } + + if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) { + R200_STATECHANGE( rmesa, vte ); + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte; + } + + if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) { + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap; + } + } + + /* Ready for point primitives: + */ + r200RenderPrimitive( ctx, GL_POINTS ); + + /* Turn off the hw viewport transformation: + */ + R200_STATECHANGE( rmesa, vte ); + orig_vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]; + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VPORT_X_SCALE_ENA | + R200_VPORT_Y_SCALE_ENA | + R200_VPORT_Z_SCALE_ENA | + R200_VPORT_X_OFFSET_ENA | + R200_VPORT_Y_OFFSET_ENA | + R200_VPORT_Z_OFFSET_ENA); + + /* Turn off other stuff: Stipple?, texture?, blending?, etc. + */ + + + /* Populate the vertex + * + * Incorporate FOG into RGBA + */ + if (ctx->Fog.Enabled) { + const GLfloat *fc = ctx->Fog.Color; + GLfloat color[4]; + GLfloat f; + + if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT) + f = _swrast_z_to_fogfactor(ctx, ctx->Current.Attrib[VERT_ATTRIB_FOG][0]); + else + f = _swrast_z_to_fogfactor(ctx, ctx->Current.RasterDistance); + + color[0] = f * rc[0] + (1.F - f) * fc[0]; + color[1] = f * rc[1] + (1.F - f) * fc[1]; + color[2] = f * rc[2] + (1.F - f) * fc[2]; + color[3] = rc[3]; + + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red, color[0]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, color[1]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue, color[2]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, color[3]); + } + else { + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red, rc[0]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, rc[1]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue, rc[2]); + UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, rc[3]); + } + + + vert.tv.z = ctx->Current.RasterPos[2]; + + + /* Update window height + */ + LOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( rmesa ); + h = rmesa->dri.drawable->h + rmesa->dri.drawable->y; + px += rmesa->dri.drawable->x; + + /* Clipping handled by existing mechansims in r200_ioctl.c? + */ + for (row=0; row<height; row++) { + const GLubyte *src = (const GLubyte *) + _mesa_image_address2d(unpack, bitmap, width, height, + GL_COLOR_INDEX, GL_BITMAP, row, 0 ); + + if (unpack->LsbFirst) { + /* Lsb first */ + GLubyte mask = 1U << (unpack->SkipPixels & 0x7); + for (col=0; col<width; col++) { + if (*src & mask) { + vert.tv.x = px+col; + vert.tv.y = h - (py+row) - 1; + r200_point( rmesa, &vert ); + } + src += (mask >> 7); + mask = ((mask << 1) & 0xff) | (mask >> 7); + } + + /* get ready for next row */ + if (mask != 1) + src++; + } + else { + /* Msb first */ + GLubyte mask = 128U >> (unpack->SkipPixels & 0x7); + for (col=0; col<width; col++) { + if (*src & mask) { + vert.tv.x = px+col; + vert.tv.y = h - (py+row) - 1; + r200_point( rmesa, &vert ); + } + src += mask & 1; + mask = ((mask << 7) & 0xff) | (mask >> 1); + } + /* get ready for next row */ + if (mask != 128) + src++; + } + } + + /* Fire outstanding vertices, restore state + */ + R200_STATECHANGE( rmesa, vte ); + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = orig_vte; + + /* Unfallback + */ + TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 0 ); + + /* Need to restore vertexformat? + */ + if (rmesa->TclFallback) + r200ChooseVertexState( ctx ); +} + + + +/**********************************************************************/ +/* Initialization. */ +/**********************************************************************/ + +void r200InitSwtcl( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r200ContextPtr rmesa = R200_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = r200RenderStart; + tnl->Driver.Render.Finish = r200RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 36 * sizeof(GLfloat) ); + + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; + rmesa->swtcl.hw_primitive = 0; +} + + +void r200DestroySwtcl( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (rmesa->swtcl.indexed_verts.buf) + r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ ); +} diff --git a/r200/r200_swtcl.h b/r200/r200_swtcl.h new file mode 100644 index 0000000..ccf8179 --- /dev/null +++ b/r200/r200_swtcl.h @@ -0,0 +1,75 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_swtcl.h,v 1.3 2003/05/06 23:52:08 daenzer Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_SWTCL_H__ +#define __R200_SWTCL_H__ + +#include "mtypes.h" +#include "swrast/swrast.h" +#include "r200_context.h" + +extern void r200InitSwtcl( GLcontext *ctx ); +extern void r200DestroySwtcl( GLcontext *ctx ); + +extern void r200ChooseRenderState( GLcontext *ctx ); +extern void r200ChooseVertexState( GLcontext *ctx ); + +extern void r200CheckTexSizes( GLcontext *ctx ); + +extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count, + GLuint newinputs ); + +extern void r200PrintSetupFlags(char *msg, GLuint flags ); + + +extern void r200_emit_indexed_verts( GLcontext *ctx, + GLuint start, + GLuint count ); + +extern void r200_translate_vertex( GLcontext *ctx, + const r200Vertex *src, + SWvertex *dst ); + +extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v ); + +extern void r200_import_float_colors( GLcontext *ctx ); +extern void r200_import_float_spec_colors( GLcontext *ctx ); + +extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ); + + +#endif diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c new file mode 100644 index 0000000..1ff0cf9 --- /dev/null +++ b/r200/r200_tcl.c @@ -0,0 +1,655 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.c,v 1.2 2002/12/16 16:18:55 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "mtypes.h" +#include "enums.h" +#include "colormac.h" +#include "light.h" + +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "r200_context.h" +#include "r200_state.h" +#include "r200_ioctl.h" +#include "r200_tex.h" +#include "r200_tcl.h" +#include "r200_swtcl.h" +#include "r200_maos.h" + + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_LOOP 0 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 1 +#define HAVE_QUAD_STRIPS 1 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + + +#define HW_POINTS ((ctx->Point.PointSprite || \ + ((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && \ + !(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? \ + R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS) +#define HW_LINES R200_VF_PRIM_LINES +#define HW_LINE_LOOP 0 +#define HW_LINE_STRIP R200_VF_PRIM_LINE_STRIP +#define HW_TRIANGLES R200_VF_PRIM_TRIANGLES +#define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP +#define HW_TRIANGLE_STRIP_1 0 +#define HW_TRIANGLE_FAN R200_VF_PRIM_TRIANGLE_FAN +#define HW_QUADS R200_VF_PRIM_QUADS +#define HW_QUAD_STRIP R200_VF_PRIM_QUAD_STRIP +#define HW_POLYGON R200_VF_PRIM_POLYGON + + +static GLboolean discrete_prim[0x10] = { + 0, /* 0 none */ + 1, /* 1 points */ + 1, /* 2 lines */ + 0, /* 3 line_strip */ + 1, /* 4 tri_list */ + 0, /* 5 tri_fan */ + 0, /* 6 tri_strip */ + 0, /* 7 tri_w_flags */ + 1, /* 8 rect list (unused) */ + 1, /* 9 3vert point */ + 1, /* a 3vert line */ + 0, /* b point sprite */ + 0, /* c line loop */ + 1, /* d quads */ + 0, /* e quad strip */ + 0, /* f polygon */ +}; + + +#define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx) +#define ELT_TYPE GLushort + +#define ELT_INIT(prim, hw_prim) \ + r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND ) + +#define GET_MESA_ELTS() rmesa->tcl.Elts + + +/* Don't really know how many elts will fit in what's left of cmdbuf, + * as there is state to emit, etc: + */ + +/* Testing on isosurf shows a maximum around here. Don't know if it's + * the card or driver or kernel module that is causing the behaviour. + */ +#define GET_MAX_HW_ELTS() 300 + +#define RESET_STIPPLE() do { \ + R200_STATECHANGE( rmesa, lin ); \ + r200EmitState( rmesa ); \ +} while (0) + +#define AUTO_STIPPLE( mode ) do { \ + R200_STATECHANGE( rmesa, lin ); \ + if (mode) \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \ + R200_LINE_PATTERN_AUTO_RESET; \ + else \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ + ~R200_LINE_PATTERN_AUTO_RESET; \ + r200EmitState( rmesa ); \ +} while (0) + + +#define ALLOC_ELTS(nr) r200AllocElts( rmesa, nr ) + +static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) +{ + if (rmesa->dma.flush == r200FlushElts && + rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) { + + GLushort *dest = (GLushort *)(rmesa->store.cmd_buf + + rmesa->store.cmd_used); + + rmesa->store.cmd_used += nr*2; + + return dest; + } + else { + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + + rmesa->hw.max_state_size + ELTS_BUFSZ(nr) ); + + r200EmitAOS( rmesa, + rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, 0 ); + + return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr ); + } +} + + +#define CLOSE_ELTS() \ +do { \ + if (0) R200_NEWPRIM( rmesa ); \ +} \ +while (0) + + +/* TODO: Try to extend existing primitive if both are identical, + * discrete and there are no intervening state changes. (Somewhat + * duplicates changes to DrawArrays code) + */ +static void r200EmitPrim( GLcontext *ctx, + GLenum prim, + GLuint hwprim, + GLuint start, + GLuint count) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + r200TclPrimitive( ctx, prim, hwprim ); + + r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + + rmesa->hw.max_state_size + VBUF_BUFSZ ); + + r200EmitAOS( rmesa, + rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, + start ); + + /* Why couldn't this packet have taken an offset param? + */ + r200EmitVbufPrim( rmesa, + rmesa->tcl.hw_primitive, + count - start ); +} + +#define EMIT_PRIM(ctx, prim, hwprim, start, count) do { \ + r200EmitPrim( ctx, prim, hwprim, start, count ); \ + (void) rmesa; } while (0) + +/* Try & join small primitives + */ +#if 0 +#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0 +#else +#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \ + ((NR) < 20 || \ + ((NR) < 40 && \ + rmesa->tcl.hw_primitive == (PRIM| \ + R200_VF_TCL_OUTPUT_VTX_ENABLE| \ + R200_VF_PRIM_WALK_IND))) +#endif + +#ifdef MESA_BIG_ENDIAN +/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */ +#define EMIT_ELT(dest, offset, x) do { \ + int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 ); \ + GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 ); \ + (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \ + (void)rmesa; } while (0) +#else +#define EMIT_ELT(dest, offset, x) do { \ + (dest)[offset] = (GLushort) (x); \ + (void)rmesa; } while (0) +#endif + +#define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)((dest)+offset) = ((y)<<16)|(x); + + + +#define TAG(x) tcl_##x +#include "tnl_dd/t_dd_dmatmp2.h" + +/**********************************************************************/ +/* External entrypoints */ +/**********************************************************************/ + +void r200EmitPrimitive( GLcontext *ctx, + GLuint first, + GLuint last, + GLuint flags ) +{ + tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); +} + +void r200EmitEltPrimitive( GLcontext *ctx, + GLuint first, + GLuint last, + GLuint flags ) +{ + tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); +} + +void r200TclPrimitive( GLcontext *ctx, + GLenum prim, + int hw_prim ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE; + + if (newprim != rmesa->tcl.hw_primitive || + !discrete_prim[hw_prim&0xf]) { + /* need to disable perspective-correct texturing for point sprites */ + if ((prim & PRIM_MODE_MASK) == GL_POINTS && ctx->Point.PointSprite) { + if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE; + } + } + else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE; + } + R200_NEWPRIM( rmesa ); + rmesa->tcl.hw_primitive = newprim; + } +} + + +/**********************************************************************/ +/* Fog blend factor computation for hw tcl */ +/* same calculation used as in t_vb_fog.c */ +/**********************************************************************/ + +#define FOG_EXP_TABLE_SIZE 256 +#define FOG_MAX (10.0) +#define EXP_FOG_MAX .0006595 +#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE) +static GLfloat exp_table[FOG_EXP_TABLE_SIZE]; + +#if 1 +#define NEG_EXP( result, narg ) \ +do { \ + GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \ + GLint k = (GLint) f; \ + if (k > FOG_EXP_TABLE_SIZE-2) \ + result = (GLfloat) EXP_FOG_MAX; \ + else \ + result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \ +} while (0) +#else +#define NEG_EXP( result, narg ) \ +do { \ + result = exp(-narg); \ +} while (0) +#endif + + +/** + * Initialize the exp_table[] lookup table for approximating exp(). + */ +void +r200InitStaticFogData( void ) +{ + GLfloat f = 0.0F; + GLint i = 0; + for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) { + exp_table[i] = (GLfloat) exp(-f); + } +} + + +/** + * Compute per-vertex fog blend factors from fog coordinates by + * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function. + * Fog coordinates are distances from the eye (typically between the + * near and far clip plane distances). + * Note the fog (eye Z) coords may be negative so we use ABS(z) below. + * Fog blend factors are in the range [0,1]. + */ +float +r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) +{ + GLfloat end = ctx->Fog.End; + GLfloat d, temp; + const GLfloat z = FABSF(fogcoord); + + switch (ctx->Fog.Mode) { + case GL_LINEAR: + if (ctx->Fog.Start == ctx->Fog.End) + d = 1.0F; + else + d = 1.0F / (ctx->Fog.End - ctx->Fog.Start); + temp = (end - z) * d; + return CLAMP(temp, 0.0F, 1.0F); + break; + case GL_EXP: + d = ctx->Fog.Density; + NEG_EXP( temp, d * z ); + return temp; + break; + case GL_EXP2: + d = ctx->Fog.Density*ctx->Fog.Density; + NEG_EXP( temp, d * z * z ); + return temp; + break; + default: + _mesa_problem(ctx, "Bad fog mode in make_fog_coord"); + return 0; + } +} + + +/**********************************************************************/ +/* Render pipeline stage */ +/**********************************************************************/ + + +/* TCL render. + */ +static GLboolean r200_run_tcl_render( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; + GLubyte *vimap_rev; +/* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3, + color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use + more than 12 of those at the same time. */ + GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255}; + + + /* TODO: separate this from the swtnl pipeline + */ + if (rmesa->TclFallback) + return GL_TRUE; /* fallback to software t&l */ + + if (R200_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (VB->Count == 0) + return GL_FALSE; + + /* Validate state: + */ + if (rmesa->NewGLState) + r200ValidateState( ctx ); + + if (!ctx->VertexProgram._Enabled) { + /* NOTE: inputs != tnl->render_inputs - these are the untransformed + * inputs. + */ + map_rev_fixed[0] = VERT_ATTRIB_POS; + /* technically there is no reason we always need VA_COLOR0. In theory + could disable it depending on lighting, color materials, texturing... */ + map_rev_fixed[4] = VERT_ATTRIB_COLOR0; + + if (ctx->Light.Enabled) { + map_rev_fixed[2] = VERT_ATTRIB_NORMAL; + } + + /* this also enables VA_COLOR1 when using separate specular + lighting model, which is unnecessary. + FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses + the secondary color (if lighting is disabled). The chip seems + misconfigured for that though elsewhere (tcl output, might lock up) */ + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { + map_rev_fixed[5] = VERT_ATTRIB_COLOR1; + } + + if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { + map_rev_fixed[3] = VERT_ATTRIB_FOG; + } + + for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + if (rmesa->TexGenNeedNormals[i]) { + map_rev_fixed[2] = VERT_ATTRIB_NORMAL; + } + map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i; + } + } + vimap_rev = &map_rev_fixed[0]; + } + else { + /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment + part", since using some vertex interpolator later which is not in + out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex + prog to a not enabled output however, so just don't mess with it. + We only need to change compsel. */ + GLuint out_compsel = 0; + GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; + + vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0]; + assert(vp_out & (1 << VERT_RESULT_HPOS)); + out_compsel = R200_OUTPUT_XYZW; + if (vp_out & (1 << VERT_RESULT_COL0)) { + out_compsel |= R200_OUTPUT_COLOR_0; + } + if (vp_out & (1 << VERT_RESULT_COL1)) { + out_compsel |= R200_OUTPUT_COLOR_1; + } + if (vp_out & (1 << VERT_RESULT_FOGC)) { + out_compsel |= R200_OUTPUT_DISCRETE_FOG; + } + if (vp_out & (1 << VERT_RESULT_PSIZ)) { + out_compsel |= R200_OUTPUT_PT_SIZE; + } + for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) { + if (vp_out & (1 << i)) { + out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0); + } + } + if (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel) { + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel; + } + } + + /* Do the actual work: + */ + r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + r200EmitArrays( ctx, vimap_rev ); + + rmesa->tcl.Elts = VB->Elts; + + for (i = 0 ; i < VB->PrimitiveCount ; i++) + { + GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + if (!length) + continue; + + if (rmesa->tcl.Elts) + r200EmitEltPrimitive( ctx, start, start+length, prim ); + else + r200EmitPrimitive( ctx, start, start+length, prim ); + } + + return GL_FALSE; /* finished the pipe */ +} + + + +/* Initial state for tcl stage. + */ +const struct tnl_pipeline_stage _r200_tcl_stage = +{ + "r200 render", + NULL, /* private */ + NULL, + NULL, + NULL, + r200_run_tcl_render /* run */ +}; + + + +/**********************************************************************/ +/* Validate state at pipeline start */ +/**********************************************************************/ + + +/*----------------------------------------------------------------------- + * Manage TCL fallbacks + */ + + +static void transition_to_swtnl( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + R200_NEWPRIM( rmesa ); + + r200ChooseVertexState( ctx ); + r200ChooseRenderState( ctx ); + + _mesa_validate_all_lighting_tables( ctx ); + + tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; + + r200ReleaseArrays( ctx, ~0 ); + + /* Still using the D3D based hardware-rasterizer from the radeon; + * need to put the card into D3D mode to make it work: + */ + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_TCL_ENABLE|R200_VAP_PROG_VTX_SHADER_ENABLE); +} + +static void transition_to_hwtnl( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + + r200UpdateMaterial( ctx ); + + tnl->Driver.NotifyMaterialChange = r200UpdateMaterial; + + if ( rmesa->dma.flush ) + rmesa->dma.flush( rmesa ); + + rmesa->dma.flush = NULL; + + if (rmesa->swtcl.indexed_verts.buf) + r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + __FUNCTION__ ); + + R200_STATECHANGE( rmesa, vap ); + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE; + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE; + + if (ctx->VertexProgram._Enabled) { + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE; + } + + if ( ((rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK) + == R200_FOG_USE_SPEC_ALPHA) && + (ctx->Fog.FogCoordinateSource == GL_FOG_COORD )) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK; + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_VTX_FOG; + } + + R200_STATECHANGE( rmesa, vte ); + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT); + rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT; + + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "R200 end tcl fallback\n"); +} + + +static char *fallbackStrings[] = { + "Rasterization fallback", + "Unfilled triangles", + "Twosided lighting, differing materials", + "Materials in VB (maybe between begin/end)", + "Texgen unit 0", + "Texgen unit 1", + "Texgen unit 2", + "Texgen unit 3", + "Texgen unit 4", + "Texgen unit 5", + "User disable", + "Bitmap as points", + "Vertex program" +}; + + +static char *getFallbackString(GLuint bit) +{ + int i = 0; + while (bit > 1) { + i++; + bit >>= 1; + } + return fallbackStrings[i]; +} + + + +void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint oldfallback = rmesa->TclFallback; + + if (mode) { + rmesa->TclFallback |= bit; + if (oldfallback == 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "R200 begin tcl fallback %s\n", + getFallbackString( bit )); + transition_to_swtnl( ctx ); + } + } + else { + rmesa->TclFallback &= ~bit; + if (oldfallback == bit) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "R200 end tcl fallback %s\n", + getFallbackString( bit )); + transition_to_hwtnl( ctx ); + } + } +} diff --git a/r200/r200_tcl.h b/r200/r200_tcl.h new file mode 100644 index 0000000..ac5bc11 --- /dev/null +++ b/r200/r200_tcl.h @@ -0,0 +1,69 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tcl.h,v 1.2 2002/12/16 16:18:55 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_TCL_H__ +#define __R200_TCL_H__ + +#include "r200_context.h" + +extern void r200TclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim ); +extern void r200EmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last, + GLuint flags ); +extern void r200EmitPrimitive( GLcontext *ctx, GLuint first, GLuint last, + GLuint flags ); + +extern void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ); + +extern void r200InitStaticFogData( void ); + +extern float r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ); + +#define R200_TCL_FALLBACK_RASTER 0x1 /* rasterization */ +#define R200_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */ +#define R200_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */ +#define R200_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */ +#define R200_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */ +#define R200_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */ +#define R200_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */ +#define R200_TCL_FALLBACK_TEXGEN_3 0x80 /* texgen, unit 3 */ +#define R200_TCL_FALLBACK_TEXGEN_4 0x100 /* texgen, unit 4 */ +#define R200_TCL_FALLBACK_TEXGEN_5 0x200 /* texgen, unit 5 */ +#define R200_TCL_FALLBACK_TCL_DISABLE 0x400 /* user disable */ +#define R200_TCL_FALLBACK_BITMAP 0x800 /* draw bitmap with points */ +#define R200_TCL_FALLBACK_VERTEX_PROGRAM 0x1000/* vertex program active */ + +#define TCL_FALLBACK( ctx, bit, mode ) r200TclFallback( ctx, bit, mode ) + +#endif diff --git a/r200/r200_tex.c b/r200/r200_tex.c new file mode 100644 index 0000000..6c6450c --- /dev/null +++ b/r200/r200_tex.c @@ -0,0 +1,1216 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "context.h" +#include "enums.h" +#include "image.h" +#include "simple_list.h" +#include "texformat.h" +#include "texstore.h" +#include "texmem.h" +#include "teximage.h" +#include "texobj.h" + +#include "r200_context.h" +#include "r200_state.h" +#include "r200_ioctl.h" +#include "r200_swtcl.h" +#include "r200_tex.h" + +#include "xmlpool.h" + + + +/** + * Set the texture wrap modes. + * + * \param t Texture object whose wrap modes are to be set + * \param swrap Wrap mode for the \a s texture coordinate + * \param twrap Wrap mode for the \a t texture coordinate + */ + +static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap ) +{ + GLboolean is_clamp = GL_FALSE; + GLboolean is_clamp_to_border = GL_FALSE; + + t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D); + + switch ( swrap ) { + case GL_REPEAT: + t->pp_txfilter |= R200_CLAMP_S_WRAP; + break; + case GL_CLAMP: + t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txfilter |= R200_CLAMP_S_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txfilter |= R200_CLAMP_S_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + } + + switch ( twrap ) { + case GL_REPEAT: + t->pp_txfilter |= R200_CLAMP_T_WRAP; + break; + case GL_CLAMP: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + } + + t->pp_txformat_x &= ~R200_CLAMP_Q_MASK; + + switch ( rwrap ) { + case GL_REPEAT: + t->pp_txformat_x |= R200_CLAMP_Q_WRAP; + break; + case GL_CLAMP: + t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txformat_x |= R200_CLAMP_Q_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__); + } + + if ( is_clamp_to_border ) { + t->pp_txfilter |= R200_BORDER_MODE_D3D; + } + + t->border_fallback = (is_clamp && is_clamp_to_border); +} + +static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) +{ + t->pp_txfilter &= ~R200_MAX_ANISO_MASK; + + if ( max == 1.0 ) { + t->pp_txfilter |= R200_MAX_ANISO_1_TO_1; + } else if ( max <= 2.0 ) { + t->pp_txfilter |= R200_MAX_ANISO_2_TO_1; + } else if ( max <= 4.0 ) { + t->pp_txfilter |= R200_MAX_ANISO_4_TO_1; + } else if ( max <= 8.0 ) { + t->pp_txfilter |= R200_MAX_ANISO_8_TO_1; + } else { + t->pp_txfilter |= R200_MAX_ANISO_16_TO_1; + } +} + +/** + * Set the texture magnification and minification modes. + * + * \param t Texture whose filter modes are to be set + * \param minf Texture minification mode + * \param magf Texture magnification mode + */ + +static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) +{ + GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK); + + t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK); + t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK; + + if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) { + switch ( minf ) { + case GL_NEAREST: + t->pp_txfilter |= R200_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R200_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_LINEAR; + break; + } + } else { + switch ( minf ) { + case GL_NEAREST: + t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R200_MIN_FILTER_ANISO_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR; + break; + } + } + + /* Note we don't have 3D mipmaps so only use the mag filter setting + * to set the 3D texture filter mode. + */ + switch ( magf ) { + case GL_NEAREST: + t->pp_txfilter |= R200_MAG_FILTER_NEAREST; + t->pp_txformat_x |= R200_VOLUME_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R200_MAG_FILTER_LINEAR; + t->pp_txformat_x |= R200_VOLUME_FILTER_LINEAR; + break; + } +} + +static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] ) +{ + t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] ); +} + + +/** + * Allocate space for and load the mesa images into the texture memory block. + * This will happen before drawing with a new texture, or drawing with a + * texture after it was swapped out or teximaged again. + */ + +static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj ) +{ + r200TexObjPtr t; + + t = CALLOC_STRUCT( r200_tex_obj ); + texObj->DriverData = t; + if ( t != NULL ) { + if ( R200_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, + (void *)t ); + } + + /* Initialize non-image-dependent parts of the state: + */ + t->base.tObj = texObj; + t->border_fallback = GL_FALSE; + + make_empty_list( & t->base ); + + r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); + r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); + r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); + r200SetTexBorderColor( t, texObj->_BorderChan ); + } + + return t; +} + +/* try to find a format which will only need a memcopy */ +static const struct gl_texture_format * +r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType ) +{ + const GLuint ui = 1; + const GLubyte littleEndian = *((const GLubyte *) &ui); + + if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { + return &_mesa_texformat_rgba8888; + } + else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { + return &_mesa_texformat_rgba8888_rev; + } + else return _dri_texformat_argb8888; +} + +static const struct gl_texture_format * +r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat, + GLenum format, GLenum type ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const GLboolean do32bpt = + ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); + const GLboolean force16bpt = + ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); + (void) format; + + switch ( internalFormat ) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + switch ( type ) { + case GL_UNSIGNED_INT_10_10_10_2: + case GL_UNSIGNED_INT_2_10_10_10_REV: + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + default: + return do32bpt ? + r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; + } + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + switch ( type ) { + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_5_6_5: + case GL_UNSIGNED_SHORT_5_6_5_REV: + return _dri_texformat_rgb565; + default: + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; + } + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return !force16bpt ? + r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return _dri_texformat_argb4444; + + case GL_RGB5_A1: + return _dri_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return _dri_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + /* can't use a8 format since interpreting hw I8 as a8 would result + in wrong rgb values (same as alpha value instead of 0). */ + return _dri_texformat_al88; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return _dri_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return _dri_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return _dri_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + default: + _mesa_problem(ctx, + "unexpected internalFormat 0x%x in r200ChooseTextureFormat", + (int) internalFormat); + return NULL; + } + + return NULL; /* never get here */ +} + + +static GLboolean +r200ValidateClientStorage( GLcontext *ctx, GLenum target, + GLint internalFormat, + GLint srcWidth, GLint srcHeight, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (0) + fprintf(stderr, "intformat %s format %s type %s\n", + _mesa_lookup_enum_by_nr( internalFormat ), + _mesa_lookup_enum_by_nr( format ), + _mesa_lookup_enum_by_nr( type )); + + if (!ctx->Unpack.ClientStorage) + return 0; + + if (ctx->_ImageTransferState || + texImage->IsCompressed || + texObj->GenerateMipmap) + return 0; + + + /* This list is incomplete, may be different on ppc??? + */ + switch ( internalFormat ) { + case GL_RGBA: + if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) { + texImage->TexFormat = _dri_texformat_argb8888; + } + else + return 0; + break; + + case GL_RGB: + if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) { + texImage->TexFormat = _dri_texformat_rgb565; + } + else + return 0; + break; + + case GL_YCBCR_MESA: + if ( format == GL_YCBCR_MESA && + type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) { + texImage->TexFormat = &_mesa_texformat_ycbcr_rev; + } + else if ( format == GL_YCBCR_MESA && + (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE)) { + texImage->TexFormat = &_mesa_texformat_ycbcr; + } + else + return 0; + break; + + default: + return 0; + } + + /* Could deal with these packing issues, but currently don't: + */ + if (packing->SkipPixels || + packing->SkipRows || + packing->SwapBytes || + packing->LsbFirst) { + return 0; + } + + { + GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, + format, type); + + + if (0) + fprintf(stderr, "%s: srcRowStride %d/%x\n", + __FUNCTION__, srcRowStride, srcRowStride); + + /* Could check this later in upload, pitch restrictions could be + * relaxed, but would need to store the image pitch somewhere, + * as packing details might change before image is uploaded: + */ + if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) || + (srcRowStride & 63)) + return 0; + + + /* Have validated that _mesa_transfer_teximage would be a straight + * memcpy at this point. NOTE: future calls to TexSubImage will + * overwrite the client data. This is explicitly mentioned in the + * extension spec. + */ + texImage->Data = (void *)pixels; + texImage->IsClientData = GL_TRUE; + texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; + + return 1; + } +} + + +static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); + return; + } + } + + /* Note, this will call ChooseTextureFormat */ + _mesa_store_teximage1d(ctx, target, level, internalFormat, + width, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[0] |= (1 << level); +} + + +static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); + return; + } + } + + _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, + format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[0] |= (1 << level); +} + + +static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if ( t != NULL ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + + if (r200ValidateClientStorage( ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); + } + else { + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r200ChooseTextureFormat. + */ + _mesa_store_teximage2d(ctx, target, level, internalFormat, + width, height, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[face] |= (1 << level); + } +} + + +static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); + return; + } + } + + _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[face] |= (1 << level); +} + + +static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if ( t != NULL ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; +/* can't call this, different parameters. Would never evaluate to true anyway currently + if (r200ValidateClientStorage( ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); + } + else */{ + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r200ChooseTextureFormat. + */ + _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, + height, border, imageSize, data, texObj, texImage); + + t->dirty_images[face] |= (1 << level); + } +} + + +static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); + return; + } + } + + _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, imageSize, data, texObj, texImage); + + t->dirty_images[face] |= (1 << level); +} + + +#if ENABLE_HW_3D_TEXTURE +static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + +#if 0 + if (r200ValidateClientStorage( ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); + } + else +#endif + { + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r200ChooseTextureFormat. + */ + _mesa_store_teximage3d(ctx, target, level, internalFormat, + width, height, depth, border, + format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[0] |= (1 << level); + } +} +#endif + + +#if ENABLE_HW_3D_TEXTURE +static void +r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + +/* fprintf(stderr, "%s\n", __FUNCTION__); */ + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) r200AllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); + return; + } + texObj->DriverData = t; + } + + _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, texImage); + + t->dirty_images[0] |= (1 << level); +} +#endif + + + +static void r200TexEnv( GLcontext *ctx, GLenum target, + GLenum pname, const GLfloat *param ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + if ( R200_DEBUG & DEBUG_STATE ) { + fprintf( stderr, "%s( %s )\n", + __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); + } + + /* This is incorrect: Need to maintain this data for each of + * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch + * between them according to _ReallyEnabled. + */ + switch ( pname ) { + case GL_TEXTURE_ENV_COLOR: { + GLubyte c[4]; + GLuint envColor; + UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor ); + envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] ); + if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) { + R200_STATECHANGE( rmesa, tf ); + rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor; + } + break; + } + + case GL_TEXTURE_LOD_BIAS_EXT: { + GLfloat bias, min; + GLuint b; + const int fixed_one = 0x8000000; + + /* The R200's LOD bias is a signed 2's complement value with a + * range of -16.0 <= bias < 16.0. + * + * NOTE: Add a small bias to the bias for conform mipsel.c test. + */ + bias = *param + .01; + min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? + 0.0 : -16.0; + bias = CLAMP( bias, min, 16.0 ); + b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK; + + if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] & R200_LOD_BIAS_MASK) != b ) { + R200_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] &= ~R200_LOD_BIAS_MASK; + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] |= b; + } + break; + } + case GL_COORD_REPLACE_ARB: + if (ctx->Point.PointSprite) { + R200_STATECHANGE( rmesa, spr ); + if ((GLenum)param[0]) { + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_GEN_TEX_0 << unit; + } else { + rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~(R200_PS_GEN_TEX_0 << unit); + } + } + break; + default: + return; + } +} + + +/** + * Changes variables and flags for a state update, which will happen at the + * next UpdateTextureState + */ + +static void r200TexParameter( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat *params ) +{ + r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData; + + if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( pname ) ); + } + + switch ( pname ) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); + r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + case GL_TEXTURE_WRAP_R: + r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); + break; + + case GL_TEXTURE_BORDER_COLOR: + r200SetTexBorderColor( t, texObj->_BorderChan ); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + /* This isn't the most efficient solution but there doesn't appear to + * be a nice alternative. Since there's no LOD clamping, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. + */ + driSwapOutTextureObject( (driTextureObject *) t ); + break; + + default: + return; + } + + /* Mark this texobj as dirty (one bit per tex unit) + */ + t->dirty_state = TEX_ALL; +} + + + +static void r200BindTexture( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj ) +{ + if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, + ctx->Texture.CurrentUnit ); + } + + if ( (target == GL_TEXTURE_1D) + || (target == GL_TEXTURE_2D) +#if ENABLE_HW_3D_TEXTURE + || (target == GL_TEXTURE_3D) +#endif + || (target == GL_TEXTURE_CUBE_MAP) + || (target == GL_TEXTURE_RECTANGLE_NV) ) { + assert( texObj->DriverData != NULL ); + } +} + + +static void r200DeleteTexture( GLcontext *ctx, + struct gl_texture_object *texObj ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, + _mesa_lookup_enum_by_nr( texObj->Target ) ); + } + + if ( t != NULL ) { + if ( rmesa ) { + R200_FIREVERTICES( rmesa ); + } + + driDestroyTextureObject( t ); + } + /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); +} + +/* Need: + * - Same GEN_MODE for all active bits + * - Same EyePlane/ObjPlane for all active bits when using Eye/Obj + * - STRQ presumably all supported (matrix means incoming R values + * can end up in STQ, this has implications for vertex support, + * presumably ok if maos is used, though?) + * + * Basically impossible to do this on the fly - just collect some + * basic info & do the checks from ValidateState(). + */ +static void r200TexGen( GLcontext *ctx, + GLenum coord, + GLenum pname, + const GLfloat *params ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + rmesa->recheck_texgen[unit] = GL_TRUE; +} + + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. + * Note: we could use containment here to 'derive' the driver-specific + * texture object from the core mesa gl_texture_object. Not done at this time. + * Fixup MaxAnisotropy according to user preference. + */ +static struct gl_texture_object * +r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_object *obj; + obj = _mesa_new_texture_object(ctx, name, target); + if (!obj) + return NULL; + obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + r200AllocTexObj( obj ); + return obj; +} + + +void r200InitTextureFuncs( struct dd_function_table *functions ) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->ChooseTextureFormat = r200ChooseTextureFormat; + functions->TexImage1D = r200TexImage1D; + functions->TexImage2D = r200TexImage2D; +#if ENABLE_HW_3D_TEXTURE + functions->TexImage3D = r200TexImage3D; +#else + functions->TexImage3D = _mesa_store_teximage3d; +#endif + functions->TexSubImage1D = r200TexSubImage1D; + functions->TexSubImage2D = r200TexSubImage2D; +#if ENABLE_HW_3D_TEXTURE + functions->TexSubImage3D = r200TexSubImage3D; +#else + functions->TexSubImage3D = _mesa_store_texsubimage3d; +#endif + functions->NewTextureObject = r200NewTextureObject; + functions->BindTexture = r200BindTexture; + functions->DeleteTexture = r200DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexEnv = r200TexEnv; + functions->TexParameter = r200TexParameter; + functions->TexGen = r200TexGen; + + functions->CompressedTexImage2D = r200CompressedTexImage2D; + functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D; + + driInitTextureFormats(); + +#if 000 + /* moved or obsolete code */ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + driInitTextureObjects( ctx, & rmesa->swapped, + DRI_TEXMGR_DO_TEXTURE_1D + | DRI_TEXMGR_DO_TEXTURE_2D ); + + /* Hack: r200NewTextureObject is not yet installed when the + * default textures are created. Therefore set MaxAnisotropy of the + * default 2D texture now. */ + ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache, + "def_max_anisotropy"); +#endif +} diff --git a/r200/r200_tex.h b/r200/r200_tex.h new file mode 100644 index 0000000..68e9a0e --- /dev/null +++ b/r200/r200_tex.h @@ -0,0 +1,55 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __R200_TEX_H__ +#define __R200_TEX_H__ + +extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + +extern void r200UpdateTextureState( GLcontext *ctx ); + +extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ); + +extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ); + +extern void r200InitTextureFuncs( struct dd_function_table *functions ); + +extern void r200UpdateFragmentShader( GLcontext *ctx ); + +extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ); + +#endif /* __R200_TEX_H__ */ diff --git a/r200/r200_texmem.c b/r200/r200_texmem.c new file mode 100644 index 0000000..9daafcf --- /dev/null +++ b/r200/r200_texmem.c @@ -0,0 +1,531 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */ +/************************************************************************** + +Copyright (C) Tungsten Graphics 2002. All Rights Reserved. +The Weather Channel, Inc. funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 +license. This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation on the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * + */ + +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "colormac.h" +#include "macros.h" +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_tex.h" +#include "radeon_reg.h" + +#include <unistd.h> /* for usleep() */ + + +/** + * Destroy any device-dependent state associated with the texture. This may + * include NULLing out hardware state that points to the texture. + */ +void +r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ) +{ + if ( R200_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, + (void *)t, (void *)t->base.tObj ); + } + + if ( rmesa != NULL ) { + unsigned i; + + + for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { + if ( t == rmesa->state.texture.unit[i].texobj ) { + rmesa->state.texture.unit[i].texobj = NULL; + rmesa->hw.tex[i].dirty = GL_FALSE; + rmesa->hw.cube[i].dirty = GL_FALSE; + } + } + } +} + + +/* ------------------------------------------------------------ + * Texture image conversions + */ + + +static void r200UploadGARTClientSubImage( r200ContextPtr rmesa, + r200TexObjPtr t, + struct gl_texture_image *texImage, + GLint hwlevel, + GLint x, GLint y, + GLint width, GLint height ) +{ + const struct gl_texture_format *texFormat = texImage->TexFormat; + GLuint srcPitch, dstPitch; + int blit_format; + int srcOffset; + + /* + * XXX it appears that we always upload the full image, not a subimage. + * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever + * changed, the src pitch will have to change. + */ + switch ( texFormat->TexelBytes ) { + case 1: + blit_format = R200_CP_COLOR_FORMAT_CI8; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + case 2: + blit_format = R200_CP_COLOR_FORMAT_RGB565; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + case 4: + blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + default: + return; + } + + t->image[0][hwlevel].data = texImage->Data; + srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); + + assert( srcOffset != ~0 ); + + /* Don't currently need to cope with small pitches? + */ + width = texImage->Width; + height = texImage->Height; + + r200EmitWait( rmesa, RADEON_WAIT_3D ); + + r200EmitBlit( rmesa, blit_format, + srcPitch, + srcOffset, + dstPitch, + t->bufAddr, + x, + y, + t->image[0][hwlevel].x + x, + t->image[0][hwlevel].y + y, + width, + height ); + + r200EmitWait( rmesa, RADEON_WAIT_2D ); +} + +static void r200UploadRectSubImage( r200ContextPtr rmesa, + r200TexObjPtr t, + struct gl_texture_image *texImage, + GLint x, GLint y, + GLint width, GLint height ) +{ + const struct gl_texture_format *texFormat = texImage->TexFormat; + int blit_format, dstPitch, done; + + switch ( texFormat->TexelBytes ) { + case 1: + blit_format = R200_CP_COLOR_FORMAT_CI8; + break; + case 2: + blit_format = R200_CP_COLOR_FORMAT_RGB565; + break; + case 4: + blit_format = R200_CP_COLOR_FORMAT_ARGB8888; + break; + default: + return; + } + + t->image[0][0].data = texImage->Data; + + /* Currently don't need to cope with small pitches. + */ + width = texImage->Width; + height = texImage->Height; + dstPitch = t->pp_txpitch + 32; + + if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { + /* In this case, could also use GART texturing. This is + * currently disabled, but has been tested & works. + */ + if ( !t->image_override ) + t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); + t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32; + + if (R200_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "Using GART texturing for rectangular client texture\n"); + + /* Release FB memory allocated for this image: + */ + /* FIXME This may not be correct as driSwapOutTextureObject sets + * FIXME dirty_images. It may be fine, though. + */ + if ( t->base.memBlock ) { + driSwapOutTextureObject( (driTextureObject *) t ); + } + } + else if (texImage->IsClientData) { + /* Data already in GART memory, with usable pitch. + */ + GLuint srcPitch; + srcPitch = texImage->RowStride * texFormat->TexelBytes; + r200EmitBlit( rmesa, + blit_format, + srcPitch, + r200GartOffsetFromVirtual( rmesa, texImage->Data ), + dstPitch, t->bufAddr, + 0, 0, + 0, 0, + width, height ); + } + else { + /* Data not in GART memory, or bad pitch. + */ + for (done = 0; done < height ; ) { + struct r200_dma_region region; + int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); + int src_pitch; + char *tex; + + src_pitch = texImage->RowStride * texFormat->TexelBytes; + + tex = (char *)texImage->Data + done * src_pitch; + + memset(®ion, 0, sizeof(region)); + r200AllocDmaRegion( rmesa, ®ion, lines * dstPitch, 1024 ); + + /* Copy texdata to dma: + */ + if (0) + fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", + __FUNCTION__, src_pitch, dstPitch); + + if (src_pitch == dstPitch) { + memcpy( region.address + region.start, tex, lines * src_pitch ); + } + else { + char *buf = region.address + region.start; + int i; + for (i = 0 ; i < lines ; i++) { + memcpy( buf, tex, src_pitch ); + buf += dstPitch; + tex += src_pitch; + } + } + + r200EmitWait( rmesa, RADEON_WAIT_3D ); + + /* Blit to framebuffer + */ + r200EmitBlit( rmesa, + blit_format, + dstPitch, GET_START( ®ion ), + dstPitch | (t->tile_bits >> 16), + t->bufAddr, + 0, 0, + 0, done, + width, lines ); + + r200EmitWait( rmesa, RADEON_WAIT_2D ); + + r200ReleaseDmaRegion( rmesa, ®ion, __FUNCTION__ ); + done += lines; + } + } +} + + +/** + * Upload the texture image associated with texture \a t at the specified + * level at the address relative to \a start. + */ +static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, + GLint hwlevel, + GLint x, GLint y, GLint width, GLint height, + GLuint face ) +{ + struct gl_texture_image *texImage = NULL; + GLuint offset; + GLint imageWidth, imageHeight; + GLint ret; + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + const int level = hwlevel + t->base.firstLevel; + + if ( R200_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", + __FUNCTION__, (void *)t, (void *)t->base.tObj, + level, width, height, face ); + } + + ASSERT(face < 6); + + /* Ensure we have a valid texture to upload */ + if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { + _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); + return; + } + + texImage = t->base.tObj->Image[face][level]; + + if ( !texImage ) { + if ( R200_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); + return; + } + if ( !texImage->Data ) { + if ( R200_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); + return; + } + + + if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + assert(level == 0); + assert(hwlevel == 0); + if ( R200_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); + r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height ); + return; + } + else if (texImage->IsClientData) { + if ( R200_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is in GART client storage\n", + __FUNCTION__); + r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel, + x, y, width, height ); + return; + } + else if ( R200_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is in normal memory\n", + __FUNCTION__); + + + imageWidth = texImage->Width; + imageHeight = texImage->Height; + + offset = t->bufAddr + t->base.totalSize / 6 * face; + + if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { + GLint imageX = 0; + GLint imageY = 0; + GLint blitX = t->image[face][hwlevel].x; + GLint blitY = t->image[face][hwlevel].y; + GLint blitWidth = t->image[face][hwlevel].width; + GLint blitHeight = t->image[face][hwlevel].height; + fprintf( stderr, " upload image: %d,%d at %d,%d\n", + imageWidth, imageHeight, imageX, imageY ); + fprintf( stderr, " upload blit: %d,%d at %d,%d\n", + blitWidth, blitHeight, blitX, blitY ); + fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", + (GLuint)offset, hwlevel, level ); + } + + t->image[face][hwlevel].data = texImage->Data; + + /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. + * NOTE: we're always use a 1KB-wide blit and I8 texture format. + * We used to use 1, 2 and 4-byte texels and used to use the texture + * width to dictate the blit width - but that won't work for compressed + * textures. (Brian) + * NOTE: can't do that with texture tiling. (sroland) + */ + tex.offset = offset; + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); + + if (texImage->TexFormat->TexelBytes) { + /* use multi-byte upload scheme */ + tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK; + if (tex.format == R200_TXFORMAT_ABGR8888) { + /* drm will refuse abgr8888 textures. */ + tex.format = R200_TXFORMAT_ARGB8888; + } + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & R200_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256) && + ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) || + (texImage->Height >= 16))) { + /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, + OR if height is smaller than 8 automatically, but if micro tiling is active + the limit is height 16 instead ? */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } + } + else { + /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is + needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ + /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed + so the kernel module reads the right amount of data. */ + tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); + tex.height = (imageHeight + 3) / 4; + tex.width = (imageWidth + 3) / 4; + switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) { + case R200_TXFORMAT_DXT1: + tex.width *= 8; + break; + case R200_TXFORMAT_DXT23: + case R200_TXFORMAT_DXT45: + tex.width *= 16; + break; + default: + fprintf(stderr, "unknown compressed tex format in uploadSubImage\n"); + } + } + + LOCK_HARDWARE( rmesa ); + do { + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, + &tex, sizeof(drm_radeon_texture_t) ); + if (ret) { + if (R200_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while ( ret == -EAGAIN ); + + UNLOCK_HARDWARE( rmesa ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); + fprintf( stderr, " offset=0x%08x\n", + offset ); + fprintf( stderr, " image width=%d height=%d\n", + imageWidth, imageHeight ); + fprintf( stderr, " blit width=%d height=%d data=%p\n", + t->image[face][hwlevel].width, t->image[face][hwlevel].height, + t->image[face][hwlevel].data ); + exit( 1 ); + } +} + + +/** + * Upload the texture images associated with texture \a t. This might + * require the allocation of texture memory. + * + * \param rmesa Context pointer + * \param t Texture to be uploaded + * \param face Cube map face to be uploaded. Zero for non-cube maps. + */ + +int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ) +{ + const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { + fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, + (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, + t->base.firstLevel, t->base.lastLevel ); + } + + if ( !t || t->base.totalSize == 0 || t->image_override ) + return 0; + + if (R200_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); + r200Finish( rmesa->glCtx ); + } + + LOCK_HARDWARE( rmesa ); + + if ( t->base.memBlock == NULL ) { + int heap; + + heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, + (driTextureObject *) t ); + if ( heap == -1 ) { + UNLOCK_HARDWARE( rmesa ); + return -1; + } + + /* Set the base offset of the texture image */ + t->bufAddr = rmesa->r200Screen->texOffset[heap] + + t->base.memBlock->ofs; + t->pp_txoffset = t->bufAddr; + + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->pp_txoffset |= t->tile_bits; + } + + /* Mark this texobj as dirty on all units: + */ + t->dirty_state = TEX_ALL; + } + + /* Let the world know we've used this memory recently. + */ + driUpdateTextureLRU( (driTextureObject *) t ); + UNLOCK_HARDWARE( rmesa ); + + /* Upload any images that are new */ + if (t->base.dirty_images[face]) { + int i; + for ( i = 0 ; i < numLevels ; i++ ) { + if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { + uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, + t->image[face][i].height, face ); + } + } + t->base.dirty_images[face] = 0; + } + + + if (R200_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); + r200Finish( rmesa->glCtx ); + } + + return 0; +} diff --git a/r200/r200_texstate.c b/r200/r200_texstate.c new file mode 100644 index 0000000..d12c3bc --- /dev/null +++ b/r200/r200_texstate.c @@ -0,0 +1,1954 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "macros.h" +#include "texformat.h" +#include "texobj.h" +#include "enums.h" + +#include "r200_context.h" +#include "r200_state.h" +#include "r200_ioctl.h" +#include "r200_swtcl.h" +#include "r200_tex.h" +#include "r200_tcl.h" + + +#define R200_TXFORMAT_A8 R200_TXFORMAT_I8 +#define R200_TXFORMAT_L8 R200_TXFORMAT_I8 +#define R200_TXFORMAT_AL88 R200_TXFORMAT_AI88 +#define R200_TXFORMAT_YCBCR R200_TXFORMAT_YVYU422 +#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422 +#define R200_TXFORMAT_RGB_DXT1 R200_TXFORMAT_DXT1 +#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1 +#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23 +#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45 + +#define _COLOR(f) \ + [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 } +#define _COLOR_REV(f) \ + [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 } +#define _ALPHA(f) \ + [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 } +#define _ALPHA_REV(f) \ + [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 } +#define _YUV(f) \ + [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB } +#define _INVALID(f) \ + [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 } +#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ + && (tx_table_be[f].format != 0xffffffff) ) + +struct tx_table { + GLuint format, filter; +}; + +static const struct tx_table tx_table_be[] = +{ + [ MESA_FORMAT_RGBA8888 ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + _ALPHA_REV(RGBA8888), + _ALPHA(ARGB8888), + _ALPHA_REV(ARGB8888), + _INVALID(RGB888), + _COLOR(RGB565), + _COLOR_REV(RGB565), + _ALPHA(ARGB4444), + _ALPHA_REV(ARGB4444), + _ALPHA(ARGB1555), + _ALPHA_REV(ARGB1555), + _ALPHA(AL88), + _ALPHA_REV(AL88), + _ALPHA(A8), + _COLOR(L8), + _ALPHA(I8), + _INVALID(CI8), + _YUV(YCBCR), + _YUV(YCBCR_REV), + _INVALID(RGB_FXT1), + _INVALID(RGBA_FXT1), + _COLOR(RGB_DXT1), + _ALPHA(RGBA_DXT1), + _ALPHA(RGBA_DXT3), + _ALPHA(RGBA_DXT5), +}; + +static const struct tx_table tx_table_le[] = +{ + _ALPHA(RGBA8888), + [ MESA_FORMAT_RGBA8888_REV ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + _ALPHA(ARGB8888), + _ALPHA_REV(ARGB8888), + [ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB8888, 0 }, + _COLOR(RGB565), + _COLOR_REV(RGB565), + _ALPHA(ARGB4444), + _ALPHA_REV(ARGB4444), + _ALPHA(ARGB1555), + _ALPHA_REV(ARGB1555), + _ALPHA(AL88), + _ALPHA_REV(AL88), + _ALPHA(A8), + _COLOR(L8), + _ALPHA(I8), + _INVALID(CI8), + _YUV(YCBCR), + _YUV(YCBCR_REV), + _INVALID(RGB_FXT1), + _INVALID(RGBA_FXT1), + _COLOR(RGB_DXT1), + _ALPHA(RGBA_DXT1), + _ALPHA(RGBA_DXT3), + _ALPHA(RGBA_DXT5), +}; + +#undef _COLOR +#undef _ALPHA +#undef _INVALID + +/** + * This function computes the number of bytes of storage needed for + * the given texture object (all mipmap levels, all cube faces). + * The \c image[face][level].x/y/width/height parameters for upload/blitting + * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here + * too. + * + * \param rmesa Context pointer + * \param tObj GL texture object whose images are to be posted to + * hardware state. + */ +static void r200SetTexImages( r200ContextPtr rmesa, + struct gl_texture_object *tObj ) +{ + r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData; + const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; + GLint curOffset, blitWidth; + GLint i, texelBytes; + GLint numLevels; + GLint log2Width, log2Height, log2Depth; + + /* Set the hardware texture format + */ + if ( !t->image_override ) { + if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { + const struct tx_table *table = _mesa_little_endian() ? tx_table_le : + tx_table_be; + + t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK | + R200_TXFORMAT_ALPHA_IN_MAP); + t->pp_txfilter &= ~R200_YUV_TO_RGB; + + t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format; + t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter; + } + else { + _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); + return; + } + } + + texelBytes = baseImage->TexFormat->TexelBytes; + + /* Compute which mipmap levels we really want to send to the hardware. + */ + + driCalculateTextureFirstLastLevel( (driTextureObject *) t ); + log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; + log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; + log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; + + numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + + /* Calculate mipmap offsets and dimensions for blitting (uploading) + * The idea is that we lay out the mipmap levels within a block of + * memory organized as a rectangle of width BLIT_WIDTH_BYTES. + */ + curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ + if (texelBytes) { + if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) && + /* texrect might be able to use micro tiling too in theory? */ + (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + t->tile_bits |= R200_TXO_MICRO_TILE; + } + } + if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { + /* we can set macro tiling even for small textures, they will be untiled anyway */ + t->tile_bits |= R200_TXO_MACRO_TILE; + } + } + + for (i = 0; i < numLevels; i++) { + const struct gl_texture_image *texImage; + GLuint size; + + texImage = tObj->Image[0][i + t->base.firstLevel]; + if ( !texImage ) + break; + + /* find image size in bytes */ + if (texImage->IsCompressed) { + /* need to calculate the size AFTER padding even though the texture is + submitted without padding. + Only handle pot textures currently - don't know if npot is even possible, + size calculation would certainly need (trivial) adjustments. + Align (and later pad) to 32byte, not sure what that 64byte blit width is + good for? */ + if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) { + /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ + if ((texImage->Width + 3) < 8) /* width one block */ + size = texImage->CompressedSize * 4; + else if ((texImage->Width + 3) < 16) + size = texImage->CompressedSize * 2; + else size = texImage->CompressedSize; + } + else /* DXT3/5, 16 bytes per block */ + if ((texImage->Width + 3) < 8) + size = texImage->CompressedSize * 2; + else size = texImage->CompressedSize; + } + else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + else { + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + assert(size > 0); + + /* Align to 32-byte offset. It is faster to do this unconditionally + * (no branch penalty). + */ + + curOffset = (curOffset + 0x1f) & ~0x1f; + + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } + +#if 0 + /* for debugging only and only applicable to non-rectangle targets */ + assert(size % t->image[0][i].width == 0); + assert(t->image[0][i].x == 0 + || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); +#endif + + if (0) + fprintf(stderr, + "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + i, texImage->Width, texImage->Height, + t->image[0][i].x, t->image[0][i].y, + t->image[0][i].width, t->image[0][i].height, size, curOffset); + + curOffset += size; + + } + + /* Align the total size of texture memory block. + */ + t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + /* Setup remaining cube face blits, if needed */ + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + const GLuint faceSize = t->base.totalSize; + GLuint face; + /* reuse face 0 x/y/width/height - just update the offset when uploading */ + for (face = 1; face < 6; face++) { + for (i = 0; i < numLevels; i++) { + t->image[face][i].x = t->image[0][i].x; + t->image[face][i].y = t->image[0][i].y; + t->image[face][i].width = t->image[0][i].width; + t->image[face][i].height = t->image[0][i].height; + } + } + t->base.totalSize = 6 * faceSize; /* total texmem needed */ + } + + + /* Hardware state: + */ + t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; + t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT; + + t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | + R200_TXFORMAT_HEIGHT_MASK | + R200_TXFORMAT_CUBIC_MAP_ENABLE | + R200_TXFORMAT_F5_WIDTH_MASK | + R200_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | + (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); + + t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); + if (tObj->Target == GL_TEXTURE_3D) { + t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); + t->pp_txformat_x |= R200_TEXCOORD_VOLUME; + } + else if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | + (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | +/* don't think we need this bit, if it exists at all - fglrx does not set it */ + (R200_TXFORMAT_CUBIC_MAP_ENABLE)); + t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; + t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | + (log2Height << R200_FACE_HEIGHT_1_SHIFT) | + (log2Width << R200_FACE_WIDTH_2_SHIFT) | + (log2Height << R200_FACE_HEIGHT_2_SHIFT) | + (log2Width << R200_FACE_WIDTH_3_SHIFT) | + (log2Height << R200_FACE_HEIGHT_3_SHIFT) | + (log2Width << R200_FACE_WIDTH_4_SHIFT) | + (log2Height << R200_FACE_HEIGHT_4_SHIFT)); + } + else { + /* If we don't in fact send enough texture coordinates, q will be 1, + * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?) + */ + t->pp_txformat_x |= R200_TEXCOORD_PROJ; + } + + t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | + ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); + + /* Only need to round to nearest 32 for textures, but the blitter + * requires 64-byte aligned pitches, and we may/may not need the + * blitter. NPOT only! + */ + if ( !t->image_override ) { + if (baseImage->IsCompressed) + t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); + else + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); + t->pp_txpitch -= 32; + } + + t->dirty_state = TEX_ALL; + + /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */ +} + + + +/* ================================================================ + * Texture combine functions + */ + +/* GL_ARB_texture_env_combine support + */ + +/* The color tables have combine functions for GL_SRC_COLOR, + * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA. + */ +static GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] = +{ + { + R200_TXC_ARG_A_R0_COLOR, + R200_TXC_ARG_A_R1_COLOR, + R200_TXC_ARG_A_R2_COLOR, + R200_TXC_ARG_A_R3_COLOR, + R200_TXC_ARG_A_R4_COLOR, + R200_TXC_ARG_A_R5_COLOR + }, + { + R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R3_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R4_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R5_COLOR | R200_TXC_COMP_ARG_A + }, + { + R200_TXC_ARG_A_R0_ALPHA, + R200_TXC_ARG_A_R1_ALPHA, + R200_TXC_ARG_A_R2_ALPHA, + R200_TXC_ARG_A_R3_ALPHA, + R200_TXC_ARG_A_R4_ALPHA, + R200_TXC_ARG_A_R5_ALPHA + }, + { + R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R3_ALPHA | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R4_ALPHA | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_R5_ALPHA | R200_TXC_COMP_ARG_A + }, +}; + +static GLuint r200_tfactor_color[] = +{ + R200_TXC_ARG_A_TFACTOR_COLOR, + R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_TFACTOR_ALPHA, + R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A +}; + +static GLuint r200_tfactor1_color[] = +{ + R200_TXC_ARG_A_TFACTOR1_COLOR, + R200_TXC_ARG_A_TFACTOR1_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_TFACTOR1_ALPHA, + R200_TXC_ARG_A_TFACTOR1_ALPHA | R200_TXC_COMP_ARG_A +}; + +static GLuint r200_primary_color[] = +{ + R200_TXC_ARG_A_DIFFUSE_COLOR, + R200_TXC_ARG_A_DIFFUSE_COLOR | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_DIFFUSE_ALPHA, + R200_TXC_ARG_A_DIFFUSE_ALPHA | R200_TXC_COMP_ARG_A +}; + +/* GL_ZERO table - indices 0-3 + * GL_ONE table - indices 1-4 + */ +static GLuint r200_zero_color[] = +{ + R200_TXC_ARG_A_ZERO, + R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_ZERO, + R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A, + R200_TXC_ARG_A_ZERO +}; + +/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA. + */ +static GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] = +{ + { + R200_TXA_ARG_A_R0_ALPHA, + R200_TXA_ARG_A_R1_ALPHA, + R200_TXA_ARG_A_R2_ALPHA, + R200_TXA_ARG_A_R3_ALPHA, + R200_TXA_ARG_A_R4_ALPHA, + R200_TXA_ARG_A_R5_ALPHA + }, + { + R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_R3_ALPHA | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_R4_ALPHA | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_R5_ALPHA | R200_TXA_COMP_ARG_A + }, +}; + +static GLuint r200_tfactor_alpha[] = +{ + R200_TXA_ARG_A_TFACTOR_ALPHA, + R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A +}; + +static GLuint r200_tfactor1_alpha[] = +{ + R200_TXA_ARG_A_TFACTOR1_ALPHA, + R200_TXA_ARG_A_TFACTOR1_ALPHA | R200_TXA_COMP_ARG_A +}; + +static GLuint r200_primary_alpha[] = +{ + R200_TXA_ARG_A_DIFFUSE_ALPHA, + R200_TXA_ARG_A_DIFFUSE_ALPHA | R200_TXA_COMP_ARG_A +}; + +/* GL_ZERO table - indices 0-1 + * GL_ONE table - indices 1-2 + */ +static GLuint r200_zero_alpha[] = +{ + R200_TXA_ARG_A_ZERO, + R200_TXA_ARG_A_ZERO | R200_TXA_COMP_ARG_A, + R200_TXA_ARG_A_ZERO, +}; + + +/* Extract the arg from slot A, shift it into the correct argument slot + * and set the corresponding complement bit. + */ +#define R200_COLOR_ARG( n, arg ) \ +do { \ + color_combine |= \ + ((color_arg[n] & R200_TXC_ARG_A_MASK) \ + << R200_TXC_ARG_##arg##_SHIFT); \ + color_combine |= \ + ((color_arg[n] >> R200_TXC_COMP_ARG_A_SHIFT) \ + << R200_TXC_COMP_ARG_##arg##_SHIFT); \ +} while (0) + +#define R200_ALPHA_ARG( n, arg ) \ +do { \ + alpha_combine |= \ + ((alpha_arg[n] & R200_TXA_ARG_A_MASK) \ + << R200_TXA_ARG_##arg##_SHIFT); \ + alpha_combine |= \ + ((alpha_arg[n] >> R200_TXA_COMP_ARG_A_SHIFT) \ + << R200_TXA_COMP_ARG_##arg##_SHIFT); \ +} while (0) + + +/* ================================================================ + * Texture unit state management + */ + +static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuint replaceargs ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint color_combine, alpha_combine; + GLuint color_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] & + ~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK | R200_TXC_TFACTOR_SEL_MASK | + R200_TXC_TFACTOR1_SEL_MASK); + GLuint alpha_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] & + ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK | + R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK); + + /* texUnit->_Current can be NULL if and only if the texture unit is + * not actually enabled. + */ + assert( (texUnit->_ReallyEnabled == 0) + || (texUnit->_Current != NULL) ); + + if ( R200_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); + } + + /* Set the texture environment state. Isn't this nice and clean? + * The chip will automagically set the texture alpha to 0xff when + * the texture format does not include an alpha component. This + * reduces the amount of special-casing we have to do, alpha-only + * textures being a notable exception. + */ + + color_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXC_OUTPUT_REG_SHIFT) | + (unit << R200_TXC_TFACTOR_SEL_SHIFT) | + (replaceargs << R200_TXC_TFACTOR1_SEL_SHIFT); + alpha_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXA_OUTPUT_REG_SHIFT) | + (unit << R200_TXA_TFACTOR_SEL_SHIFT) | + (replaceargs << R200_TXA_TFACTOR1_SEL_SHIFT); + + if ( !texUnit->_ReallyEnabled ) { + assert( unit == 0); + color_combine = R200_TXC_ARG_A_ZERO | R200_TXC_ARG_B_ZERO + | R200_TXC_ARG_C_DIFFUSE_COLOR | R200_TXC_OP_MADD; + alpha_combine = R200_TXA_ARG_A_ZERO | R200_TXA_ARG_B_ZERO + | R200_TXA_ARG_C_DIFFUSE_ALPHA | R200_TXA_OP_MADD; + } + else { + GLuint color_arg[3], alpha_arg[3]; + GLuint i; + const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB; + const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA; + GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB; + GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA; + + + const GLint replaceoprgb = + ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandRGB[0] - GL_SRC_COLOR; + const GLint replaceopa = + ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandA[0] - GL_SRC_ALPHA; + + /* Step 1: + * Extract the color and alpha combine function arguments. + */ + for ( i = 0 ; i < numColorArgs ; i++ ) { + GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR; + const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i]; + assert(op >= 0); + assert(op <= 3); + switch ( srcRGBi ) { + case GL_TEXTURE: + color_arg[i] = r200_register_color[op][unit]; + break; + case GL_CONSTANT: + color_arg[i] = r200_tfactor_color[op]; + break; + case GL_PRIMARY_COLOR: + color_arg[i] = r200_primary_color[op]; + break; + case GL_PREVIOUS: + if (replaceargs != unit) { + const GLint srcRGBreplace = + ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0]; + if (op >= 2) { + op = op ^ replaceopa; + } + else { + op = op ^ replaceoprgb; + } + switch (srcRGBreplace) { + case GL_TEXTURE: + color_arg[i] = r200_register_color[op][replaceargs]; + break; + case GL_CONSTANT: + color_arg[i] = r200_tfactor1_color[op]; + break; + case GL_PRIMARY_COLOR: + color_arg[i] = r200_primary_color[op]; + break; + case GL_PREVIOUS: + if (slot == 0) + color_arg[i] = r200_primary_color[op]; + else + color_arg[i] = r200_register_color[op] + [rmesa->state.texture.unit[replaceargs - 1].outputreg]; + break; + case GL_ZERO: + color_arg[i] = r200_zero_color[op]; + break; + case GL_ONE: + color_arg[i] = r200_zero_color[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + color_arg[i] = r200_register_color[op][srcRGBreplace - GL_TEXTURE0]; + break; + default: + return GL_FALSE; + } + } + else { + if (slot == 0) + color_arg[i] = r200_primary_color[op]; + else + color_arg[i] = r200_register_color[op] + [rmesa->state.texture.unit[unit - 1].outputreg]; + } + break; + case GL_ZERO: + color_arg[i] = r200_zero_color[op]; + break; + case GL_ONE: + color_arg[i] = r200_zero_color[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0]; + break; + default: + return GL_FALSE; + } + } + + for ( i = 0 ; i < numAlphaArgs ; i++ ) { + GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA; + const GLint srcAi = texUnit->_CurrentCombine->SourceA[i]; + assert(op >= 0); + assert(op <= 1); + switch ( srcAi ) { + case GL_TEXTURE: + alpha_arg[i] = r200_register_alpha[op][unit]; + break; + case GL_CONSTANT: + alpha_arg[i] = r200_tfactor_alpha[op]; + break; + case GL_PRIMARY_COLOR: + alpha_arg[i] = r200_primary_alpha[op]; + break; + case GL_PREVIOUS: + if (replaceargs != unit) { + const GLint srcAreplace = + ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0]; + op = op ^ replaceopa; + switch (srcAreplace) { + case GL_TEXTURE: + alpha_arg[i] = r200_register_alpha[op][replaceargs]; + break; + case GL_CONSTANT: + alpha_arg[i] = r200_tfactor1_alpha[op]; + break; + case GL_PRIMARY_COLOR: + alpha_arg[i] = r200_primary_alpha[op]; + break; + case GL_PREVIOUS: + if (slot == 0) + alpha_arg[i] = r200_primary_alpha[op]; + else + alpha_arg[i] = r200_register_alpha[op] + [rmesa->state.texture.unit[replaceargs - 1].outputreg]; + break; + case GL_ZERO: + alpha_arg[i] = r200_zero_alpha[op]; + break; + case GL_ONE: + alpha_arg[i] = r200_zero_alpha[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + alpha_arg[i] = r200_register_alpha[op][srcAreplace - GL_TEXTURE0]; + break; + default: + return GL_FALSE; + } + } + else { + if (slot == 0) + alpha_arg[i] = r200_primary_alpha[op]; + else + alpha_arg[i] = r200_register_alpha[op] + [rmesa->state.texture.unit[unit - 1].outputreg]; + } + break; + case GL_ZERO: + alpha_arg[i] = r200_zero_alpha[op]; + break; + case GL_ONE: + alpha_arg[i] = r200_zero_alpha[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0]; + break; + default: + return GL_FALSE; + } + } + + /* Step 2: + * Build up the color and alpha combine functions. + */ + switch ( texUnit->_CurrentCombine->ModeRGB ) { + case GL_REPLACE: + color_combine = (R200_TXC_ARG_A_ZERO | + R200_TXC_ARG_B_ZERO | + R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, C ); + break; + case GL_MODULATE: + color_combine = (R200_TXC_ARG_C_ZERO | + R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, B ); + break; + case GL_ADD: + color_combine = (R200_TXC_ARG_B_ZERO | + R200_TXC_COMP_ARG_B | + R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + break; + case GL_ADD_SIGNED: + color_combine = (R200_TXC_ARG_B_ZERO | + R200_TXC_COMP_ARG_B | + R200_TXC_BIAS_ARG_C | /* new */ + R200_TXC_OP_MADD); /* was ADDSIGNED */ + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + break; + case GL_SUBTRACT: + color_combine = (R200_TXC_ARG_B_ZERO | + R200_TXC_COMP_ARG_B | + R200_TXC_NEG_ARG_C | + R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + break; + case GL_INTERPOLATE: + color_combine = (R200_TXC_OP_LERP); + R200_COLOR_ARG( 0, B ); + R200_COLOR_ARG( 1, A ); + R200_COLOR_ARG( 2, C ); + break; + + case GL_DOT3_RGB_EXT: + case GL_DOT3_RGBA_EXT: + /* The EXT version of the DOT3 extension does not support the + * scale factor, but the ARB version (and the version in OpenGL + * 1.3) does. + */ + RGBshift = 0; + /* FALLTHROUGH */ + + case GL_DOT3_RGB: + case GL_DOT3_RGBA: + /* DOT3 works differently on R200 than on R100. On R100, just + * setting the DOT3 mode did everything for you. On R200, the + * driver has to enable the biasing and scale in the inputs to + * put them in the proper [-1,1] range. This is what the 4x and + * the -0.5 in the DOT3 spec do. The post-scale is then set + * normally. + */ + + color_combine = (R200_TXC_ARG_C_ZERO | + R200_TXC_OP_DOT3 | + R200_TXC_BIAS_ARG_A | + R200_TXC_BIAS_ARG_B | + R200_TXC_SCALE_ARG_A | + R200_TXC_SCALE_ARG_B); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, B ); + break; + + case GL_MODULATE_ADD_ATI: + color_combine = (R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + R200_COLOR_ARG( 2, B ); + break; + case GL_MODULATE_SIGNED_ADD_ATI: + color_combine = (R200_TXC_BIAS_ARG_C | /* new */ + R200_TXC_OP_MADD); /* was ADDSIGNED */ + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + R200_COLOR_ARG( 2, B ); + break; + case GL_MODULATE_SUBTRACT_ATI: + color_combine = (R200_TXC_NEG_ARG_C | + R200_TXC_OP_MADD); + R200_COLOR_ARG( 0, A ); + R200_COLOR_ARG( 1, C ); + R200_COLOR_ARG( 2, B ); + break; + default: + return GL_FALSE; + } + + switch ( texUnit->_CurrentCombine->ModeA ) { + case GL_REPLACE: + alpha_combine = (R200_TXA_ARG_A_ZERO | + R200_TXA_ARG_B_ZERO | + R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, C ); + break; + case GL_MODULATE: + alpha_combine = (R200_TXA_ARG_C_ZERO | + R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, B ); + break; + case GL_ADD: + alpha_combine = (R200_TXA_ARG_B_ZERO | + R200_TXA_COMP_ARG_B | + R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + break; + case GL_ADD_SIGNED: + alpha_combine = (R200_TXA_ARG_B_ZERO | + R200_TXA_COMP_ARG_B | + R200_TXA_BIAS_ARG_C | /* new */ + R200_TXA_OP_MADD); /* was ADDSIGNED */ + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + break; + case GL_SUBTRACT: + alpha_combine = (R200_TXA_ARG_B_ZERO | + R200_TXA_COMP_ARG_B | + R200_TXA_NEG_ARG_C | + R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + break; + case GL_INTERPOLATE: + alpha_combine = (R200_TXA_OP_LERP); + R200_ALPHA_ARG( 0, B ); + R200_ALPHA_ARG( 1, A ); + R200_ALPHA_ARG( 2, C ); + break; + + case GL_MODULATE_ADD_ATI: + alpha_combine = (R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + R200_ALPHA_ARG( 2, B ); + break; + case GL_MODULATE_SIGNED_ADD_ATI: + alpha_combine = (R200_TXA_BIAS_ARG_C | /* new */ + R200_TXA_OP_MADD); /* was ADDSIGNED */ + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + R200_ALPHA_ARG( 2, B ); + break; + case GL_MODULATE_SUBTRACT_ATI: + alpha_combine = (R200_TXA_NEG_ARG_C | + R200_TXA_OP_MADD); + R200_ALPHA_ARG( 0, A ); + R200_ALPHA_ARG( 1, C ); + R200_ALPHA_ARG( 2, B ); + break; + default: + return GL_FALSE; + } + + if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT) + || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) { + alpha_scale |= R200_TXA_DOT_ALPHA; + Ashift = RGBshift; + } + + /* Step 3: + * Apply the scale factor. + */ + color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT); + alpha_scale |= (Ashift << R200_TXA_SCALE_SHIFT); + + /* All done! + */ + } + + if ( rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] != color_combine || + rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] != alpha_combine || + rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] != color_scale || + rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] != alpha_scale) { + R200_STATECHANGE( rmesa, pix[slot] ); + rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] = color_combine; + rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] = alpha_combine; + rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] = color_scale; + rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] = alpha_scale; + } + + return GL_TRUE; +} + +void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + r200ContextPtr rmesa = + (r200ContextPtr) ((__DRIcontextPrivate *) pDRICtx->private)-> + driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->glCtx, texname); + r200TexObjPtr t; + + if (!tObj) + return; + + t = (r200TexObjPtr) tObj->DriverData; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + t->pp_txoffset = offset; + t->pp_txpitch = pitch - 32; + + switch (depth) { + case 32: + t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter; + break; + case 24: + default: + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter; + break; + case 16: + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter; + break; + } +} + +#define REF_COLOR 1 +#define REF_ALPHA 2 + +static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLint i, j, currslot; + GLint maxunitused = -1; + GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE}; + GLubyte stageref[7] = {0, 0, 0, 0, 0, 0, 0}; + GLint nextunit[R200_MAX_TEXTURE_UNITS] = {0, 0, 0, 0, 0, 0}; + GLint currentnext = -1; + GLboolean ok; + + /* find highest used unit */ + for ( j = 0; j < R200_MAX_TEXTURE_UNITS; j++) { + if (ctx->Texture.Unit[j]._ReallyEnabled) { + maxunitused = j; + } + } + stageref[maxunitused + 1] = REF_COLOR | REF_ALPHA; + + for ( j = maxunitused; j >= 0; j-- ) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j]; + + rmesa->state.texture.unit[j].outputreg = -1; + + if (stageref[j + 1]) { + + /* use the lowest available reg. That gets us automatically reg0 for the last stage. + need this even for disabled units, as it may get referenced due to the replace + optimization */ + for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) { + if (texregfree[i]) { + rmesa->state.texture.unit[j].outputreg = i; + break; + } + } + if (rmesa->state.texture.unit[j].outputreg == -1) { + /* no more free regs we can use. Need a fallback :-( */ + return GL_FALSE; + } + + nextunit[j] = currentnext; + + if (!texUnit->_ReallyEnabled) { + /* the not enabled stages are referenced "indirectly", + must not cut off the lower stages */ + stageref[j] = REF_COLOR | REF_ALPHA; + continue; + } + currentnext = j; + + const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB; + const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA; + const GLboolean isdot3rgba = (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) || + (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT); + + + /* check if we need the color part, special case for dot3_rgba + as if only the alpha part is referenced later on it still is using the color part */ + if ((stageref[j + 1] & REF_COLOR) || isdot3rgba) { + for ( i = 0 ; i < numColorArgs ; i++ ) { + const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i]; + const GLuint op = texUnit->_CurrentCombine->OperandRGB[i]; + switch ( srcRGBi ) { + case GL_PREVIOUS: + /* op 0/1 are referencing color, op 2/3 alpha */ + stageref[j] |= (op >> 1) + 1; + break; + case GL_TEXTURE: + texregfree[j] = GL_FALSE; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE; + break; + default: /* don't care about other sources here */ + break; + } + } + } + + /* alpha args are ignored for dot3_rgba */ + if ((stageref[j + 1] & REF_ALPHA) && !isdot3rgba) { + + for ( i = 0 ; i < numAlphaArgs ; i++ ) { + const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i]; + switch ( srcAi ) { + case GL_PREVIOUS: + stageref[j] |= REF_ALPHA; + break; + case GL_TEXTURE: + texregfree[j] = GL_FALSE; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: + case GL_TEXTURE3: + case GL_TEXTURE4: + case GL_TEXTURE5: + texregfree[srcAi - GL_TEXTURE0] = GL_FALSE; + break; + default: /* don't care about other sources here */ + break; + } + } + } + } + } + + /* don't enable texture sampling for units if the result is not used */ + for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled && !texregfree[i]) + rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled; + else rmesa->state.texture.unit[i].unitneeded = 0; + } + + ok = GL_TRUE; + currslot = 0; + rmesa->state.envneeded = 1; + + i = 0; + while ((i <= maxunitused) && (i >= 0)) { + /* only output instruction if the results are referenced */ + if (ctx->Texture.Unit[i]._ReallyEnabled && stageref[i+1]) { + GLuint replaceunit = i; + /* try to optimize GL_REPLACE away (only one level deep though) */ + if ( (ctx->Texture.Unit[i]._CurrentCombine->ModeRGB == GL_REPLACE) && + (ctx->Texture.Unit[i]._CurrentCombine->ModeA == GL_REPLACE) && + (ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftRGB == 0) && + (ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftA == 0) && + (nextunit[i] > 0) ) { + /* yippie! can optimize it away! */ + replaceunit = i; + i = nextunit[i]; + } + + /* need env instruction slot */ + rmesa->state.envneeded |= 1 << currslot; + ok = r200UpdateTextureEnv( ctx, i, currslot, replaceunit ); + if (!ok) return GL_FALSE; + currslot++; + } + i = i + 1; + } + + if (currslot == 0) { + /* need one stage at least */ + rmesa->state.texture.unit[0].outputreg = 0; + ok = r200UpdateTextureEnv( ctx, 0, 0, 0 ); + } + + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT; + + return ok; +} + +#undef REF_COLOR +#undef REF_ALPHA + + +#define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK | \ + R200_MIN_FILTER_MASK | \ + R200_MAG_FILTER_MASK | \ + R200_MAX_ANISO_MASK | \ + R200_YUV_TO_RGB | \ + R200_YUV_TEMPERATURE_MASK | \ + R200_CLAMP_S_MASK | \ + R200_CLAMP_T_MASK | \ + R200_BORDER_MODE_D3D ) + +#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK | \ + R200_TXFORMAT_HEIGHT_MASK | \ + R200_TXFORMAT_FORMAT_MASK | \ + R200_TXFORMAT_F5_WIDTH_MASK | \ + R200_TXFORMAT_F5_HEIGHT_MASK | \ + R200_TXFORMAT_ALPHA_IN_MAP | \ + R200_TXFORMAT_CUBIC_MAP_ENABLE | \ + R200_TXFORMAT_NON_POWER2) + +#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \ + R200_TEXCOORD_MASK | \ + R200_CLAMP_Q_MASK | \ + R200_VOLUME_FILTER_MASK) + + +static void import_tex_obj_state( r200ContextPtr rmesa, + int unit, + r200TexObjPtr texobj ) +{ +/* do not use RADEON_DB_STATE to avoid stale texture caches */ + int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + + R200_STATECHANGE( rmesa, tex[unit] ); + + cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK; + cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; + cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; + cmd[TEX_PP_TXFORMAT_X] &= ~TEXOBJ_TXFORMAT_X_MASK; + cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK; + cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */ + cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */ + cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; + if (rmesa->r200Screen->drmSupportsFragShader) { + cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset; + } + else { + cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset; + } + + if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { + int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + GLuint bytesPerFace = texobj->base.totalSize / 6; + ASSERT(texobj->base.totalSize % 6 == 0); + + R200_STATECHANGE( rmesa, cube[unit] ); + cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + if (rmesa->r200Screen->drmSupportsFragShader) { + /* that value is submitted twice. could change cube atom + to not include that command when new drm is used */ + cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + } + cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace; + cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace; + cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace; + cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace; + cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace; + } + + texobj->dirty_state &= ~(1<<unit); +} + + +static void set_texgen_matrix( r200ContextPtr rmesa, + GLuint unit, + const GLfloat *s_plane, + const GLfloat *t_plane, + const GLfloat *r_plane, + const GLfloat *q_plane ) +{ + GLfloat m[16]; + + m[0] = s_plane[0]; + m[4] = s_plane[1]; + m[8] = s_plane[2]; + m[12] = s_plane[3]; + + m[1] = t_plane[0]; + m[5] = t_plane[1]; + m[9] = t_plane[2]; + m[13] = t_plane[3]; + + m[2] = r_plane[0]; + m[6] = r_plane[1]; + m[10] = r_plane[2]; + m[14] = r_plane[3]; + + m[3] = q_plane[0]; + m[7] = q_plane[1]; + m[11] = q_plane[2]; + m[15] = q_plane[3]; + + _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), m); + _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) ); + rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit; +} + + +static GLuint r200_need_dis_texgen(const GLbitfield texGenEnabled, + const GLfloat *planeS, + const GLfloat *planeT, + const GLfloat *planeR, + const GLfloat *planeQ) +{ + GLuint needtgenable = 0; + + if (!(texGenEnabled & S_BIT)) { + if (((texGenEnabled & T_BIT) && planeT[0] != 0.0) || + ((texGenEnabled & R_BIT) && planeR[0] != 0.0) || + ((texGenEnabled & Q_BIT) && planeQ[0] != 0.0)) { + needtgenable |= S_BIT; + } + } + if (!(texGenEnabled & T_BIT)) { + if (((texGenEnabled & S_BIT) && planeS[1] != 0.0) || + ((texGenEnabled & R_BIT) && planeR[1] != 0.0) || + ((texGenEnabled & Q_BIT) && planeQ[1] != 0.0)) { + needtgenable |= T_BIT; + } + } + if (!(texGenEnabled & R_BIT)) { + if (((texGenEnabled & S_BIT) && planeS[2] != 0.0) || + ((texGenEnabled & T_BIT) && planeT[2] != 0.0) || + ((texGenEnabled & Q_BIT) && planeQ[2] != 0.0)) { + needtgenable |= R_BIT; + } + } + if (!(texGenEnabled & Q_BIT)) { + if (((texGenEnabled & S_BIT) && planeS[3] != 0.0) || + ((texGenEnabled & T_BIT) && planeT[3] != 0.0) || + ((texGenEnabled & R_BIT) && planeR[3] != 0.0)) { + needtgenable |= Q_BIT; + } + } + + return needtgenable; +} + + +/* + * Returns GL_FALSE if fallback required. + */ +static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4; + GLuint tgi, tgcm; + GLuint mode = 0; + GLboolean mixed_fallback = GL_FALSE; + static const GLfloat I[16] = { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1 }; + static const GLfloat reflect[16] = { + -1, 0, 0, 0, + 0, -1, 0, 0, + 0, 0, -1, 0, + 0, 0, 0, 1 }; + + rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); + rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenNeedNormals[unit] = GL_FALSE; + tgi = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] & ~(R200_TEXGEN_INPUT_MASK << + inputshift); + tgcm = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] & ~(R200_TEXGEN_COMP_MASK << + (unit * 4)); + + if (0) + fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); + + if (texUnit->TexGenEnabled & S_BIT) { + mode = texUnit->GenModeS; + } else { + tgcm |= R200_TEXGEN_COMP_S << (unit * 4); + } + + if (texUnit->TexGenEnabled & T_BIT) { + if (texUnit->GenModeT != mode) + mixed_fallback = GL_TRUE; + } else { + tgcm |= R200_TEXGEN_COMP_T << (unit * 4); + } + + if (texUnit->TexGenEnabled & R_BIT) { + if (texUnit->GenModeR != mode) + mixed_fallback = GL_TRUE; + } else { + tgcm |= R200_TEXGEN_COMP_R << (unit * 4); + } + + if (texUnit->TexGenEnabled & Q_BIT) { + if (texUnit->GenModeQ != mode) + mixed_fallback = GL_TRUE; + } else { + tgcm |= R200_TEXGEN_COMP_Q << (unit * 4); + } + + if (mixed_fallback) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n", + texUnit->TexGenEnabled, texUnit->GenModeS, texUnit->GenModeT, + texUnit->GenModeR, texUnit->GenModeQ); + return GL_FALSE; + } + +/* we CANNOT do mixed mode if the texgen mode requires a plane where the input + is not enabled for texgen, since the planes are concatenated into texmat, + and thus the input will come from texcoord rather than tex gen equation! + Either fallback or just hope that those texcoords aren't really needed... + Assuming the former will cause lots of unnecessary fallbacks, the latter will + generate bogus results sometimes - it's pretty much impossible to really know + when a fallback is needed, depends on texmat and what sort of texture is bound + etc, - for now fallback if we're missing either S or T bits, there's a high + probability we need the texcoords in that case. + That's a lot of work for some obscure texgen mixed mode fixup - why oh why + doesn't the chip just directly accept the plane parameters :-(. */ + switch (mode) { + case GL_OBJECT_LINEAR: { + GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, + texUnit->ObjectPlaneS, texUnit->ObjectPlaneT, + texUnit->ObjectPlaneR, texUnit->ObjectPlaneQ ); + if (needtgenable & (S_BIT | T_BIT)) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n", + texUnit->TexGenEnabled); + return GL_FALSE; + } + if (needtgenable & (R_BIT)) { + tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4)); + } + if (needtgenable & (Q_BIT)) { + tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4)); + } + + tgi |= R200_TEXGEN_INPUT_OBJ << inputshift; + set_texgen_matrix( rmesa, unit, + (texUnit->TexGenEnabled & S_BIT) ? texUnit->ObjectPlaneS : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->ObjectPlaneT : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->ObjectPlaneR : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->ObjectPlaneQ : I + 12); + } + break; + + case GL_EYE_LINEAR: { + GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled, + texUnit->EyePlaneS, texUnit->EyePlaneT, + texUnit->EyePlaneR, texUnit->EyePlaneQ ); + if (needtgenable & (S_BIT | T_BIT)) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n", + texUnit->TexGenEnabled); + return GL_FALSE; + } + if (needtgenable & (R_BIT)) { + tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4)); + } + if (needtgenable & (Q_BIT)) { + tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4)); + } + tgi |= R200_TEXGEN_INPUT_EYE << inputshift; + set_texgen_matrix( rmesa, unit, + (texUnit->TexGenEnabled & S_BIT) ? texUnit->EyePlaneS : I, + (texUnit->TexGenEnabled & T_BIT) ? texUnit->EyePlaneT : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? texUnit->EyePlaneR : I + 8, + (texUnit->TexGenEnabled & Q_BIT) ? texUnit->EyePlaneQ : I + 12); + } + break; + + case GL_REFLECTION_MAP_NV: + rmesa->TexGenNeedNormals[unit] = GL_TRUE; + tgi |= R200_TEXGEN_INPUT_EYE_REFLECT << inputshift; + /* pretty weird, must only negate when lighting is enabled? */ + if (ctx->Light.Enabled) + set_texgen_matrix( rmesa, unit, + (texUnit->TexGenEnabled & S_BIT) ? reflect : I, + (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4, + (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8, + I + 12); + break; + + case GL_NORMAL_MAP_NV: + rmesa->TexGenNeedNormals[unit] = GL_TRUE; + tgi |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift; + break; + + case GL_SPHERE_MAP: + rmesa->TexGenNeedNormals[unit] = GL_TRUE; + tgi |= R200_TEXGEN_INPUT_SPHERE<<inputshift; + break; + + case 0: + /* All texgen units were disabled, so just pass coords through. */ + tgi |= unit << inputshift; + break; + + default: + /* Unsupported mode, fallback: + */ + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback unsupported texgen, %d\n", + texUnit->GenModeS); + return GL_FALSE; + } + + rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit; + rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit; + + if (tgi != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] || + tgcm != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2]) + { + R200_STATECHANGE(rmesa, tcg); + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = tgi; + rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = tgcm; + } + + return GL_TRUE; +} + + +static void disable_tex( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) { + /* Texture unit disabled */ + if ( rmesa->state.texture.unit[unit].texobj != NULL ) { + /* The old texture is no longer bound to this texture unit. + * Mark it as such. + */ + + rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); + rmesa->state.texture.unit[unit].texobj = NULL; + } + + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit); + + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + + if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) { + TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); + } + + /* Actually want to keep all units less than max active texture + * enabled, right? Fix this for >2 texunits. + */ + + { + GLuint tmp = rmesa->TexGenEnabled; + + rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenNeedNormals[unit] = GL_FALSE; + rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); + + if (tmp != rmesa->TexGenEnabled) { + rmesa->recheck_texgen[unit] = GL_TRUE; + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + } + } +} + +void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + GLuint re_cntl; + + re_cntl = rmesa->hw.set.cmd[SET_RE_CNTL] & ~(R200_VTX_STQ0_D3D << (2 * unit)); + if (use_d3d) + re_cntl |= R200_VTX_STQ0_D3D << (2 * unit); + + if ( re_cntl != rmesa->hw.set.cmd[SET_RE_CNTL] ) { + R200_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_RE_CNTL] = re_cntl; + } +} + +static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; + + /* Need to load the 2d images associated with this unit. + */ + if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { + t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; + t->base.dirty_images[0] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); + + if ( t->base.dirty_images[0] ) { + R200_FIREVERTICES( rmesa ); + r200SetTexImages( rmesa, tObj ); + r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); + if ( !t->base.memBlock && !t->image_override ) + return GL_FALSE; + } + + set_re_cntl_d3d( ctx, unit, GL_FALSE ); + + return GL_TRUE; +} + +#if ENABLE_HW_3D_TEXTURE +static GLboolean enable_tex_3d( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; + + /* Need to load the 3d images associated with this unit. + */ + if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { + t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; + t->base.dirty_images[0] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_3D); + + /* R100 & R200 do not support mipmaps for 3D textures. + */ + if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) { + return GL_FALSE; + } + + if ( t->base.dirty_images[0] ) { + R200_FIREVERTICES( rmesa ); + r200SetTexImages( rmesa, tObj ); + r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); + if ( !t->base.memBlock ) + return GL_FALSE; + } + + set_re_cntl_d3d( ctx, unit, GL_TRUE ); + + return GL_TRUE; +} +#endif + +static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; + GLuint face; + + /* Need to load the 2d images associated with this unit. + */ + if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { + t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; + for (face = 0; face < 6; face++) + t->base.dirty_images[face] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + + if ( t->base.dirty_images[0] || t->base.dirty_images[1] || + t->base.dirty_images[2] || t->base.dirty_images[3] || + t->base.dirty_images[4] || t->base.dirty_images[5] ) { + /* flush */ + R200_FIREVERTICES( rmesa ); + /* layout memory space, once for all faces */ + r200SetTexImages( rmesa, tObj ); + } + + /* upload (per face) */ + for (face = 0; face < 6; face++) { + if (t->base.dirty_images[face]) { + r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face ); + } + } + + if ( !t->base.memBlock ) { + /* texmem alloc failed, use s/w fallback */ + return GL_FALSE; + } + + set_re_cntl_d3d( ctx, unit, GL_TRUE ); + + return GL_TRUE; +} + +static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; + + if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) { + t->pp_txformat |= R200_TXFORMAT_NON_POWER2; + t->base.dirty_images[0] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); + + if ( t->base.dirty_images[0] ) { + R200_FIREVERTICES( rmesa ); + r200SetTexImages( rmesa, tObj ); + r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); + if ( !t->base.memBlock && + !t->image_override && + !rmesa->prefer_gart_client_texturing ) + return GL_FALSE; + } + + set_re_cntl_d3d( ctx, unit, GL_FALSE ); + + return GL_TRUE; +} + + +static GLboolean update_tex_common( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; + + /* Fallback if there's a texture border */ + if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) + return GL_FALSE; + + /* Update state if this is a different texture object to last + * time. + */ + if ( rmesa->state.texture.unit[unit].texobj != t ) { + if ( rmesa->state.texture.unit[unit].texobj != NULL ) { + /* The old texture is no longer bound to this texture unit. + * Mark it as such. + */ + + rmesa->state.texture.unit[unit].texobj->base.bound &= + ~(1UL << unit); + } + + rmesa->state.texture.unit[unit].texobj = t; + t->base.bound |= (1UL << unit); + t->dirty_state |= 1<<unit; + driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */ + } + + + /* Newly enabled? + */ + if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit; + + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3); + + rmesa->recheck_texgen[unit] = GL_TRUE; + } + + if (t->dirty_state & (1<<unit)) { + import_tex_obj_state( rmesa, unit, t ); + } + + if (rmesa->recheck_texgen[unit]) { + GLboolean fallback = !r200_validate_texgen( ctx, unit ); + TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback); + rmesa->recheck_texgen[unit] = 0; + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + + FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback ); + return !t->border_fallback; +} + + + +static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded; + + if ( unitneeded & (TEXTURE_RECT_BIT) ) { + return (enable_tex_rect( ctx, unit ) && + update_tex_common( ctx, unit )); + } + else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { + return (enable_tex_2d( ctx, unit ) && + update_tex_common( ctx, unit )); + } +#if ENABLE_HW_3D_TEXTURE + else if ( unitneeded & (TEXTURE_3D_BIT) ) { + return (enable_tex_3d( ctx, unit ) && + update_tex_common( ctx, unit )); + } +#endif + else if ( unitneeded & (TEXTURE_CUBE_BIT) ) { + return (enable_tex_cube( ctx, unit ) && + update_tex_common( ctx, unit )); + } + else if ( unitneeded ) { + return GL_FALSE; + } + else { + disable_tex( ctx, unit ); + return GL_TRUE; + } +} + + +void r200UpdateTextureState( GLcontext *ctx ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + GLboolean ok; + GLuint dbg; + + if (ctx->ATIFragmentShader._Enabled) { + GLuint i; + for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) { + rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled; + } + ok = GL_TRUE; + } + else { + ok = r200UpdateAllTexEnv( ctx ); + } + if (ok) { + ok = (r200UpdateTextureUnit( ctx, 0 ) && + r200UpdateTextureUnit( ctx, 1 ) && + r200UpdateTextureUnit( ctx, 2 ) && + r200UpdateTextureUnit( ctx, 3 ) && + r200UpdateTextureUnit( ctx, 4 ) && + r200UpdateTextureUnit( ctx, 5 )); + } + + if (ok && ctx->ATIFragmentShader._Enabled) { + r200UpdateFragmentShader(ctx); + } + + FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok ); + + if (rmesa->TclFallback) + r200ChooseVertexState( ctx ); + + + if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + + /* + * T0 hang workaround ------------- + * not needed for r200 derivatives + */ + if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE && + (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) { + + R200_STATECHANGE(rmesa, ctx); + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; + if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE)) + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE; + } + else if (!ctx->ATIFragmentShader._Enabled) { + if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) && + (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) { + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE; + } + } + /* do the same workaround for the first pass of a fragment shader. + * completely unknown if necessary / sufficient. + */ + if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE && + (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) { + + R200_STATECHANGE(rmesa, cst); + R200_STATECHANGE(rmesa, tex[1]); + rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE; + if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE)) + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE; + } + + /* maybe needs to be done pairwise due to 2 parallel (physical) tex units ? + looks like that's not the case, if 8500/9100 owners don't complain remove this... + for ( i = 0; i < ctx->Const.MaxTextureUnits; i += 2) { + if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ((R200_TEX_0_ENABLE | + R200_TEX_1_ENABLE ) << i)) == (R200_TEX_0_ENABLE << i)) && + ((rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > + R200_MIN_FILTER_LINEAR)) { + R200_STATECHANGE(rmesa, ctx); + R200_STATECHANGE(rmesa, tex[i+1]); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_1_ENABLE << i); + rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] |= 0x08000000; + } + else { + if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE << i)) && + (rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) { + R200_STATECHANGE(rmesa, tex[i+1]); + rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000; + } + } + } */ + + /* + * Texture cache LRU hang workaround ------------- + * not needed for r200 derivatives + * hopefully this covers first pass of a shader as well + */ + + /* While the cases below attempt to only enable the workaround in the + * specific cases necessary, they were insufficient. See bugzilla #1519, + * #729, #814. Tests with quake3 showed no impact on performance. + */ + dbg = 0x6; + + /* + if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE )) && + ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0)) || + ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) && + ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0)) || + ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) && + ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0))) + { + dbg |= 0x02; + } + + if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE )) && + ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0)) || + ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) && + ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0)) || + ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) && + ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) & + 0x04) == 0))) + { + dbg |= 0x04; + }*/ + + if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) { + R200_STATECHANGE( rmesa, tam ); + rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg; + if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg); + } + } +} diff --git a/r200/r200_vertprog.c b/r200/r200_vertprog.c new file mode 100644 index 0000000..6089d61 --- /dev/null +++ b/r200/r200_vertprog.c @@ -0,0 +1,1256 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Roland Scheidegger <rscheidegger_lists@hispeed.ch> + */ +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "shader/programopt.h" +#include "tnl/tnl.h" + +#include "r200_context.h" +#include "r200_vertprog.h" +#include "r200_ioctl.h" +#include "r200_tcl.h" + +#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ + SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ + SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ + SWIZZLE_W != VSF_IN_COMPONENT_W || \ + SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ + SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ + WRITEMASK_X != VSF_FLAG_X || \ + WRITEMASK_Y != VSF_FLAG_Y || \ + WRITEMASK_Z != VSF_FLAG_Z || \ + WRITEMASK_W != VSF_FLAG_W +#error Cannot change these! +#endif + +#define SCALAR_FLAG (1<<31) +#define FLAG_MASK (1<<31) +#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ +#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} + +static struct{ + char *name; + int opcode; + unsigned long ip; /* number of input operands and flags */ +}op_names[]={ + OPN(ABS, 1), + OPN(ADD, 2), + OPN(ARL, 1|SCALAR_FLAG), + OPN(DP3, 2), + OPN(DP4, 2), + OPN(DPH, 2), + OPN(DST, 2), + OPN(EX2, 1|SCALAR_FLAG), + OPN(EXP, 1|SCALAR_FLAG), + OPN(FLR, 1), + OPN(FRC, 1), + OPN(LG2, 1|SCALAR_FLAG), + OPN(LIT, 1), + OPN(LOG, 1|SCALAR_FLAG), + OPN(MAD, 3), + OPN(MAX, 2), + OPN(MIN, 2), + OPN(MOV, 1), + OPN(MUL, 2), + OPN(POW, 2|SCALAR_FLAG), + OPN(RCP, 1|SCALAR_FLAG), + OPN(RSQ, 1|SCALAR_FLAG), + OPN(SGE, 2), + OPN(SLT, 2), + OPN(SUB, 2), + OPN(SWZ, 1), + OPN(XPD, 2), + OPN(PRINT, 0), + OPN(END, 0), +}; +#undef OPN + +static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp) +{ + r200ContextPtr rmesa = R200_CONTEXT( ctx ); + GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1]; + int pi; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + struct gl_program_parameter_list *paramList; + drm_radeon_cmd_header_t tmp; + + R200_STATECHANGE( rmesa, vpp[0] ); + R200_STATECHANGE( rmesa, vpp[1] ); + assert(mesa_vp->Base.Parameters); + _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + paramList = mesa_vp->Base.Parameters; + + if(paramList->NumParameters > R200_VSF_MAX_PARAM){ + fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + return GL_FALSE; + } + + for(pi = 0; pi < paramList->NumParameters; pi++) { + switch(paramList->Parameters[pi].Type) { + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); + case PROGRAM_CONSTANT: + *fcmd++ = paramList->ParameterValues[pi][0]; + *fcmd++ = paramList->ParameterValues[pi][1]; + *fcmd++ = paramList->ParameterValues[pi][2]; + *fcmd++ = paramList->ParameterValues[pi][3]; + break; + default: + _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); + break; + } + if (pi == 95) { + fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1]; + } + } + /* hack up the cmd_size so not the whole state atom is emitted always. */ + rmesa->hw.vpp[0].cmd_size = + 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters); + tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0]; + tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters; + rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i; + if (paramList->NumParameters > 96) { + rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96); + tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0]; + tmp.veclinear.count = paramList->NumParameters - 96; + rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i; + } + return GL_TRUE; +} + +static __inline unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; +} + +static unsigned long t_dst(struct prog_dst_register *dst) +{ + switch(dst->File) { + case PROGRAM_TEMPORARY: + return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_TMP); + case PROGRAM_OUTPUT: + switch (dst->Index) { + case VERT_RESULT_HPOS: + return R200_VSF_OUT_CLASS_RESULT_POS; + case VERT_RESULT_COL0: + return R200_VSF_OUT_CLASS_RESULT_COLOR; + case VERT_RESULT_COL1: + return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_RESULT_COLOR); + case VERT_RESULT_FOGC: + return R200_VSF_OUT_CLASS_RESULT_FOGC; + case VERT_RESULT_TEX0: + case VERT_RESULT_TEX1: + case VERT_RESULT_TEX2: + case VERT_RESULT_TEX3: + case VERT_RESULT_TEX4: + case VERT_RESULT_TEX5: + return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT) + | R200_VSF_OUT_CLASS_RESULT_TEXC); + case VERT_RESULT_PSIZ: + return R200_VSF_OUT_CLASS_RESULT_POINTSIZE; + default: + fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index); + exit(0); + return 0; + } + case PROGRAM_ADDRESS: + assert (dst->Index == 0); + return R200_VSF_OUT_CLASS_ADDR; + default: + fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File); + exit(0); + return 0; + } +} + +static unsigned long t_src_class(enum register_file file) +{ + + switch(file){ + case PROGRAM_TEMPORARY: + return VSF_IN_CLASS_TMP; + + case PROGRAM_INPUT: + return VSF_IN_CLASS_ATTR; + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + return VSF_IN_CLASS_PARAM; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + exit(0); + } +} + +static __inline unsigned long t_swizzle(GLubyte swizzle) +{ +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +#if 0 +static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller) +{ + int i; + + if(vp == NULL){ + fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); + return ; + } + + fprintf(stderr, "%s:<", caller); + for(i=0; i < VERT_ATTRIB_MAX; i++) + fprintf(stderr, "%d ", vp->inputs[i]); + fprintf(stderr, ">\n"); + +} +#endif + +static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src) +{ +/* + int i; + int max_reg = -1; +*/ + if(src->File == PROGRAM_INPUT){ +/* if(vp->inputs[src->Index] != -1) + return vp->inputs[src->Index]; + + for(i=0; i < VERT_ATTRIB_MAX; i++) + if(vp->inputs[i] > max_reg) + max_reg = vp->inputs[i]; + + vp->inputs[src->Index] = max_reg+1;*/ + + //vp_dump_inputs(vp, __FUNCTION__); + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); + return 0; + } + return src->Index; + } +} + +static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src) +{ + + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->NegateBase) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) +{ + + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); +} + +static unsigned long t_opcode(enum prog_opcode opcode) +{ + + switch(opcode){ + case OPCODE_ADD: return R200_VPI_OUT_OP_ADD; + /* FIXME: ARL works fine, but negative offsets won't work - fglrx just + * seems to ignore neg offsets which isn't quite correct... + */ + case OPCODE_ARL: return R200_VPI_OUT_OP_ARL; + case OPCODE_DP4: return R200_VPI_OUT_OP_DOT; + case OPCODE_DST: return R200_VPI_OUT_OP_DST; + case OPCODE_EX2: return R200_VPI_OUT_OP_EX2; + case OPCODE_EXP: return R200_VPI_OUT_OP_EXP; + case OPCODE_FRC: return R200_VPI_OUT_OP_FRC; + case OPCODE_LG2: return R200_VPI_OUT_OP_LG2; + case OPCODE_LIT: return R200_VPI_OUT_OP_LIT; + case OPCODE_LOG: return R200_VPI_OUT_OP_LOG; + case OPCODE_MAX: return R200_VPI_OUT_OP_MAX; + case OPCODE_MIN: return R200_VPI_OUT_OP_MIN; + case OPCODE_MUL: return R200_VPI_OUT_OP_MUL; + case OPCODE_RCP: return R200_VPI_OUT_OP_RCP; + case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ; + case OPCODE_SGE: return R200_VPI_OUT_OP_SGE; + case OPCODE_SLT: return R200_VPI_OUT_OP_SLT; + + default: + fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); + } + exit(-1); + return 0; +} + +static unsigned long op_operands(enum prog_opcode opcode) +{ + int i; + + /* Can we trust mesas opcodes to be in order ? */ + for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++) + if(op_names[i].opcode == opcode) + return op_names[i].ip; + + fprintf(stderr, "op %d not found in op_names\n", opcode); + exit(-1); + return 0; +} + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \ + +/* fglrx on rv250 codes up unused sources as follows: + unused but necessary sources are same as previous source, zero-ed out. + unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set. + i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg + set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */ + +/* use these simpler definitions. Must obviously not be used with not yet set up regs. + Those are NOT semantically equivalent to the r300 ones, requires code changes */ +#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ + | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ + | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) + +#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9) + +#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9) + +#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9) + + +/** + * Generate an R200 vertex program from Mesa's internal representation. + * + * \return GL_TRUE for success, GL_FALSE for failure. + */ +static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp) +{ + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + struct prog_instruction *vpi; + int i; + VERTEX_SHADER_INSTRUCTION *o_inst; + unsigned long operands; + int are_srcs_scalar; + unsigned long hw_op; + int dofogfix = 0; + int fog_temp_i = 0; + int free_inputs; + int array_count = 0; + + vp->native = GL_FALSE; + vp->translated = GL_TRUE; + vp->fogmode = ctx->Fog.Mode; + + if (mesa_vp->Base.NumInstructions == 0) + return GL_FALSE; + +#if 0 + if ((mesa_vp->Base.InputsRead & + ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | + VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | + VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog inputs 0x%x\n", + mesa_vp->Base.InputsRead); + } + return GL_FALSE; + } +#endif + + if ((mesa_vp->Base.OutputsWritten & + ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) | + (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) | + (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) | + (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog outputs 0x%x\n", + mesa_vp->Base.OutputsWritten); + } + return GL_FALSE; + } + + if (mesa_vp->IsNVProgram) { + /* subtle differences in spec like guaranteed initialized regs could cause + headaches. Might want to remove the driconf option to enable it completely */ + return GL_FALSE; + } + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = R200_VSF_MAX_TEMPS - 1; + struct prog_src_register src[3]; + struct prog_dst_register dst; + +/* FIXME: is changing the prog safe to do here? */ + if (mesa_vp->IsPositionInvariant && + /* make sure we only do this once */ + !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { + _mesa_insert_mvp_code(ctx, mesa_vp); + } + + /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with + base e isn't directly available neither. */ + if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) { + struct gl_program_parameter_list *paramList; + gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 }; + paramList = mesa_vp->Base.Parameters; + vp->fogpidx = _mesa_add_state_reference(paramList, tokens); + } + + vp->pos_end = 0; + mesa_vp->Base.NumNativeInstructions = 0; + if (mesa_vp->Base.Parameters) + mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters; + else + mesa_vp->Base.NumNativeParameters = 0; + + for(i = 0; i < VERT_ATTRIB_MAX; i++) + vp->inputs[i] = -1; + for(i = 0; i < 15; i++) + vp->inputmap_rev[i] = 255; + free_inputs = 0x2ffd; + +/* fglrx uses fixed inputs as follows for conventional attribs. + generic attribs use non-fixed assignment, fglrx will always use the + lowest attrib values available. We'll just do the same. + There are 12 generic attribs possible, corresponding to attrib 0, 2-11 + and 13 in a hw vertex prog. + attr 1 and 12 aren't used for generic attribs as those cannot be made vec4 + (correspond to vertex normal/weight - maybe weight actually could be made vec4). + Additionally, not more than 12 arrays in total are possible I think. + attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0 + attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0) + attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1) + attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0) +*/ + +/* attr 4,5 and 13 are only used with generic attribs. + Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is + not possibe to use with vertex progs as it is lacking in vert prog specification) */ +/* may look different when using idx buf / input_route instead of se_vtx_fmt? */ + if (mesa_vp->Base.InputsRead & VERT_BIT_POS) { + vp->inputs[VERT_ATTRIB_POS] = 0; + vp->inputmap_rev[0] = VERT_ATTRIB_POS; + free_inputs &= ~(1 << 0); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) { + vp->inputs[VERT_ATTRIB_WEIGHT] = 12; + vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT; + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) { + vp->inputs[VERT_ATTRIB_NORMAL] = 1; + vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL; + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) { + vp->inputs[VERT_ATTRIB_COLOR0] = 2; + vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0; + free_inputs &= ~(1 << 2); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) { + vp->inputs[VERT_ATTRIB_COLOR1] = 3; + vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1; + free_inputs &= ~(1 << 3); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) { + vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++; + vp->inputmap_rev[3] = VERT_ATTRIB_FOG; + array_count++; + } + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) { + if (mesa_vp->Base.InputsRead & (1 << i)) { + vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6; + vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i; + free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6)); + array_count++; + } + } + /* using VERT_ATTRIB_TEX6/7 would be illegal */ + /* completely ignore aliasing? */ + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { + int j; + /* completely ignore aliasing? */ + if (mesa_vp->Base.InputsRead & (1 << i)) { + array_count++; + if (array_count > 12) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "more than 12 attribs used in vert prog\n"); + } + return GL_FALSE; + } + for (j = 0; j < 14; j++) { + /* will always find one due to limited array_count */ + if (free_inputs & (1 << j)) { + free_inputs &= ~(1 << j); + vp->inputs[i] = j; + if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */ + else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */ + else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */ + break; + } + } + } + } + + if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog without position output\n"); + } + return GL_FALSE; + } + if (free_inputs & 1) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog without position input\n"); + } + return GL_FALSE; + } + + o_inst = vp->instr; + for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ + operands = op_operands(vpi->Opcode); + are_srcs_scalar = operands & SCALAR_FLAG; + operands &= OP_MASK; + + for(i = 0; i < operands; i++) { + src[i] = vpi->SrcReg[i]; + /* hack up default attrib values as per spec as swizzling. + normal, fog, secondary color. Crazy? + May need more if we don't submit vec4 elements? */ + if (src[i].File == PROGRAM_INPUT) { + if (src[i].Index == VERT_ATTRIB_NORMAL) { + int j; + for (j = 0; j < 4; j++) { + if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { + src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); + src[i].Swizzle |= SWIZZLE_ONE << (j*3); + } + } + } + else if (src[i].Index == VERT_ATTRIB_COLOR1) { + int j; + for (j = 0; j < 4; j++) { + if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { + src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); + src[i].Swizzle |= SWIZZLE_ZERO << (j*3); + } + } + } + else if (src[i].Index == VERT_ATTRIB_FOG) { + int j; + for (j = 0; j < 4; j++) { + if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { + src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); + src[i].Swizzle |= SWIZZLE_ONE << (j*3); + } + else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) || + GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) { + src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); + src[i].Swizzle |= SWIZZLE_ZERO << (j*3); + } + } + } + } + } + + if(operands == 3){ + if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); + + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if(operands >= 2){ + if( CMP_SRCS(src[1], src[0]) ){ + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + dst = vpi->DstReg; + if (dst.File == PROGRAM_OUTPUT && + dst.Index == VERT_RESULT_FOGC && + dst.WriteMask & WRITEMASK_X) { + fog_temp_i = u_temp_i; + dst.File = PROGRAM_TEMPORARY; + dst.Index = fog_temp_i; + dofogfix = 1; + u_temp_i--; + } + + /* These ops need special handling. */ + switch(vpi->Opcode){ + case OPCODE_POW: +/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter). + So may need to insert additional instruction */ + if ((src[0].File == src[1].File) && + (src[0].Index == src[1].Index)) { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + SWIZZLE_ZERO, + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_0; + } + else { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_ALL); + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_ZERO, SWIZZLE_ZERO, + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_0; + u_temp_i--; + } + goto next; + + case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + case OPCODE_SWZ: + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = ZERO_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_MAD: + hw_op=(src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; + + o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + o_inst->src0 = t_src(vp, &src[0]); +#if 0 +if ((o_inst - vp->instr) == 31) { +/* fix up the broken vertex program of quake4 demo... */ +o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); +o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); +} +else { + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); +} +#else + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); +#endif + goto next; + + case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); + + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0=t_src(vp, &src[0]); + o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + o_inst->src2 = UNUSED_SRC_1; + goto next; + + case OPCODE_FLR: + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + + o_inst->src2 = UNUSED_SRC_0; + u_temp_i--; + goto next; + + case OPCODE_XPD: + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1].NegateBase) | (src[1].RelAddr << 4); + + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + u_temp_i--; + + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0].NegateBase) | (src[0].RelAddr << 4); + + o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + goto next; + + case OPCODE_END: + assert(0); + default: + break; + } + + o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst), + t_dst_mask(dst.WriteMask)); + + if(are_srcs_scalar){ + switch(operands){ + case 1: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + break; + + case 2: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = t_src_scalar(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + break; + + case 3: + o_inst->src0 = t_src_scalar(vp, &src[0]); + o_inst->src1 = t_src_scalar(vp, &src[1]); + o_inst->src2 = t_src_scalar(vp, &src[2]); + break; + + default: + fprintf(stderr, "illegal number of operands %lu\n", operands); + exit(-1); + break; + } + } else { + switch(operands){ + case 1: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + break; + + case 2: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = UNUSED_SRC_1; + break; + + case 3: + o_inst->src0 = t_src(vp, &src[0]); + o_inst->src1 = t_src(vp, &src[1]); + o_inst->src2 = t_src(vp, &src[2]); + break; + + default: + fprintf(stderr, "illegal number of operands %lu\n", operands); + exit(-1); + break; + } + } + next: + + if (dofogfix) { + o_inst++; + if (vp->fogmode == GL_EXP) { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); + o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E, + R200_VSF_OUT_CLASS_RESULT_FOGC, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + } + else if (vp->fogmode == GL_EXP2) { + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); + o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); + o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E, + R200_VSF_OUT_CLASS_RESULT_FOGC, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); + o_inst->src1 = UNUSED_SRC_0; + o_inst->src2 = UNUSED_SRC_1; + } + else { /* fogmode == GL_LINEAR */ + /* could do that with single op (dot) if using params like + with fixed function pipeline fog */ + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, + (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); + o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE); + o_inst->src2 = UNUSED_SRC_1; + o_inst++; + o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, + R200_VSF_OUT_CLASS_RESULT_FOGC, + VSF_FLAG_X); + o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); + o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE); + o_inst->src2 = UNUSED_SRC_1; + + } + dofogfix = 0; + } + + if (mesa_vp->Base.NumNativeTemporaries < + (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) { + mesa_vp->Base.NumNativeTemporaries = + mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i); + } + if (u_temp_i < mesa_vp->Base.NumTemporaries) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i); + } + return GL_FALSE; + } + u_temp_i = R200_VSF_MAX_TEMPS - 1; + if(o_inst - vp->instr >= R200_VSF_MAX_INST) { + mesa_vp->Base.NumNativeInstructions = 129; + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "more than 128 native instructions\n"); + } + return GL_FALSE; + } + if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) { + vp->pos_end = (o_inst - vp->instr); + } + } + + vp->native = GL_TRUE; + mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr); +#if 0 + fprintf(stderr, "hw program:\n"); + for(i=0; i < vp->program.length; i++) + fprintf(stderr, "%08x\n", vp->instr[i]); +#endif + return GL_TRUE; +} + +void r200SetupVertexProg( GLcontext *ctx ) { + r200ContextPtr rmesa = R200_CONTEXT(ctx); + struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current; + GLboolean fallback; + GLint i; + + if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) { + rmesa->curr_vp_hw = NULL; + r200_translate_vertex_program(ctx, vp); + } + /* could optimize setting up vertex progs away for non-tcl hw */ + fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) && + rmesa->r200Screen->drmSupportsVertexProgram); + TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback); + if (rmesa->TclFallback) return; + + R200_STATECHANGE( rmesa, vap ); + /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? + maybe only when using more than 64 inst / 96 param? */ + rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/; + + R200_STATECHANGE( rmesa, pvs ); + + rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) | + ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) | + (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT); + rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT); + + /* maybe user clip planes just work with vertex progs... untested */ + if (ctx->Transform.ClipPlanesEnabled) { + R200_STATECHANGE( rmesa, tcl ); + if (vp->mesa_program.IsPositionInvariant) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2); + } + else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc); + } + } + + if (vp != rmesa->curr_vp_hw) { + GLuint count = vp->mesa_program.Base.NumNativeInstructions; + drm_radeon_cmd_header_t tmp; + + R200_STATECHANGE( rmesa, vpi[0] ); + R200_STATECHANGE( rmesa, vpi[1] ); + + /* FIXME: what about using a memcopy... */ + for (i = 0; (i < 64) && i < count; i++) { + rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op; + rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0; + rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1; + rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2; + } + /* hack up the cmd_size so not the whole state atom is emitted always. + This may require some more thought, we may emit half progs on lost state, but + hopefully it won't matter? + WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected) + packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */ + rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count); + tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0]; + tmp.veclinear.count = (count > 64) ? 64 : count; + rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i; + if (count > 64) { + for (i = 0; i < (count - 64); i++) { + rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op; + rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0; + rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1; + rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2; + } + rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64); + tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0]; + tmp.veclinear.count = count - 64; + rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i; + } + rmesa->curr_vp_hw = vp; + } +} + + +static void +r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + switch(target){ + case GL_VERTEX_PROGRAM_ARB: + rmesa->curr_vp_hw = NULL; + break; + default: + _mesa_problem(ctx, "Target not supported yet!"); + break; + } +} + +static struct gl_program * +r200NewProgram(GLcontext *ctx, GLenum target, GLuint id) +{ + struct r200_vertex_program *vp; + + switch(target){ + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r200_vertex_program); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); + case GL_FRAGMENT_PROGRAM_ARB: + case GL_FRAGMENT_PROGRAM_NV: + return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id ); + default: + _mesa_problem(ctx, "Bad target in r200NewProgram"); + } + return NULL; +} + + +static void +r200DeleteProgram(GLcontext *ctx, struct gl_program *prog) +{ + _mesa_delete_program(ctx, prog); +} + +static void +r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + struct r200_vertex_program *vp = (void *)prog; + r200ContextPtr rmesa = R200_CONTEXT(ctx); + + switch(target) { + case GL_VERTEX_PROGRAM_ARB: + vp->translated = GL_FALSE; + vp->fogpidx = 0; +/* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/ + r200_translate_vertex_program(ctx, vp); + rmesa->curr_vp_hw = NULL; + break; + case GL_FRAGMENT_SHADER_ATI: + rmesa->afs_loaded = NULL; + break; + } + /* need this for tcl fallbacks */ + _tnl_program_string(ctx, target, prog); +} + +static GLboolean +r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ + struct r200_vertex_program *vp = (void *)prog; + + switch(target){ + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + if (!vp->translated) { + r200_translate_vertex_program(ctx, vp); + } + /* does not take parameters etc. into account */ + return vp->native; + default: + _mesa_problem(ctx, "Bad target in r200NewProgram"); + } + return 0; +} + +void r200InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r200NewProgram; + functions->BindProgram = r200BindProgram; + functions->DeleteProgram = r200DeleteProgram; + functions->ProgramStringNotify = r200ProgramStringNotify; + functions->IsProgramNative = r200IsProgramNative; +} diff --git a/r200/r200_vertprog.h b/r200/r200_vertprog.h new file mode 100644 index 0000000..9382376 --- /dev/null +++ b/r200/r200_vertprog.h @@ -0,0 +1,163 @@ +#ifndef __VERTEX_SHADER_H__ +#define __VERTEX_SHADER_H__ + +#include "r200_reg.h" + +typedef struct { + uint32_t op; + uint32_t src0; + uint32_t src1; + uint32_t src2; +} VERTEX_SHADER_INSTRUCTION; + +extern void r200InitShaderFuncs(struct dd_function_table *functions); +extern void r200SetupVertexProg( GLcontext *ctx ); + +#define VSF_FLAG_X 1 +#define VSF_FLAG_Y 2 +#define VSF_FLAG_Z 4 +#define VSF_FLAG_W 8 +#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) +#define VSF_FLAG_ALL 0xf +#define VSF_FLAG_NONE 0 + +#define R200_VSF_MAX_INST 128 +#define R200_VSF_MAX_PARAM 192 +#define R200_VSF_MAX_TEMPS 12 + +#define R200_VPI_OUT_REG_INDEX_SHIFT 13 +#define R200_VPI_OUT_REG_INDEX_MASK (31 << 13) /* GUESS based on fglrx native limits */ + +#define R200_VPI_OUT_WRITE_X (1 << 20) +#define R200_VPI_OUT_WRITE_Y (1 << 21) +#define R200_VPI_OUT_WRITE_Z (1 << 22) +#define R200_VPI_OUT_WRITE_W (1 << 23) + +#define R200_VPI_IN_REG_CLASS_TEMPORARY (0 << 0) +#define R200_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) +#define R200_VPI_IN_REG_CLASS_PARAMETER (2 << 0) +#define R200_VPI_IN_REG_CLASS_NONE (9 << 0) +#define R200_VPI_IN_REG_CLASS_MASK (31 << 0) /* GUESS */ + +#define R200_VPI_IN_REG_INDEX_SHIFT 5 +#define R200_VPI_IN_REG_INDEX_MASK (255 << 5) /* GUESS based on fglrx native limits */ + +/* The R200 can select components from the input register arbitrarily. +// Use the following constants, shifted by the component shift you +// want to select */ +#define R200_VPI_IN_SELECT_X 0 +#define R200_VPI_IN_SELECT_Y 1 +#define R200_VPI_IN_SELECT_Z 2 +#define R200_VPI_IN_SELECT_W 3 +#define R200_VPI_IN_SELECT_ZERO 4 +#define R200_VPI_IN_SELECT_ONE 5 +#define R200_VPI_IN_SELECT_MASK 7 + +#define R200_VPI_IN_X_SHIFT 13 +#define R200_VPI_IN_Y_SHIFT 16 +#define R200_VPI_IN_Z_SHIFT 19 +#define R200_VPI_IN_W_SHIFT 22 + +#define R200_VPI_IN_NEG_X (1 << 25) +#define R200_VPI_IN_NEG_Y (1 << 26) +#define R200_VPI_IN_NEG_Z (1 << 27) +#define R200_VPI_IN_NEG_W (1 << 28) + +#define R200_VSF_OUT_CLASS_TMP (0 << 8) +#define R200_VSF_OUT_CLASS_ADDR (3 << 8) +#define R200_VSF_OUT_CLASS_RESULT_POS (4 << 8) +#define R200_VSF_OUT_CLASS_RESULT_COLOR (5 << 8) +#define R200_VSF_OUT_CLASS_RESULT_TEXC (6 << 8) +#define R200_VSF_OUT_CLASS_RESULT_FOGC (7 << 8) +#define R200_VSF_OUT_CLASS_RESULT_POINTSIZE (8 << 8) +#define R200_VSF_OUT_CLASS_MASK (31 << 8) + +/* opcodes - they all are the same as on r300 it seems, however + LIT and POW require different setup */ +#define R200_VPI_OUT_OP_DOT (1 << 0) +#define R200_VPI_OUT_OP_MUL (2 << 0) +#define R200_VPI_OUT_OP_ADD (3 << 0) +#define R200_VPI_OUT_OP_MAD (4 << 0) +#define R200_VPI_OUT_OP_DST (5 << 0) +#define R200_VPI_OUT_OP_FRC (6 << 0) +#define R200_VPI_OUT_OP_MAX (7 << 0) +#define R200_VPI_OUT_OP_MIN (8 << 0) +#define R200_VPI_OUT_OP_SGE (9 << 0) +#define R200_VPI_OUT_OP_SLT (10 << 0) + +#define R200_VPI_OUT_OP_ARL (13 << 0) + +#define R200_VPI_OUT_OP_EXP (65 << 0) +#define R200_VPI_OUT_OP_LOG (66 << 0) +/* base e exp. Useful for fog. */ +#define R200_VPI_OUT_OP_EXP_E (67 << 0) + +#define R200_VPI_OUT_OP_LIT (68 << 0) +#define R200_VPI_OUT_OP_POW (69 << 0) +#define R200_VPI_OUT_OP_RCP (70 << 0) +#define R200_VPI_OUT_OP_RSQ (72 << 0) + +#define R200_VPI_OUT_OP_EX2 (75 << 0) +#define R200_VPI_OUT_OP_LG2 (76 << 0) + +#define R200_VPI_OUT_OP_MAD_2 (128 << 0) + +/* first CARD32 of an instruction */ + +/* possible operations: + DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2, + LG2, MAD_2, ARL */ + +#define MAKE_VSF_OP(op, out_reg, out_reg_fields) \ + ((op) | (out_reg) | ((out_reg_fields) << 20) ) + +#define VSF_IN_CLASS_TMP 0 +#define VSF_IN_CLASS_ATTR 1 +#define VSF_IN_CLASS_PARAM 2 +#define VSF_IN_CLASS_NONE 9 + +#define VSF_IN_COMPONENT_X 0 +#define VSF_IN_COMPONENT_Y 1 +#define VSF_IN_COMPONENT_Z 2 +#define VSF_IN_COMPONENT_W 3 +#define VSF_IN_COMPONENT_ZERO 4 +#define VSF_IN_COMPONENT_ONE 5 + +#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + ( ((in_reg_index)<<R200_VPI_IN_REG_INDEX_SHIFT) \ + | ((comp_x)<<R200_VPI_IN_X_SHIFT) \ + | ((comp_y)<<R200_VPI_IN_Y_SHIFT) \ + | ((comp_z)<<R200_VPI_IN_Z_SHIFT) \ + | ((comp_w)<<R200_VPI_IN_W_SHIFT) \ + | ((negate)<<25) | ((class))) + +#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + MAKE_VSF_SOURCE(in_reg_index, \ + VSF_IN_COMPONENT_##comp_x, \ + VSF_IN_COMPONENT_##comp_y, \ + VSF_IN_COMPONENT_##comp_z, \ + VSF_IN_COMPONENT_##comp_w, \ + VSF_IN_CLASS_##class, VSF_FLAG_##negate) + +/* special sources: */ + +/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */ +#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE) +#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE) + +/* contents of unmodified register */ +#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE) + +/* contents of unmodified parameter */ +#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE) + +/* contents of unmodified temporary register */ +#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE) + +/* components of ATTR register */ +#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE) +#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE) +#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE) +#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE) + +#endif diff --git a/r300/Lindent b/r300/Lindent new file mode 100755 index 0000000..7d8d889 --- /dev/null +++ b/r300/Lindent @@ -0,0 +1,2 @@ +#!/bin/sh +indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@" diff --git a/r300/Makefile.am b/r300/Makefile.am new file mode 100644 index 0000000..0992115 --- /dev/null +++ b/r300/Makefile.am @@ -0,0 +1,30 @@ +AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING + +R300_CFLAGS = -DCOMPILE_R300 -DR200_MERGED=0 -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 +R300_CFLAGS += -I../radeon -I../radeon/server + +r300_dri_la_LTLIBRARIES = r300_dri.la +r300_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(R300_CFLAGS) +r300_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \ + $(DRM_LIBS) $(DRI_LIBS) +r300_dri_ladir = @libdir@/dri +r300_dri_la_SOURCES = \ + ../radeon/radeon_screen.c \ + radeon_context.c \ + radeon_ioctl.c \ + radeon_lock.c \ + radeon_span.c \ + radeon_state.c \ + r300_mem.c \ + r300_context.c \ + r300_ioctl.c \ + r300_cmdbuf.c \ + r300_state.c \ + r300_render.c \ + r300_texmem.c \ + r300_tex.c \ + r300_texstate.c \ + r300_vertprog.c \ + r300_fragprog.c \ + r300_shader.c \ + r300_emit.c diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c new file mode 100644 index 0000000..3befa58 --- /dev/null +++ b/r300/r300_cmdbuf.c @@ -0,0 +1,590 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "glheader.h" +#include "state.h" +#include "imports.h" +#include "macros.h" +#include "context.h" +#include "swrast/swrast.h" +#include "simple_list.h" + +#include "drm.h" +#include "radeon_drm.h" + +#include "radeon_ioctl.h" +#include "r300_context.h" +#include "r300_ioctl.h" +#include "radeon_reg.h" +#include "r300_reg.h" +#include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "r300_state.h" + +// Set this to 1 for extremely verbose debugging of command buffers +#define DEBUG_CMDBUF 0 + +/** + * Send the current command buffer via ioctl to the hardware. + */ +int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) +{ + int ret; + int i; + drm_radeon_cmd_buffer_t cmd; + int start; + + if (r300->radeon.lost_context) { + start = 0; + r300->radeon.lost_context = GL_FALSE; + } else + start = r300->cmdbuf.count_reemit; + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s from %s - %i cliprects\n", + __FUNCTION__, caller, r300->radeon.numClipRects); + + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) + for (i = start; i < r300->cmdbuf.count_used; ++i) + fprintf(stderr, "%d: %08x\n", i, + r300->cmdbuf.cmd_buf[i]); + } + + cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start); + cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; + + if (r300->radeon.state.scissor.enabled) { + cmd.nbox = r300->radeon.state.scissor.numClipRects; + cmd.boxes = + (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects; + } else { + cmd.nbox = r300->radeon.numClipRects; + cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects; + } + + ret = drmCommandWrite(r300->radeon.dri.fd, + DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "Syncing in %s (from %s)\n\n", + __FUNCTION__, caller); + radeonWaitForIdleLocked(&r300->radeon); + } + + r300->dma.nr_released_bufs = 0; + r300->cmdbuf.count_used = 0; + r300->cmdbuf.count_reemit = 0; + + return ret; +} + +int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) +{ + int ret; + + LOCK_HARDWARE(&r300->radeon); + + ret = r300FlushCmdBufLocked(r300, caller); + + UNLOCK_HARDWARE(&r300->radeon); + + if (ret) { + fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); + _mesa_exit(ret); + } + + return ret; +} + +static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) +{ + int i; + int dwords = (*state->check) (r300, state); + + fprintf(stderr, " emit %s/%d/%d\n", state->name, dwords, + state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) + for (i = 0; i < dwords; i++) + fprintf(stderr, " %s[%d]: %08X\n", + state->name, i, state->cmd[i]); +} + +/** + * Emit all atoms with a dirty field equal to dirty. + * + * The caller must have ensured that there is enough space in the command + * buffer. + */ +static __inline__ void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) +{ + struct r300_state_atom *atom; + uint32_t *dest; + + dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; + + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + foreach(atom, &r300->hw.atomlist) { + if ((atom->dirty || r300->hw.all_dirty) == dirty) { + int dwords = (*atom->check) (r300, atom); + + if (dwords) + r300PrintStateAtom(r300, atom); + else + fprintf(stderr, + " skip state %s\n", + atom->name); + } + } + } + + /* Emit WAIT */ + *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); + dest++; + r300->cmdbuf.count_used++; + + /* Emit cache flush */ + *dest = cmdpacket0(R300_TX_CNTL, 1); + dest++; + r300->cmdbuf.count_used++; + + *dest = R300_TX_FLUSH; + dest++; + r300->cmdbuf.count_used++; + + /* Emit END3D */ + *dest = cmdpacify(); + dest++; + r300->cmdbuf.count_used++; + + /* Emit actual atoms */ + + foreach(atom, &r300->hw.atomlist) { + if ((atom->dirty || r300->hw.all_dirty) == dirty) { + int dwords = (*atom->check) (r300, atom); + + if (dwords) { + memcpy(dest, atom->cmd, dwords * 4); + dest += dwords; + r300->cmdbuf.count_used += dwords; + atom->dirty = GL_FALSE; + } + } + } +} + +/** + * Copy dirty hardware state atoms into the command buffer. + * + * We also copy out clean state if we're at the start of a buffer. That makes + * it easy to recover from lost contexts. + */ +void r300EmitState(r300ContextPtr r300) +{ + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (r300->cmdbuf.count_used && !r300->hw.is_dirty + && !r300->hw.all_dirty) + return; + + /* To avoid going across the entire set of states multiple times, just check + * for enough space for the case of emitting all state, and inline the + * r300AllocCmdBuf code here without all the checks. + */ + r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); + + if (!r300->cmdbuf.count_used) { + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Begin reemit state\n"); + + r300EmitAtoms(r300, GL_FALSE); + r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; + } + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Begin dirty state\n"); + + r300EmitAtoms(r300, GL_TRUE); + + assert(r300->cmdbuf.count_used < r300->cmdbuf.size); + + r300->hw.is_dirty = GL_FALSE; + r300->hw.all_dirty = GL_FALSE; +} + +#define CHECK( NM, COUNT ) \ +static int check_##NM( r300ContextPtr r300, \ + struct r300_state_atom* atom ) \ +{ \ + (void) atom; (void) r300; \ + return (COUNT); \ +} + +#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) +#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) + +CHECK(always, atom->cmd_size) + CHECK(variable, packet0_count(atom->cmd) ? (1 + packet0_count(atom->cmd)) : 0) + CHECK(vpu, vpu_count(atom->cmd) ? (1 + vpu_count(atom->cmd) * 4) : 0) +#undef packet0_count +#undef vpu_count +#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ + do { \ + r300->hw.ATOM.cmd_size = (SZ); \ + r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \ + r300->hw.ATOM.name = #ATOM; \ + r300->hw.ATOM.idx = (IDX); \ + r300->hw.ATOM.check = check_##CHK; \ + r300->hw.ATOM.dirty = GL_FALSE; \ + r300->hw.max_state_size += (SZ); \ + insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \ + } while (0) +/** + * Allocate memory for the command buffer and initialize the state atom + * list. Note that the initial hardware state is set by r300InitState(). + */ +void r300InitCmdBuf(r300ContextPtr r300) +{ + int size, mtu; + int has_tcl = 1; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & DEBUG_TEXTURE) { + fprintf(stderr, "Using %d maximum texture units..\n", mtu); + } + + /* Setup the atom linked list */ + make_empty_list(&r300->hw.atomlist); + r300->hw.atomlist.name = "atom-list"; + + /* Initialize state atoms */ + ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); + r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); + ALLOC_STATE(vap_cntl, always, 2, 0); + r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE(vte, always, 3, 0); + r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); + ALLOC_STATE(unk2134, always, 3, 0); + r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2); + ALLOC_STATE(vap_cntl_status, always, 2, 0); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); + r300->hw.vir[0].cmd[R300_VIR_CMD_0] = + cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); + ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); + r300->hw.vir[1].cmd[R300_VIR_CMD_0] = + cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1); + ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); + ALLOC_STATE(unk21DC, always, 2, 0); + r300->hw.unk21DC.cmd[0] = cmdpacket0(0x21DC, 1); + ALLOC_STATE(vap_clip_cntl, always, 2, 0); + r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); + ALLOC_STATE(unk2220, always, 5, 0); + r300->hw.unk2220.cmd[0] = cmdpacket0(0x2220, 4); + ALLOC_STATE(unk2288, always, 2, 0); + r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); + r300->hw.vof.cmd[R300_VOF_CMD_0] = + cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); + + if (has_tcl) { + ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); + r300->hw.pvs.cmd[R300_PVS_CMD_0] = + cmdpacket0(R300_VAP_PVS_CNTL_1, 3); + } + + ALLOC_STATE(gb_enable, always, 2, 0); + r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1); + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5); + ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); + r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); + ALLOC_STATE(unk4200, always, 5, 0); + r300->hw.unk4200.cmd[0] = cmdpacket0(0x4200, 4); + ALLOC_STATE(unk4214, always, 2, 0); + r300->hw.unk4214.cmd[0] = cmdpacket0(0x4214, 1); + ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); + r300->hw.ps.cmd[0] = cmdpacket0(R300_RE_POINTSIZE, 1); + ALLOC_STATE(unk4230, always, 4, 0); + r300->hw.unk4230.cmd[0] = cmdpacket0(0x4230, 3); + ALLOC_STATE(lcntl, always, 2, 0); + r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1); + ALLOC_STATE(unk4260, always, 4, 0); + r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3); + ALLOC_STATE(shade, always, 5, 0); + r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); + ALLOC_STATE(polygon_mode, always, 4, 0); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); + ALLOC_STATE(fogp, always, 3, 0); + r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); + ALLOC_STATE(zbias_cntl, always, 2, 0); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); + ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); + r300->hw.zbs.cmd[R300_ZBS_CMD_0] = + cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); + ALLOC_STATE(occlusion_cntl, always, 2, 0); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); + ALLOC_STATE(unk42C0, always, 3, 0); + r300->hw.unk42C0.cmd[0] = cmdpacket0(0x42C0, 2); + ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); + r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_CNTL_0, 2); + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_INTERP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + ALLOC_STATE(unk43A4, always, 3, 0); + r300->hw.unk43A4.cmd[0] = cmdpacket0(0x43A4, 2); + ALLOC_STATE(unk43E8, always, 2, 0); + r300->hw.unk43E8.cmd[0] = cmdpacket0(0x43E8, 1); + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); + ALLOC_STATE(unk46A4, always, 6, 0); + r300->hw.unk46A4.cmd[0] = cmdpacket0(0x46A4, 5); + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_RE_FOG_STATE, 1); + ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FOG_COLOR_R, 3); + ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_PP_ALPHA_TEST, 2); + ALLOC_STATE(unk4BD8, always, 2, 0); + r300->hw.unk4BD8.cmd[0] = cmdpacket0(0x4BD8, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); + ALLOC_STATE(unk4E00, always, 2, 0); + r300->hw.unk4E00.cmd[0] = cmdpacket0(0x4E00, 1); + ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); + r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); + ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); + r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1); + ALLOC_STATE(blend_color, always, 4, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); + ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); + r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); + r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); + ALLOC_STATE(unk4E50, always, 10, 0); + r300->hw.unk4E50.cmd[0] = cmdpacket0(0x4E50, 9); + ALLOC_STATE(unk4E88, always, 2, 0); + r300->hw.unk4E88.cmd[0] = cmdpacket0(0x4E88, 1); + ALLOC_STATE(unk4EA0, always, 3, 0); + r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2); + ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); + r300->hw.zs.cmd[R300_ZS_CMD_0] = + cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); + ALLOC_STATE(zstencil_format, always, 5, 0); + r300->hw.zstencil_format.cmd[0] = + cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); + ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); + r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2); + ALLOC_STATE(unk4F28, always, 2, 0); + r300->hw.unk4F28.cmd[0] = cmdpacket0(0x4F28, 1); + ALLOC_STATE(unk4F30, always, 3, 0); + r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); + ALLOC_STATE(unk4F44, always, 2, 0); + r300->hw.unk4F44.cmd[0] = cmdpacket0(0x4F44, 1); + ALLOC_STATE(unk4F54, always, 2, 0); + r300->hw.unk4F54.cmd[0] = cmdpacket0(0x4F54, 1); + + /* VPU only on TCL */ + if (has_tcl) { + int i; + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); + r300->hw.vpi.cmd[R300_VPI_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0); + + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0); + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } + } + + /* Textures */ + ALLOC_STATE(tex.filter, variable, mtu + 1, 0); + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER_0, 0); + + ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER1_0, 0); + + ALLOC_STATE(tex.size, variable, mtu + 1, 0); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0); + + ALLOC_STATE(tex.format, variable, mtu + 1, 0); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FORMAT_0, 0); + + ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, 0); + + ALLOC_STATE(tex.offset, variable, mtu + 1, 0); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_OFFSET_0, 0); + + ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_CHROMA_KEY_0, 0); + + ALLOC_STATE(tex.border_color, variable, mtu + 1, 0); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_BORDER_COLOR_0, 0); + + r300->hw.is_dirty = GL_TRUE; + r300->hw.all_dirty = GL_TRUE; + + /* Initialize command buffer */ + size = + 256 * driQueryOptioni(&r300->radeon.optionCache, + "command_buffer_size"); + if (size < 2 * r300->hw.max_state_size) { + size = 2 * r300->hw.max_state_size + 65535; + } + if (size > 64 * 256) + size = 64 * 256; + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) { + fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", + sizeof(drm_r300_cmd_header_t)); + fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", + sizeof(drm_radeon_cmd_buffer_t)); + fprintf(stderr, + "Allocating %d bytes command buffer (max state is %d bytes)\n", + size * 4, r300->hw.max_state_size * 4); + } + + r300->cmdbuf.size = size; + r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); + r300->cmdbuf.count_used = 0; + r300->cmdbuf.count_reemit = 0; +} + +/** + * Destroy the command buffer and state atoms. + */ +void r300DestroyCmdBuf(r300ContextPtr r300) +{ + struct r300_state_atom *atom; + + FREE(r300->cmdbuf.cmd_buf); + + foreach(atom, &r300->hw.atomlist) { + FREE(atom->cmd); + } +} + +void r300EmitBlit(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, GLuint w, GLuint h) +{ + drm_r300_cmd_header_t *cmd; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", + __FUNCTION__, src_pitch, src_offset, srcx, srcy, + dst_pitch, dst_offset, dstx, dsty, w, h); + + assert((src_pitch & 63) == 0); + assert((dst_pitch & 63) == 0); + assert((src_offset & 1023) == 0); + assert((dst_offset & 1023) == 0); + assert(w < (1 << 16)); + assert(h < (1 << 16)); + + cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); + + cmd[0].header.cmd_type = R300_CMD_PACKET3; + cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; + cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); + cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); + + cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); + cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); + cmd[5].u = (srcx << 16) | srcy; + cmd[6].u = (dstx << 16) | dsty; /* dst */ + cmd[7].u = (w << 16) | h; +} + +void r300EmitWait(r300ContextPtr rmesa, GLuint flags) +{ + drm_r300_cmd_header_t *cmd; + + assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); + + cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); + cmd[0].u = 0; + cmd[0].wait.cmd_type = R300_CMD_WAIT; + cmd[0].wait.flags = flags; +} diff --git a/r300/r300_cmdbuf.h b/r300/r300_cmdbuf.h new file mode 100644 index 0000000..bfb2eda --- /dev/null +++ b/r300/r300_cmdbuf.h @@ -0,0 +1,116 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_CMDBUF_H__ +#define __R300_CMDBUF_H__ + +#include "r300_context.h" + +extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller); +extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller); + +extern void r300EmitState(r300ContextPtr r300); + +extern void r300InitCmdBuf(r300ContextPtr r300); +extern void r300DestroyCmdBuf(r300ContextPtr r300); + +/** + * Make sure that enough space is available in the command buffer + * by flushing if necessary. + * + * \param dwords The number of dwords we need to be free on the command buffer + */ +static __inline__ void r300EnsureCmdBufSpace(r300ContextPtr r300, + int dwords, const char *caller) +{ + assert(dwords < r300->cmdbuf.size); + + if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) + r300FlushCmdBuf(r300, caller); +} + +/** + * Allocate the given number of dwords in the command buffer and return + * a pointer to the allocated area. + * When necessary, these functions cause a flush. r300AllocCmdBuf() also + * causes state reemission after a flush. This is necessary to ensure + * correct hardware state after an unlock. + */ +static __inline__ uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, + int dwords, const char *caller) +{ + uint32_t *ptr; + + r300EnsureCmdBufSpace(r300, dwords, caller); + + ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; + r300->cmdbuf.count_used += dwords; + return ptr; +} + +static __inline__ uint32_t *r300AllocCmdBuf(r300ContextPtr r300, + int dwords, const char *caller) +{ + uint32_t *ptr; + + r300EnsureCmdBufSpace(r300, dwords, caller); + + if (!r300->cmdbuf.count_used) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "Reemit state after flush (from %s)\n", caller); + r300EmitState(r300); + } + + ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; + r300->cmdbuf.count_used += dwords; + return ptr; +} + +extern void r300EmitBlit(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, GLuint w, GLuint h); + +extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); +extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); +extern void r300EmitVertexShader(r300ContextPtr rmesa); +extern void r300EmitPixelShader(r300ContextPtr rmesa); + +#endif /* __R300_CMDBUF_H__ */ diff --git a/r300/r300_context.c b/r300/r300_context.c new file mode 100644 index 0000000..9ea14ab --- /dev/null +++ b/r300/r300_context.c @@ -0,0 +1,532 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "glheader.h" +#include "api_arrayelt.h" +#include "context.h" +#include "simple_list.h" +#include "imports.h" +#include "matrix.h" +#include "extensions.h" +#include "state.h" +#include "bufferobj.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "tnl/t_vp_build.h" + +#include "drivers/common/driverfuncs.h" + +#include "radeon_ioctl.h" +#include "radeon_span.h" +#include "r300_context.h" +#include "r300_cmdbuf.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "r300_tex.h" +#include "r300_emit.h" + +#ifdef USER_BUFFERS +#include "r300_mem.h" +#endif + +#include "vblank.h" +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ +int future_hw_tcl_on = 1; +int hw_tcl_on = 1; + +#define need_GL_EXT_stencil_two_side +#define need_GL_ARB_multisample +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_EXT_blend_minmax +//#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_gpu_program_parameters +#define need_GL_NV_vertex_program +#include "extension_helper.h" + +const struct dri_extension card_extensions[] = { + /* *INDENT-OFF* */ + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_fragment_program", NULL}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, +// {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_EXT_texture_mirror_clamp", NULL}, + {"GL_EXT_texture_rectangle", NULL}, + {"GL_ATI_texture_env_combine3", NULL}, + {"GL_ATI_texture_mirror_once", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_MESAX_texture_float", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_SGIS_generate_mipmap", NULL}, + {NULL, NULL} + /* *INDENT-ON* */ +}; + +extern struct tnl_pipeline_stage _r300_render_stage; +extern const struct tnl_pipeline_stage _r300_tcl_stage; + +static const struct tnl_pipeline_stage *r300_pipeline[] = { + + /* Try and go straight to t&l + */ + &_r300_tcl_stage, + + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_vertex_program_stage, + + /* Try again to go to tcl? + * - no good for asymmetric-twoside (do with multipass) + * - no good for asymmetric-unfilled (do with multipass) + * - good for material + * - good for texgen + * - need to manipulate a bit of state + * + * - worth it/not worth it? + */ + + /* Else do them here. + */ + &_r300_render_stage, + &_tnl_render_stage, /* FALLBACK */ + 0, +}; + +/* Create the device specific rendering context. + */ +GLboolean r300CreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + struct dd_function_table functions; + r300ContextPtr r300; + GLcontext *ctx; + int tcl_mode, i; + + assert(glVisual); + assert(driContextPriv); + assert(screen); + + /* Allocate the R300 context */ + r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); + if (!r300) + return GL_FALSE; + + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + hw_tcl_on = future_hw_tcl_on = 0; + + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); + r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, + "def_max_anisotropy"); + + /* Init default driver functions then plug in our R300-specific functions + * (the texture functions are especially important) + */ + _mesa_init_driver_functions(&functions); + r300InitIoctlFuncs(&functions); + r300InitStateFuncs(&functions); + r300InitTextureFuncs(&functions); + r300InitShaderFuncs(&functions); + +#ifdef USER_BUFFERS + r300_mem_init(r300); +#endif + + if (!radeonInitContext(&r300->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { + FREE(r300); + return GL_FALSE; + } + + /* Init r300 context data */ + r300->dma.buf0_address = + r300->radeon.radeonScreen->buffers->list[0].address; + + (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); + make_empty_list(&r300->swapped); + + r300->nr_heaps = 1 /* screen->numTexHeaps */ ; + assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); + for (i = 0; i < r300->nr_heaps; i++) { + /* *INDENT-OFF* */ + r300->texture_heaps[i] = driCreateTextureHeap(i, r300, + screen-> + texSize[i], 12, + RADEON_NR_TEX_REGIONS, + (drmTextureRegionPtr) + r300->radeon.sarea-> + tex_list[i], + &r300->radeon.sarea-> + tex_age[i], + &r300->swapped, + sizeof + (r300TexObj), + (destroy_texture_object_t + *) + r300DestroyTexObj); + /* *INDENT-ON* */ + } + r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, + "texture_depth"); + if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) + r300->texture_depth = (screen->cpp == 4) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have them both in + * texturable memory at once. + */ + + ctx = r300->radeon.glCtx; + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = + MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; + ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + +#ifdef USER_BUFFERS + /* Needs further modifications */ +#if 0 + ctx->Const.MaxArrayLockSize = + ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); +#endif +#endif + + /* Initialize the software rasterizer and helper modules. + */ + _swrast_CreateContext(ctx); + _vbo_CreateContext(ctx); + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + _swsetup_Wakeup(ctx); + _ae_create_context(ctx); + + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, r300_pipeline); + + /* Try and keep materials and vertices separate: + */ +/* _tnl_isolate_materials(ctx, GL_TRUE); */ + + /* Configure swrast and TNL to match hardware characteristics: + */ + _swrast_allow_pixel_fog(ctx, GL_FALSE); + _swrast_allow_vertex_fog(ctx, GL_TRUE); + _tnl_allow_pixel_fog(ctx, GL_FALSE); + _tnl_allow_vertex_fog(ctx, GL_TRUE); + + /* currently bogus data */ + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + + ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = + PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = + PFS_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ + _tnl_ProgramCacheInit(ctx); + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + + driInitExtensions(ctx, card_extensions, GL_TRUE); + + if (driQueryOptionb + (&r300->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r300->radeon.glCtx->Mesa_DXTn + && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else + if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) + { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + r300->disable_lowimpact_fallback = + driQueryOptionb(&r300->radeon.optionCache, + "disable_lowimpact_fallback"); + + radeonInitSpanFuncs(ctx); + r300InitCmdBuf(r300); + r300InitState(r300); + + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); + if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); +#if R200_MERGED + FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1); +#endif + } + if (tcl_mode == DRI_CONF_TCL_SW || + !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + r300->radeon.radeonScreen->chip_flags &= + ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } + TCL_FALLBACK(r300->radeon.glCtx, + RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + } + + return GL_TRUE; +} + +static void r300FreeGartAllocations(r300ContextPtr r300) +{ + int i, ret, tries = 0, done_age, in_use = 0; + drm_radeon_mem_free_t memfree; + + memfree.region = RADEON_MEM_REGION_GART; + +#ifdef USER_BUFFERS + for (i = r300->rmm->u_last; i > 0; i--) { + if (r300->rmm->u_list[i].ptr == NULL) { + continue; + } + + /* check whether this buffer is still in use */ + if (r300->rmm->u_list[i].pending) { + in_use++; + } + } + /* Cannot flush/lock if no context exists. */ + if (in_use) + r300FlushCmdBuf(r300, __FUNCTION__); + + done_age = radeonGetAge((radeonContextPtr) r300); + + for (i = r300->rmm->u_last; i > 0; i--) { + if (r300->rmm->u_list[i].ptr == NULL) { + continue; + } + + /* check whether this buffer is still in use */ + if (!r300->rmm->u_list[i].pending) { + continue; + } + + assert(r300->rmm->u_list[i].h_pending == 0); + + tries = 0; + while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { + usleep(10); + done_age = radeonGetAge((radeonContextPtr) r300); + } + if (tries >= 1000) { + WARN_ONCE("Failed to idle region!"); + } + + memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - + (char *)r300->radeon.radeonScreen->gartTextures.map; + + ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, + DRM_RADEON_FREE, &memfree, + sizeof(memfree)); + if (ret) { + fprintf(stderr, "Failed to free at %p\nret = %s\n", + r300->rmm->u_list[i].ptr, strerror(-ret)); + } else { + if (i == r300->rmm->u_last) + r300->rmm->u_last--; + + r300->rmm->u_list[i].pending = 0; + r300->rmm->u_list[i].ptr = NULL; + } + } + r300->rmm->u_head = i; +#endif /* USER_BUFFERS */ +} + +/* Destroy the device specific context. + */ +void r300DestroyContext(__DRIcontextPrivate * driContextPriv) +{ + GET_CURRENT_CONTEXT(ctx); + r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate; + radeonContextPtr radeon = (radeonContextPtr) r300; + radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; + + if (RADEON_DEBUG & DEBUG_DRI) { + fprintf(stderr, "Destroying context !\n"); + } + + /* check if we're deleting the currently bound context */ + if (&r300->radeon == current) { + radeonFlush(r300->radeon.glCtx); + _mesa_make_current(NULL, NULL, NULL); + } + + /* Free r300 context resources */ + assert(r300); /* should never be null */ + + if (r300) { + GLboolean release_texture_heaps; + + release_texture_heaps = + (r300->radeon.glCtx->Shared->RefCount == 1); + _swsetup_DestroyContext(r300->radeon.glCtx); + _tnl_ProgramCacheDestroy(r300->radeon.glCtx); + _tnl_DestroyContext(r300->radeon.glCtx); + _vbo_DestroyContext(r300->radeon.glCtx); + _swrast_DestroyContext(r300->radeon.glCtx); + + if (r300->dma.current.buf) { + r300ReleaseDmaRegion(r300, &r300->dma.current, + __FUNCTION__); +#ifndef USER_BUFFERS + r300FlushCmdBuf(r300, __FUNCTION__); +#endif + } + r300FreeGartAllocations(r300); + r300DestroyCmdBuf(r300); + + if (radeon->state.scissor.pClipRects) { + FREE(radeon->state.scissor.pClipRects); + radeon->state.scissor.pClipRects = NULL; + } + + if (release_texture_heaps) { + /* This share group is about to go away, free our private + * texture object data. + */ + int i; + + for (i = 0; i < r300->nr_heaps; i++) { + driDestroyTextureHeap(r300->texture_heaps[i]); + r300->texture_heaps[i] = NULL; + } + + assert(is_empty_list(&r300->swapped)); + } + + radeonCleanupContext(&r300->radeon); + +#ifdef USER_BUFFERS + /* the memory manager might be accessed when Mesa frees the shared + * state, so don't destroy it earlier + */ + r300_mem_destroy(r300); +#endif + + /* free the option cache */ + driDestroyOptionCache(&r300->radeon.optionCache); + + FREE(r300); + } +} diff --git a/r300/r300_context.h b/r300/r300_context.h new file mode 100644 index 0000000..6b0a588 --- /dev/null +++ b/r300/r300_context.h @@ -0,0 +1,916 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_CONTEXT_H__ +#define __R300_CONTEXT_H__ + +#include "tnl/t_vertex.h" +#include "drm.h" +#include "radeon_drm.h" +#include "dri_util.h" +#include "texmem.h" + +#include "macros.h" +#include "mtypes.h" +#include "colormac.h" + +#define USER_BUFFERS + +//#define OPTIMIZE_ELTS + +struct r300_context; +typedef struct r300_context r300ContextRec; +typedef struct r300_context *r300ContextPtr; + +#include "radeon_lock.h" +#include "mm.h" + +/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! +*/ +#define WARN_ONCE(a, ...) { \ + static int warn##__LINE__=1; \ + if(warn##__LINE__){ \ + fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ + fprintf(stderr, "File %s function %s line %d\n", \ + __FILE__, __FUNCTION__, __LINE__); \ + fprintf(stderr, a, ## __VA_ARGS__);\ + fprintf(stderr, "***************************************************************************\n"); \ + warn##__LINE__=0;\ + } \ + } + +#include "r300_vertprog.h" +#include "r300_fragprog.h" + +/** + * This function takes a float and packs it into a uint32_t + */ +static __inline__ uint32_t r300PackFloat32(float fl) +{ + union { + float fl; + uint32_t u; + } u; + + u.fl = fl; + return u.u; +} + +/* This is probably wrong for some values, I need to test this + * some more. Range checking would be a good idea also.. + * + * But it works for most things. I'll fix it later if someone + * else with a better clue doesn't + */ +static __inline__ uint32_t r300PackFloat24(float f) +{ + float mantissa; + int exponent; + uint32_t float24 = 0; + + if (f == 0.0) + return 0; + + mantissa = frexpf(f, &exponent); + + /* Handle -ve */ + if (mantissa < 0) { + float24 |= (1 << 23); + mantissa = mantissa * -1.0; + } + /* Handle exponent, bias of 63 */ + exponent += 62; + float24 |= (exponent << 16); + /* Kill 7 LSB of mantissa */ + float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7; + + return float24; +} + +/************ DMA BUFFERS **************/ + +/* Need refcounting on dma buffers: + */ +struct r300_dma_buffer { + int refcount; /**< the number of retained regions in buf */ + drmBufPtr buf; + int id; +}; +#undef GET_START +#ifdef USER_BUFFERS +#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) +#else +#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ + (rvb)->address - rmesa->dma.buf0_address + \ + (rvb)->start) +#endif +/* A retained region, eg vertices for indexed vertices. + */ +struct r300_dma_region { + struct r300_dma_buffer *buf; + char *address; /* == buf->address */ + int start, end, ptr; /* offsets from start of buf */ + + int aos_offset; /* address in GART memory */ + int aos_stride; /* distance between elements, in dwords */ + int aos_size; /* number of components (1-4) */ + int aos_reg; /* VAP register assignment */ +}; + +struct r300_dma { + /* Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ + struct r300_dma_region current; + + void (*flush) (r300ContextPtr); + + char *buf0_address; /* start of buf[0], for index calcs */ + + /* Number of "in-flight" DMA buffers, i.e. the number of buffers + * for which a DISCARD command is currently queued in the command buffer. + */ + GLuint nr_released_bufs; +}; + + /* Texture related */ + +typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; + +/* Texture object in locally shared texture space. + */ +struct r300_tex_obj { + driTextureObject base; + + GLuint bufAddr; /* Offset to start of locally + shared texture block */ + + GLuint dirty_state; /* Flags (1 per texunit) for + whether or not this texobj + has dirty hardware state + (pp_*) that needs to be + brought into the + texunit. */ + + drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + /* Six, for the cube faces */ + + GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ + + GLuint pitch; /* this isn't sent to hardware just used in calculations */ + /* hardware register values */ + /* Note that R200 has 8 registers per texture and R300 only 7 */ + GLuint filter; + GLuint filter_1; + GLuint pitch_reg; + GLuint size; /* npot only */ + GLuint format; + GLuint offset; /* Image location in the card's address space. + All cube faces follow. */ + GLuint unknown4; + GLuint unknown5; + /* end hardware registers */ + + /* registers computed by r200 code - keep them here to + compare against what is actually written. + + to be removed later.. */ + GLuint pp_border_color; + GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ + GLuint format_x; + + GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ +}; + +struct r300_texture_env_state { + r300TexObjPtr texobj; + GLenum format; + GLenum envMode; +}; + +/* The blit width for texture uploads + */ +#define R300_BLIT_WIDTH_BYTES 1024 +#define R300_MAX_TEXTURE_UNITS 8 + +struct r300_texture_state { + struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; + int tc_count; /* number of incoming texture coordinates from VAP */ +}; + +/** + * A block of hardware state. + * + * When check returns non-zero, the returned number of dwords must be + * copied verbatim into the command buffer in order to update a state atom + * when it is dirty. + */ +struct r300_state_atom { + struct r300_state_atom *next, *prev; + const char *name; /* for debug */ + int cmd_size; /* maximum size in dwords */ + GLuint idx; /* index in an array (e.g. textures) */ + uint32_t *cmd; + GLboolean dirty; + + int (*check) (r300ContextPtr, struct r300_state_atom * atom); +}; + +#define R300_VPT_CMD_0 0 +#define R300_VPT_XSCALE 1 +#define R300_VPT_XOFFSET 2 +#define R300_VPT_YSCALE 3 +#define R300_VPT_YOFFSET 4 +#define R300_VPT_ZSCALE 5 +#define R300_VPT_ZOFFSET 6 +#define R300_VPT_CMDSIZE 7 + +#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */ +#define R300_VIR_CNTL_0 1 +#define R300_VIR_CNTL_1 2 +#define R300_VIR_CNTL_2 3 +#define R300_VIR_CNTL_3 4 +#define R300_VIR_CNTL_4 5 +#define R300_VIR_CNTL_5 6 +#define R300_VIR_CNTL_6 7 +#define R300_VIR_CNTL_7 8 +#define R300_VIR_CMDSIZE 9 + +#define R300_VIC_CMD_0 0 +#define R300_VIC_CNTL_0 1 +#define R300_VIC_CNTL_1 2 +#define R300_VIC_CMDSIZE 3 + +#define R300_VOF_CMD_0 0 +#define R300_VOF_CNTL_0 1 +#define R300_VOF_CNTL_1 2 +#define R300_VOF_CMDSIZE 3 + +#define R300_PVS_CMD_0 0 +#define R300_PVS_CNTL_1 1 +#define R300_PVS_CNTL_2 2 +#define R300_PVS_CNTL_3 3 +#define R300_PVS_CMDSIZE 4 + +#define R300_GB_MISC_CMD_0 0 +#define R300_GB_MISC_MSPOS_0 1 +#define R300_GB_MISC_MSPOS_1 2 +#define R300_GB_MISC_TILE_CONFIG 3 +#define R300_GB_MISC_SELECT 4 +#define R300_GB_MISC_AA_CONFIG 5 +#define R300_GB_MISC_CMDSIZE 6 + +#define R300_TXE_CMD_0 0 +#define R300_TXE_ENABLE 1 +#define R300_TXE_CMDSIZE 2 + +#define R300_PS_CMD_0 0 +#define R300_PS_POINTSIZE 1 +#define R300_PS_CMDSIZE 2 + +#define R300_ZBS_CMD_0 0 +#define R300_ZBS_T_FACTOR 1 +#define R300_ZBS_T_CONSTANT 2 +#define R300_ZBS_W_FACTOR 3 +#define R300_ZBS_W_CONSTANT 4 +#define R300_ZBS_CMDSIZE 5 + +#define R300_CUL_CMD_0 0 +#define R300_CUL_CULL 1 +#define R300_CUL_CMDSIZE 2 + +#define R300_RC_CMD_0 0 +#define R300_RC_CNTL_0 1 +#define R300_RC_CNTL_1 2 +#define R300_RC_CMDSIZE 3 + +#define R300_RI_CMD_0 0 +#define R300_RI_INTERP_0 1 +#define R300_RI_INTERP_1 2 +#define R300_RI_INTERP_2 3 +#define R300_RI_INTERP_3 4 +#define R300_RI_INTERP_4 5 +#define R300_RI_INTERP_5 6 +#define R300_RI_INTERP_6 7 +#define R300_RI_INTERP_7 8 +#define R300_RI_CMDSIZE 9 + +#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ +#define R300_RR_ROUTE_0 1 +#define R300_RR_ROUTE_1 2 +#define R300_RR_ROUTE_2 3 +#define R300_RR_ROUTE_3 4 +#define R300_RR_ROUTE_4 5 +#define R300_RR_ROUTE_5 6 +#define R300_RR_ROUTE_6 7 +#define R300_RR_ROUTE_7 8 +#define R300_RR_CMDSIZE 9 + +#define R300_FP_CMD_0 0 +#define R300_FP_CNTL0 1 +#define R300_FP_CNTL1 2 +#define R300_FP_CNTL2 3 +#define R300_FP_CMD_1 4 +#define R300_FP_NODE0 5 +#define R300_FP_NODE1 6 +#define R300_FP_NODE2 7 +#define R300_FP_NODE3 8 +#define R300_FP_CMDSIZE 9 + +#define R300_FPT_CMD_0 0 +#define R300_FPT_INSTR_0 1 +#define R300_FPT_CMDSIZE 65 + +#define R300_FPI_CMD_0 0 +#define R300_FPI_INSTR_0 1 +#define R300_FPI_CMDSIZE 65 + +#define R300_FPP_CMD_0 0 +#define R300_FPP_PARAM_0 1 +#define R300_FPP_CMDSIZE (32*4+1) + +#define R300_FOGS_CMD_0 0 +#define R300_FOGS_STATE 1 +#define R300_FOGS_CMDSIZE 2 + +#define R300_FOGC_CMD_0 0 +#define R300_FOGC_R 1 +#define R300_FOGC_G 2 +#define R300_FOGC_B 3 +#define R300_FOGC_CMDSIZE 4 + +#define R300_FOGP_CMD_0 0 +#define R300_FOGP_SCALE 1 +#define R300_FOGP_START 2 +#define R300_FOGP_CMDSIZE 3 + +#define R300_AT_CMD_0 0 +#define R300_AT_ALPHA_TEST 1 +#define R300_AT_UNKNOWN 2 +#define R300_AT_CMDSIZE 3 + +#define R300_BLD_CMD_0 0 +#define R300_BLD_CBLEND 1 +#define R300_BLD_ABLEND 2 +#define R300_BLD_CMDSIZE 3 + +#define R300_CMK_CMD_0 0 +#define R300_CMK_COLORMASK 1 +#define R300_CMK_CMDSIZE 2 + +#define R300_CB_CMD_0 0 +#define R300_CB_OFFSET 1 +#define R300_CB_CMD_1 2 +#define R300_CB_PITCH 3 +#define R300_CB_CMDSIZE 4 + +#define R300_ZS_CMD_0 0 +#define R300_ZS_CNTL_0 1 +#define R300_ZS_CNTL_1 2 +#define R300_ZS_CNTL_2 3 +#define R300_ZS_CMDSIZE 4 + +#define R300_ZB_CMD_0 0 +#define R300_ZB_OFFSET 1 +#define R300_ZB_PITCH 2 +#define R300_ZB_CMDSIZE 3 + +#define R300_VPI_CMD_0 0 +#define R300_VPI_INSTR_0 1 +#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ + +#define R300_VPP_CMD_0 0 +#define R300_VPP_PARAM_0 1 +#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */ + +#define R300_VPUCP_CMD_0 0 +#define R300_VPUCP_X 1 +#define R300_VPUCP_Y 2 +#define R300_VPUCP_Z 3 +#define R300_VPUCP_W 4 +#define R300_VPUCP_CMDSIZE 5 /* 256 4-component parameters */ + +#define R300_VPS_CMD_0 0 +#define R300_VPS_ZERO_0 1 +#define R300_VPS_ZERO_1 2 +#define R300_VPS_POINTSIZE 3 +#define R300_VPS_ZERO_3 4 +#define R300_VPS_CMDSIZE 5 + + /* the layout is common for all fields inside tex */ +#define R300_TEX_CMD_0 0 +#define R300_TEX_VALUE_0 1 +/* We don't really use this, instead specify mtu+1 dynamically +#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) +*/ + +/** + * Cache for hardware register state. + */ +struct r300_hw_state { + struct r300_state_atom atomlist; + + GLboolean is_dirty; + GLboolean all_dirty; + int max_state_size; /* in dwords */ + + struct r300_state_atom vpt; /* viewport (1D98) */ + struct r300_state_atom vap_cntl; + struct r300_state_atom vof; /* VAP output format register 0x2090 */ + struct r300_state_atom vte; /* (20B0) */ + struct r300_state_atom unk2134; /* (2134) */ + struct r300_state_atom vap_cntl_status; + struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ + struct r300_state_atom vic; /* vap input control (2180) */ + struct r300_state_atom unk21DC; /* (21DC) */ + struct r300_state_atom vap_clip_cntl; + struct r300_state_atom unk2220; /* (2220) */ + struct r300_state_atom unk2288; /* (2288) */ + struct r300_state_atom pvs; /* pvs_cntl (22D0) */ + struct r300_state_atom gb_enable; /* (4008) */ + struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ + struct r300_state_atom unk4200; /* (4200) */ + struct r300_state_atom unk4214; /* (4214) */ + struct r300_state_atom ps; /* pointsize (421C) */ + struct r300_state_atom unk4230; /* (4230) */ + struct r300_state_atom lcntl; /* line control */ + struct r300_state_atom unk4260; /* (4260) */ + struct r300_state_atom shade; + struct r300_state_atom polygon_mode; + struct r300_state_atom fogp; /* fog parameters (4294) */ + struct r300_state_atom unk429C; /* (429C) */ + struct r300_state_atom zbias_cntl; + struct r300_state_atom zbs; /* zbias (42A4) */ + struct r300_state_atom occlusion_cntl; + struct r300_state_atom cul; /* cull cntl (42B8) */ + struct r300_state_atom unk42C0; /* (42C0) */ + struct r300_state_atom rc; /* rs control (4300) */ + struct r300_state_atom ri; /* rs interpolators (4310) */ + struct r300_state_atom rr; /* rs route (4330) */ + struct r300_state_atom unk43A4; /* (43A4) */ + struct r300_state_atom unk43E8; /* (43E8) */ + struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ + struct r300_state_atom fpt; /* texi - (4620) */ + struct r300_state_atom unk46A4; /* (46A4) */ + struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ + struct r300_state_atom fogs; /* fog state (4BC0) */ + struct r300_state_atom fogc; /* fog color (4BC8) */ + struct r300_state_atom at; /* alpha test (4BD4) */ + struct r300_state_atom unk4BD8; /* (4BD8) */ + struct r300_state_atom fpp; /* 0x4C00 and following */ + struct r300_state_atom unk4E00; /* (4E00) */ + struct r300_state_atom bld; /* blending (4E04) */ + struct r300_state_atom cmk; /* colormask (4E0C) */ + struct r300_state_atom blend_color; /* constant blend color */ + struct r300_state_atom cb; /* colorbuffer (4E28) */ + struct r300_state_atom unk4E50; /* (4E50) */ + struct r300_state_atom unk4E88; /* (4E88) */ + struct r300_state_atom unk4EA0; /* (4E88) I saw it only written on RV350 hardware.. */ + struct r300_state_atom zs; /* zstencil control (4F00) */ + struct r300_state_atom zstencil_format; + struct r300_state_atom zb; /* z buffer (4F20) */ + struct r300_state_atom unk4F28; /* (4F28) */ + struct r300_state_atom unk4F30; /* (4F30) */ + struct r300_state_atom unk4F44; /* (4F44) */ + struct r300_state_atom unk4F54; /* (4F54) */ + + struct r300_state_atom vpi; /* vp instructions */ + struct r300_state_atom vpp; /* vp parameters */ + struct r300_state_atom vps; /* vertex point size (?) */ + struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */ + /* 8 texture units */ + /* the state is grouped by function and not by + texture unit. This makes single unit updates + really awkward - we are much better off + updating the whole thing at once */ + struct { + struct r300_state_atom filter; + struct r300_state_atom filter_1; + struct r300_state_atom size; + struct r300_state_atom format; + struct r300_state_atom pitch; + struct r300_state_atom offset; + struct r300_state_atom chroma_key; + struct r300_state_atom border_color; + } tex; + struct r300_state_atom txe; /* tex enable (4104) */ +}; + +/** + * This structure holds the command buffer while it is being constructed. + * + * The first batch of commands in the buffer is always the state that needs + * to be re-emitted when the context is lost. This batch can be skipped + * otherwise. + */ +struct r300_cmdbuf { + int size; /* DWORDs allocated for buffer */ + uint32_t *cmd_buf; + int count_used; /* DWORDs filled so far */ + int count_reemit; /* size of re-emission batch */ +}; + +/** + * State cache + */ + +struct r300_depthbuffer_state { + GLfloat scale; +}; + +struct r300_stencilbuffer_state { + GLuint clear; + GLboolean hw_stencil; + +}; + +/* Vertex shader state */ + +/* Perhaps more if we store programs in vmem? */ +/* drm_r300_cmd_header_t->vpu->count is unsigned char */ +#define VSF_MAX_FRAGMENT_LENGTH (255*4) + +/* Can be tested with colormat currently. */ +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) +#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) + +struct r300_vertex_shader_fragment { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4]; + } body; +}; + +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 + +struct r300_vertex_shader_state { + struct r300_vertex_shader_fragment program; + + struct r300_vertex_shader_fragment unknown1; + struct r300_vertex_shader_fragment unknown2; + + int program_start; + int unknown_ptr1; /* pointer within program space */ + int program_end; + + int param_offset; + int param_count; + + int unknown_ptr2; /* pointer within program space */ + int unknown_ptr3; /* pointer within program space */ +}; + +extern int hw_tcl_on; + +//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) +#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) + +/* Should but doesnt work */ +//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) + +/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. + * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. + */ + +struct r300_vertex_program_key { + GLuint InputsRead; + GLuint OutputsWritten; +}; + +struct r300_vertex_program { + struct r300_vertex_program *next; + struct r300_vertex_program_key key; + int translated; + + struct r300_vertex_shader_fragment program; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int wpos_idx; + int inputs[VERT_ATTRIB_MAX]; + int outputs[VERT_RESULT_MAX]; + int native; + int ref_count; + int use_ref_count; +}; + +struct r300_vertex_program_cont { + struct gl_vertex_program mesa_program; /* Must be first */ + struct r300_vertex_shader_fragment params; + struct r300_vertex_program *progs; +}; + +#define PFS_MAX_ALU_INST 64 +#define PFS_MAX_TEX_INST 64 +#define PFS_MAX_TEX_INDIRECT 4 +#define PFS_NUM_TEMP_REGS 32 +#define PFS_NUM_CONST_REGS 16 + +/* Mapping Mesa registers to R300 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r300_pfs_compile_state { + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; +}; + +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ +struct r300_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + struct r300_pfs_compile_state *cs; + + struct { + int length; + GLuint inst[PFS_MAX_TEX_INST]; + } tex; + + struct { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + } inst[PFS_MAX_ALU_INST]; + } alu; + + struct { + int tex_offset; + int tex_end; + int alu_offset; + int alu_end; + int flags; + } node[4]; + int cur_node; + int first_node_has_tex; + + int alu_offset; + int alu_end; + int tex_offset; + int tex_end; + + /* Hardware constants. + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. for sin/cos-approximation) + */ + const GLfloat *constant[PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; + + GLuint optimization; +}; + +#define R300_MAX_AOS_ARRAYS 16 + +#define AOS_FORMAT_USHORT 0 +#define AOS_FORMAT_FLOAT 1 +#define AOS_FORMAT_UBYTE 2 +#define AOS_FORMAT_FLOAT_COLOR 3 + +#define REG_COORDS 0 +#define REG_COLOR0 1 +#define REG_TEX0 2 + +struct dt { + GLint size; + GLenum type; + GLsizei stride; + void *data; +}; + +struct radeon_vertex_buffer { + int Count; + void *Elts; + int elt_size; + int elt_min, elt_max; /* debug */ + + struct dt AttribPtr[VERT_ATTRIB_MAX]; + + const struct _mesa_prim *Primitive; + GLuint PrimitiveCount; + GLint LockFirst; + GLsizei LockCount; + int lock_uptodate; +}; + +struct r300_state { + struct r300_depthbuffer_state depth; + struct r300_texture_state texture; + int sw_tcl_inputs[VERT_ATTRIB_MAX]; + struct r300_vertex_shader_state vertex_shader; + struct r300_pfs_compile_state pfs_compile; + struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; + int aos_count; + struct radeon_vertex_buffer VB; + + GLuint *Elts; + struct r300_dma_region elt_dma; + + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + + struct { + int transform_offset; /* Transform matrix offset, -1 if none */ + } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ + + struct r300_stencilbuffer_state stencil; + +}; + +#define R300_FALLBACK_NONE 0 +#define R300_FALLBACK_TCL 1 +#define R300_FALLBACK_RAST 2 + +/** + * \brief R300 context structure. + */ +struct r300_context { + struct radeon_context radeon; /* parent class, must be first */ + + struct r300_hw_state hw; + struct r300_cmdbuf cmdbuf; + struct r300_state state; + struct gl_vertex_program *curr_vp; + struct r300_vertex_program *selected_vp; + + /* Vertex buffers + */ + struct r300_dma dma; + GLboolean save_on_next_unlock; + GLuint NewGLState; + + /* Texture object bookkeeping + */ + unsigned nr_heaps; + driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; + driTextureObject swapped; + int texture_depth; + float initialMaxAnisotropy; + + /* Clientdata textures; + */ + GLuint prefer_gart_client_texturing; + +#ifdef USER_BUFFERS + struct r300_memory_manager *rmm; +#endif + + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + + GLboolean disable_lowimpact_fallback; +}; + +struct r300_buffer_object { + struct gl_buffer_object mesa_obj; + int id; +}; + +#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) + +extern void r300DestroyContext(__DRIcontextPrivate * driContextPriv); +extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + +extern void r300SelectVertexShader(r300ContextPtr r300); +extern void r300InitShaderFuncs(struct dd_function_table *functions); +extern int r300VertexProgUpdateParams(GLcontext * ctx, + struct r300_vertex_program_cont *vp, + float *dst); + +#define RADEON_D_CAPTURE 0 +#define RADEON_D_PLAYBACK 1 +#define RADEON_D_PLAYBACK_RAW 2 +#define RADEON_D_T 3 + +#endif /* __R300_CONTEXT_H__ */ diff --git a/r300/r300_emit.c b/r300/r300_emit.c new file mode 100644 index 0000000..2c26069 --- /dev/null +++ b/r300/r300_emit.c @@ -0,0 +1,627 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "imports.h" +#include "macros.h" +#include "image.h" + +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" + +#include "r300_context.h" +#include "radeon_ioctl.h" +#include "r300_state.h" +#include "r300_emit.h" +#include "r300_ioctl.h" + +#ifdef USER_BUFFERS +#include "r300_mem.h" +#endif + +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + +#define DEBUG_ALL DEBUG_VERTS + +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ +} while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ +} while (0) +#endif + +static void r300EmitVec4(GLcontext * ctx, + struct r300_dma_region *rvb, + GLvoid * data, int stride, int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 4) + COPY_DWORDS(out, data, count); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out++; + data += stride; + } +} + +static void r300EmitVec8(GLcontext * ctx, + struct r300_dma_region *rvb, + GLvoid * data, int stride, int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 8) + COPY_DWORDS(out, data, count * 2); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out += 2; + data += stride; + } +} + +static void r300EmitVec12(GLcontext * ctx, + struct r300_dma_region *rvb, + GLvoid * data, int stride, int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 12) + COPY_DWORDS(out, data, count * 3); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out += 3; + data += stride; + } +} + +static void r300EmitVec16(GLcontext * ctx, + struct r300_dma_region *rvb, + GLvoid * data, int stride, int count) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 16) + COPY_DWORDS(out, data, count * 4); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out[3] = *(int *)(data + 12); + out += 4; + data += stride; + } +} + +static void r300EmitVec(GLcontext * ctx, + struct r300_dma_region *rvb, + GLvoid * data, int size, int stride, int count) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d size %d stride %d\n", + __FUNCTION__, count, size, stride); + + /* Gets triggered when playing with future_hw_tcl_on ... */ + //assert(!rvb->buf); + + if (stride == 0) { + r300AllocDmaRegion(rmesa, rvb, size * 4, 4); + count = 1; + rvb->aos_offset = GET_START(rvb); + rvb->aos_stride = 0; + } else { + r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); /* alignment? */ + rvb->aos_offset = GET_START(rvb); + rvb->aos_stride = size; + } + + /* Emit the data + */ + switch (size) { + case 1: + r300EmitVec4(ctx, rvb, data, stride, count); + break; + case 2: + r300EmitVec8(ctx, rvb, data, stride, count); + break; + case 3: + r300EmitVec12(ctx, rvb, data, stride, count); + break; + case 4: + r300EmitVec16(ctx, rvb, data, stride, count); + break; + default: + assert(0); + _mesa_exit(-1); + break; + } + +} + +static GLuint t_type(struct dt *dt) +{ + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return AOS_FORMAT_UBYTE; + case GL_SHORT: + return AOS_FORMAT_USHORT; + case GL_FLOAT: + return AOS_FORMAT_FLOAT; + default: + assert(0); + break; + } + + return AOS_FORMAT_FLOAT; +} + +static GLuint t_vir0_size(struct dt *dt) +{ + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return 4; + case GL_SHORT: + return 7; + case GL_FLOAT: + return dt->size - 1; + default: + assert(0); + break; + } + + return 0; +} + +static GLuint t_aos_size(struct dt *dt) +{ + switch (dt->type) { + case GL_UNSIGNED_BYTE: + return 1; + case GL_SHORT: + return 2; + case GL_FLOAT: + return dt->size; + default: + assert(0); + break; + } + + return 0; +} + +static GLuint t_vir0(uint32_t * dst, struct dt *dt, int *inputs, + GLint * tab, GLuint nr) +{ + GLuint i, dw; + + for (i = 0; i + 1 < nr; i += 2) { + dw = t_vir0_size(&dt[tab[i]]) | (inputs[tab[i]] << 8) | + (t_type(&dt[tab[i]]) << 14); + dw |= + (t_vir0_size(&dt[tab[i + 1]]) | + (inputs[tab[i + 1]] << 8) | (t_type(&dt[tab[i + 1]]) + << 14)) << 16; + + if (i + 2 == nr) { + dw |= (1 << (13 + 16)); + } + dst[i >> 1] = dw; + } + + if (nr & 1) { + dw = t_vir0_size(&dt[tab[nr - 1]]) | (inputs[tab[nr - 1]] + << 8) | + (t_type(&dt[tab[nr - 1]]) << 14); + dw |= 1 << 13; + + dst[nr >> 1] = dw; + } + + return (nr + 1) >> 1; +} + +static GLuint t_swizzle(int swizzle[4]) +{ + return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | + (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | + (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | + (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); +} + +static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr) +{ + GLuint i; + + for (i = 0; i + 1 < nr; i += 2) { + dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dst[i >> 1] |= + (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) + << 16; + } + + if (nr & 1) + dst[nr >> 1] = + t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE; + + return (nr + 1) >> 1; +} + +static GLuint t_emit_size(struct dt *dt) +{ + return dt->size; +} + +static GLuint t_vic(GLcontext * ctx, GLuint InputsRead) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint i, vic_1 = 0; + + if (InputsRead & (1 << VERT_ATTRIB_POS)) + vic_1 |= R300_INPUT_CNTL_POS; + + if (InputsRead & (1 << VERT_ATTRIB_NORMAL)) + vic_1 |= R300_INPUT_CNTL_NORMAL; + + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + vic_1 |= R300_INPUT_CNTL_COLOR; + + r300->state.texture.tc_count = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { + r300->state.texture.tc_count++; + vic_1 |= R300_INPUT_CNTL_TC0 << i; + } + + return vic_1; +} + +/* Emit vertex data to GART memory + * Route inputs to the vertex processor + * This function should never return R300_FALLBACK_TCL when using software tcl. + */ + +int r300EmitArrays(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = rmesa; + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + GLuint nr; + GLuint count = VB->Count; + GLuint i; + GLuint InputsRead = 0, OutputsWritten = 0; + int *inputs = NULL; + int vir_inputs[VERT_ATTRIB_MAX]; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + + if (hw_tcl_on) { + struct r300_vertex_program *prog = + (struct r300_vertex_program *) + CURRENT_VERTEX_SHADER(ctx); + inputs = prog->inputs; + InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead; + OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + } else { + DECLARE_RENDERINPUTS(inputs_bitset); + inputs = r300->state.sw_tcl_inputs; + + RENDERINPUTS_COPY(inputs_bitset, + TNL_CONTEXT(ctx)->render_inputs_bitset); + + assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS)); + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + + assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL) + == 0); + + assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0)); + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + + if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (RENDERINPUTS_TEST + (inputs_bitset, _TNL_ATTRIB_TEX(i))) { + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) + if (InputsRead & (1 << i)) + inputs[i] = nr++; + else + inputs[i] = -1; + + if (! + (r300->radeon.radeonScreen-> + chip_flags & RADEON_CHIPSET_TCL)) { + /* Fixed, apply to vir0 only */ + memcpy(vir_inputs, inputs, + VERT_ATTRIB_MAX * sizeof(int)); + inputs = vir_inputs; + + if (InputsRead & VERT_ATTRIB_POS) + inputs[VERT_ATTRIB_POS] = 0; + + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + } + + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, + inputs_bitset); + } + assert(InputsRead); + assert(OutputsWritten); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) + if (InputsRead & (1 << i)) + tab[nr++] = i; + + if (nr > R300_MAX_AOS_ARRAYS) + return R300_FALLBACK_TCL; + + for (i = 0; i < nr; i++) { + int ci; + int comp_size, fix, found = 0; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + swizzle[i][ci] = ci; + +#if MESA_BIG_ENDIAN +#define SWAP_INT(a, b) do { \ + int __temp; \ + __temp = a;\ + a = b; \ + b = __temp; \ +} while (0) + + if (VB->AttribPtr[tab[i]].type == GL_UNSIGNED_BYTE) { + SWAP_INT(swizzle[i][0], swizzle[i][3]); + SWAP_INT(swizzle[i][1], swizzle[i][2]); + } +#endif /* MESA_BIG_ENDIAN */ + + if (r300IsGartMemory(rmesa, VB->AttribPtr[tab[i]].data, + /*(count-1)*stride */ 4)) { + if (VB->AttribPtr[tab[i]].stride % 4) + return R300_FALLBACK_TCL; + + rmesa->state.aos[i].address = + VB->AttribPtr[tab[i]].data; + rmesa->state.aos[i].start = 0; + rmesa->state.aos[i].aos_offset = + r300GartOffsetFromVirtual(rmesa, + VB-> + AttribPtr[tab[i]].data); + rmesa->state.aos[i].aos_stride = + VB->AttribPtr[tab[i]].stride / 4; + + rmesa->state.aos[i].aos_size = + t_emit_size(&VB->AttribPtr[tab[i]]); + } else { + /* TODO: r300EmitVec can only handle 4 byte vectors */ + if (VB->AttribPtr[tab[i]].type != GL_FLOAT) + return R300_FALLBACK_TCL; + + r300EmitVec(ctx, &rmesa->state.aos[i], + VB->AttribPtr[tab[i]].data, + t_emit_size(&VB->AttribPtr[tab[i]]), + VB->AttribPtr[tab[i]].stride, count); + } + + rmesa->state.aos[i].aos_size = + t_aos_size(&VB->AttribPtr[tab[i]]); + + comp_size = _mesa_sizeof_type(VB->AttribPtr[tab[i]].type); + + for (fix = 0; fix <= 4 - VB->AttribPtr[tab[i]].size; fix++) { + if ((rmesa->state.aos[i].aos_offset - + comp_size * fix) % 4) + continue; + + found = 1; + break; + } + + if (found) { + if (fix > 0) { + WARN_ONCE("Feeling lucky?\n"); + } + + rmesa->state.aos[i].aos_offset -= comp_size * fix; + + for (ci = 0; ci < VB->AttribPtr[tab[i]].size; ci++) + swizzle[i][ci] += fix; + } else { + WARN_ONCE + ("Cannot handle offset %x with stride %d, comp %d\n", + rmesa->state.aos[i].aos_offset, + rmesa->state.aos[i].aos_stride, + VB->AttribPtr[tab[i]].size); + return R300_FALLBACK_TCL; + } + } + + /* setup INPUT_ROUTE */ + R300_STATECHANGE(r300, vir[0]); + ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count = + t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], VB->AttribPtr, + inputs, tab, nr); + + R300_STATECHANGE(r300, vir[1]); + ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count = + t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr); + + /* Set up input_cntl */ + /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */ + R300_STATECHANGE(r300, vic); + r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */ + r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead); + + /* Stage 3: VAP output */ + + R300_STATECHANGE(r300, vof); + + r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0; + r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0; + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= + R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + /*if(OutputsWritten & (1 << VERT_RESULT_BFC0)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; + + if(OutputsWritten & (1 << VERT_RESULT_BFC1)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; */ + //if(OutputsWritten & (1 << VERT_RESULT_FOGC)) + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + r300->hw.vof.cmd[R300_VOF_CNTL_0] |= + R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) + r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i)); + + rmesa->state.aos_count = nr; + + return R300_FALLBACK_NONE; +} + +#ifdef USER_BUFFERS +void r300UseArrays(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + + if (rmesa->state.elt_dma.buf) + r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); + + for (i = 0; i < rmesa->state.aos_count; i++) { + if (rmesa->state.aos[i].buf) + r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); + } +} +#endif + +void r300ReleaseArrays(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + + r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); + for (i = 0; i < rmesa->state.aos_count; i++) { + r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); + } +} diff --git a/r300/r300_emit.h b/r300/r300_emit.h new file mode 100644 index 0000000..7be098f --- /dev/null +++ b/r300/r300_emit.h @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2005 Vladimir Dergachev. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Vladimir Dergachev <volodya@mindspring.com> + * Nicolai Haehnle <prefect_@gmx.net> + * Aapo Tahkola <aet@rasterburn.org> + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ + +/* This files defines functions for accessing R300 hardware. + */ +#ifndef __R300_EMIT_H__ +#define __R300_EMIT_H__ + +#include "glheader.h" +#include "r300_context.h" +#include "r300_cmdbuf.h" +#include "radeon_reg.h" + +/* + * CP type-3 packets + */ +#define RADEON_CP_PACKET3_UNK1B 0xC0001B00 +#define RADEON_CP_PACKET3_INDX_BUFFER 0xC0003300 +#define RADEON_CP_PACKET3_3D_DRAW_VBUF_2 0xC0003400 +#define RADEON_CP_PACKET3_3D_DRAW_IMMD_2 0xC0003500 +#define RADEON_CP_PACKET3_3D_DRAW_INDX_2 0xC0003600 +#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define RADEON_CP_PACKET3_3D_CLEAR_ZMASK 0xC0003202 +#define RADEON_CP_PACKET3_3D_CLEAR_CMASK 0xC0003802 +#define RADEON_CP_PACKET3_3D_CLEAR_HIZ 0xC0003702 + +#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) + +static __inline__ uint32_t cmdpacket0(int reg, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.packet0.cmd_type = R300_CMD_PACKET0; + cmd.packet0.count = count; + cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; + cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); + + return cmd.u; +} + +static __inline__ uint32_t cmdvpu(int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.vpu.cmd_type = R300_CMD_VPU; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static __inline__ uint32_t cmdpacket3(int packet) +{ + drm_r300_cmd_header_t cmd; + + cmd.packet3.cmd_type = R300_CMD_PACKET3; + cmd.packet3.packet = packet; + + return cmd.u; +} + +static __inline__ uint32_t cmdcpdelay(unsigned short count) +{ + drm_r300_cmd_header_t cmd; + + cmd.delay.cmd_type = R300_CMD_CP_DELAY; + cmd.delay.count = count; + + return cmd.u; +} + +static __inline__ uint32_t cmdwait(unsigned char flags) +{ + drm_r300_cmd_header_t cmd; + + cmd.wait.cmd_type = R300_CMD_WAIT; + cmd.wait.flags = flags; + + return cmd.u; +} + +static __inline__ uint32_t cmdpacify(void) +{ + drm_r300_cmd_header_t cmd; + + cmd.header.cmd_type = R300_CMD_END3D; + + return cmd.u; +} + +/** + * Prepare to write a register value to register at address reg. + * If num_extra > 0 then the following extra values are written + * to registers with address +4, +8 and so on.. + */ +#define reg_start(reg, num_extra) \ + do { \ + int _n; \ + _n=(num_extra); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+2), \ + __FUNCTION__); \ + cmd_reserved=_n+2; \ + cmd_written=1; \ + cmd[0].i=cmdpacket0((reg), _n+1); \ + } while (0); + +/** + * Emit GLuint freestyle + */ +#define e32(dword) \ + do { \ + if(cmd_written<cmd_reserved) { \ + cmd[cmd_written].i=(dword); \ + cmd_written++; \ + } else { \ + fprintf(stderr, \ + "e32 but no previous packet " \ + "declaration.\n" \ + "Aborting! in %s::%s at line %d, " \ + "cmd_written=%d cmd_reserved=%d\n", \ + __FILE__, __FUNCTION__, __LINE__, \ + cmd_written, cmd_reserved); \ + _mesa_exit(-1); \ + } \ + } while(0) + +#define efloat(f) e32(r300PackFloat32(f)) + +#define vsf_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+2; \ + cmd_written =1; \ + cmd[0].i = cmdvpu((dest), _n/4); \ + } while (0); + +#define start_packet3(packet, count) \ + { \ + int _n; \ + GLuint _p; \ + _n = (count); \ + _p = (packet); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+3), \ + __FUNCTION__); \ + cmd_reserved = _n+3; \ + cmd_written = 2; \ + if(_n > 0x3fff) { \ + fprintf(stderr,"Too big packet3 %08x: cannot " \ + "store %d dwords\n", \ + _p, _n); \ + _mesa_exit(-1); \ + } \ + cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ + cmd[1].i = _p | ((_n & 0x3fff)<<16); \ + } + +/** + * Must be sent to switch to 2d commands + */ +void static inline end_3d(r300ContextPtr rmesa) +{ + drm_radeon_cmd_header_t *cmd = NULL; + + cmd = + (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); + cmd[0].header.cmd_type = R300_CMD_END3D; +} + +void static inline cp_delay(r300ContextPtr rmesa, unsigned short count) +{ + drm_radeon_cmd_header_t *cmd = NULL; + + cmd = + (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); + cmd[0].i = cmdcpdelay(count); +} + +void static inline cp_wait(r300ContextPtr rmesa, unsigned char flags) +{ + drm_radeon_cmd_header_t *cmd = NULL; + + cmd = + (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); + cmd[0].i = cmdwait(flags); +} + +extern int r300EmitArrays(GLcontext * ctx); + +#ifdef USER_BUFFERS +void r300UseArrays(GLcontext * ctx); +#endif + +extern void r300ReleaseArrays(GLcontext * ctx); + +#endif diff --git a/r300/r300_fragprog.c b/r300/r300_fragprog.c new file mode 100644 index 0000000..cce8e68 --- /dev/null +++ b/r300/r300_fragprog.c @@ -0,0 +1,2472 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + * \todo Verify results of opcodes for accuracy, I've only checked them in + * specific cases. + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_context.h" +#include "r300_fragprog.h" +#include "r300_reg.h" +#include "r300_state.h" + +/* + * Usefull macros and values + */ +#define ERROR(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + fp->error = GL_TRUE; \ + } while(0) + +#define PFS_INVAL 0xFFFFFFFF +#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs + +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_YYY 2 +#define SWIZZLE_ZZZ 3 +#define SWIZZLE_WWW 4 +#define SWIZZLE_YZX 5 +#define SWIZZLE_ZXY 6 +#define SWIZZLE_WZY 7 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ + ((SWIZZLE_##x<<0)| \ + (SWIZZLE_##y<<3)| \ + (SWIZZLE_##z<<6)| \ + (SWIZZLE_##w<<9)), \ + 0) + +#define REG_TYPE_INPUT 0 +#define REG_TYPE_OUTPUT 1 +#define REG_TYPE_TEMP 2 +#define REG_TYPE_CONST 3 + +#define REG_TYPE_SHIFT 0 +#define REG_INDEX_SHIFT 2 +#define REG_VSWZ_SHIFT 8 +#define REG_SSWZ_SHIFT 13 +#define REG_NEGV_SHIFT 18 +#define REG_NEGS_SHIFT 19 +#define REG_ABS_SHIFT 20 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? + +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) + +/* + * Datas structures for fragment program generation + */ + +/* description of r300 native hw instructions */ +static const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + /* *INDENT-OFF* */ + {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, + {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, + {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, + {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, + {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, + {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, + {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, + {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, + {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, + {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, + {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + /* *INDENT-ON* */ +}; + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * REG_VSWZ/REG_SSWZ is an index into this table + */ + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) +/* native swizzles */ +static const struct r300_pfs_swizzle { + GLuint hash; /* swizzle value this matches */ + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* difference in base between arg0/1/2 */ + GLuint flags; +} v_swiz[] = { + /* *INDENT-OFF* */ + {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {PFS_INVAL, 0, 0, 0}, + /* *INDENT-ON* */ +}; + +/* used during matching of non-native swizzles */ +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + /* *INDENT-OFF* */ + {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, + {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, + {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, + {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, + {SWZ_X_MASK, 1, 1}, + {SWZ_Y_MASK, 2, 1}, + {SWZ_Z_MASK, 4, 1}, + {PFS_INVAL, PFS_INVAL, PFS_INVAL} + /* *INDENT-ON* */ +}; + +static const struct { + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLuint flags; +} s_swiz[] = { + /* *INDENT-OFF* */ + {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_FPI2_ARGA_ZERO, 0, 0}, + {R300_FPI2_ARGA_ONE, 0, 0}, + {R300_FPI2_ARGA_HALF, 0, 0} + /* *INDENT-ON* */ +}; + +/* boiler-plate reg, for convenience */ +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE, + GL_FALSE); + +/* constant one source */ +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant half source */ +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant zero source */ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* + * Common functions prototypes + */ +static void dump_program(struct r300_fragment_program *fp); +static void emit_arith(struct r300_fragment_program *fp, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, int flags); + +/** + * Get an R300 temporary that can be written to in the given slot. + */ +static int get_hw_temp(struct r300_fragment_program *fp, int slot) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { + ERROR("Out of hardware temps\n"); + return 0; + } + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Get an R300 temporary that will act as a TEX destination register. + */ +static int get_hw_temp_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(fp, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Mark the given hardware register as free. + */ +static void free_hw_temp(struct r300_fragment_program *fp, int idx) +{ + COMPILE_STATE; + + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots + 1; +} + +/** + * Create a new Mesa temporary register. + */ +static GLuint get_temp_reg(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ +static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(fp); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Free a Mesa temporary and the associated R300 temporary. + */ +static void free_temp(struct r300_fragment_program *fp, GLuint r) +{ + COMPILE_STATE; + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; + + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(fp, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(fp, cs->inputs[index].reg); + cs->inputs[index].reg = -1; + } +} + +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). + */ +static GLuint emit_const4fv(struct r300_fragment_program *fp, + const GLfloat * cp) +{ + GLuint reg = undef; + int index; + + for (index = 0; index < fp->const_nr; ++index) { + if (fp->constant[index] == cp) + break; + } + + if (index >= fp->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + fp->const_nr++; + fp->constant[index] = cp; + } + + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; +} + +static inline GLuint negate(GLuint r) +{ + REG_NEGS(r); + REG_NEGV(r); + return r; +} + +/* Hack, to prevent clobbering sources used multiple times when + * emulating non-native instructions + */ +static inline GLuint keep(GLuint r) +{ + REG_SET_NO_USE(r, GL_TRUE); + return r; +} + +static inline GLuint absolute(GLuint r) +{ + REG_ABS(r); + return r; +} + +static int swz_native(struct r300_fragment_program *fp, + GLuint src, GLuint * r, GLuint arbneg) +{ + /* Native swizzle, handle negation */ + src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); + + if ((arbneg & 0x7) == 0x0) { + src = src & ~REG_NEGV_MASK; + *r = src; + } else if ((arbneg & 0x7) == 0x7) { + src |= REG_NEGV_MASK; + *r = src; + } else { + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + src |= REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); + src = src & ~REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg ^ 0x7) | WRITEMASK_W, + src, pfs_one, pfs_zero, 0); + } + + return 3; +} + +static int swz_emit_partial(struct r300_fragment_program *fp, + GLuint src, + GLuint * r, int mask, int mc, GLuint arbneg) +{ + GLuint tmp; + GLuint wmask = 0; + + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + + /* A partial match, VSWZ/mask define what parts of the + * desired swizzle we match + */ + if (mc + s_mask[mask].count == 3) { + wmask = WRITEMASK_W; + src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; + } + + tmp = arbneg & s_mask[mask].mask; + if (tmp) { + tmp = tmp ^ s_mask[mask].mask; + if (tmp) { + emit_arith(fp, + PFS_OP_MAD, + *r, + arbneg & s_mask[mask].mask, + keep(src) | REG_NEGV_MASK, + pfs_one, pfs_zero, 0); + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, tmp | wmask, src, pfs_one, pfs_zero, 0); + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg & s_mask[mask].mask) | wmask, + src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); + } + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, PFS_OP_MAD, + *r, + s_mask[mask].mask | wmask, + src, pfs_one, pfs_zero, 0); + } + + return s_mask[mask].count; +} + +static GLuint do_swizzle(struct r300_fragment_program *fp, + GLuint src, GLuint arbswz, GLuint arbneg) +{ + GLuint r = undef; + GLuint vswz; + int c_mask = 0; + int v_match = 0; + + /* If swizzling from something without an XYZW native swizzle, + * emit result to a temp, and do new swizzle from the temp. + */ +#if 0 + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint temp = get_temp_reg(fp); + emit_arith(fp, + PFS_OP_MAD, + temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); + src = temp; + } +#endif + + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint vsrcswz = + (v_swiz[REG_GET_VSWZ(src)]. + hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | + REG_GET_SSWZ(src) << 9; + GLint i; + + GLuint newswz = 0; + GLuint offset; + for (i = 0; i < 4; ++i) { + offset = GET_SWZ(arbswz, i); + + newswz |= + (offset <= 3) ? GET_SWZ(vsrcswz, + offset) << i * + 3 : offset << i * 3; + } + + arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); + REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); + } else { + /* set scalar swizzling */ + REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); + + } + do { + vswz = REG_GET_VSWZ(src); + do { + int chash; + + REG_SET_VSWZ(src, vswz); + chash = v_swiz[REG_GET_VSWZ(src)].hash & + s_mask[c_mask].hash; + + if (chash == (arbswz & s_mask[c_mask].hash)) { + if (s_mask[c_mask].count == 3) { + v_match += swz_native(fp, + src, &r, arbneg); + } else { + v_match += swz_emit_partial(fp, + src, + &r, + c_mask, + v_match, + arbneg); + } + + if (v_match == 3) + return r; + + /* Fill with something invalid.. all 0's was + * wrong before, matched SWIZZLE_X. So all + * 1's will be okay for now + */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); + } + } while (v_swiz[++vswz].hash != PFS_INVAL); + REG_SET_VSWZ(src, SWIZZLE_XYZ); + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static GLuint t_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + GLuint r = undef; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + break; + case PROGRAM_INPUT: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_INPUT); + break; + case PROGRAM_LOCAL_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.LocalParams[fpsrc. + Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_const4fv(fp, + fp->ctx->FragmentProgram.Parameters[fpsrc. + Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... */ + if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) + r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); + return r; +} + +static GLuint t_scalar_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + struct prog_src_register src = fpsrc; + int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ + + src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); + + return t_src(fp, src); +} + +static GLuint t_dst(struct r300_fragment_program *fp, + struct prog_dst_register dest) +{ + GLuint r = undef; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + return r; + case PROGRAM_OUTPUT: + REG_SET_TYPE(r, REG_TYPE_OUTPUT); + switch (dest.Index) { + case FRAG_RESULT_COLR: + case FRAG_RESULT_DEPR: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) +{ + COMPILE_STATE; + int idx; + int index = REG_GET_INDEX(src); + + switch (REG_GET_TYPE(src)) { + case REG_TYPE_TEMP: + /* NOTE: if reg==-1 here, a source is being read that + * hasn't been written to. Undefined results. + */ + if (cs->temps[index].reg == -1) + cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); + + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) + free_temp(fp, src); + break; + case REG_TYPE_INPUT: + idx = cs->inputs[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) + free_hw_temp(fp, cs->inputs[index].reg); + break; + case REG_TYPE_CONST: + return (index | SRC_CONST); + default: + ERROR("Invalid type for source reg\n"); + return (0 | SRC_CONST); + } + + if (!tex) + cs->used_in_node |= (1 << idx); + + return idx; +} + +static int t_hw_dst(struct r300_fragment_program *fp, + GLuint dest, GLboolean tex, int slot) +{ + COMPILE_STATE; + int idx; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { + if (!tex) { + cs->temps[index].reg = get_hw_temp(fp, slot); + } else { + cs->temps[index].reg = get_hw_temp_tex(fp); + } + } + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) + free_temp(fp, dest); + + cs->dest_in_node |= (1 << idx); + cs->used_in_node |= (1 << idx); + break; + case REG_TYPE_OUTPUT: + switch (index) { + case FRAG_RESULT_COLR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_COLOR; + break; + case FRAG_RESULT_DEPR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_DEPTH; + break; + } + return index; + break; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + return idx; +} + +static void emit_nop(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; + } + + fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; + fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; + fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; + fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + cs->nrslots++; +} + +static void emit_tex(struct r300_fragment_program *fp, + struct prog_instruction *fpi, int opcode) +{ + COMPILE_STATE; + GLuint coord = t_src(fp, fpi->SrcReg[0]); + GLuint dest = undef, rdest = undef; + GLuint din, uin; + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest; + GLuint tempreg = 0; + + uin = cs->used_in_node; + din = cs->dest_in_node; + + /* Resolve source/dest to hardware registers */ + if (opcode != R300_FPITX_OP_KIL) { + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { + /** + * Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + * + * \todo Refactor this once we have proper rewriting/optimization + * support for programs. + */ + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + int factor_index; + GLuint factorreg; + + tokens[2] = unit; + factor_index = + _mesa_add_state_reference(fp->mesa_program.Base. + Parameters, tokens); + factorreg = + emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[factor_index]); + tempreg = keep(get_temp_reg(fp)); + + emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, factorreg, pfs_zero, 0); + + /* Ensure correct node indirection */ + uin = cs->used_in_node; + din = cs->dest_in_node; + + hwsrc = t_hw_src(fp, tempreg, GL_TRUE); + } else { + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + dest = t_dst(fp, fpi->DstReg); + + /* r300 doesn't seem to be able to do TEX->output reg */ + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + rdest = dest; + dest = get_temp_reg_tex(fp); + } + hwdest = + t_hw_dst(fp, dest, GL_TRUE, + fp->node[fp->cur_node].alu_offset); + + /* Use a temp that hasn't been used in this node, rather + * than causing an indirection + */ + if (uin & (1 << hwdest)) { + free_hw_temp(fp, hwdest); + hwdest = get_hw_temp_tex(fp); + cs->temps[REG_GET_INDEX(dest)].reg = hwdest; + } + } else { + hwdest = 0; + unit = 0; + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + /* Indirection if source has been written in this node, or if the + * dest has been read/written in this node + */ + if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && + (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { + + /* Finish off current node */ + if (fp->node[fp->cur_node].alu_offset == cs->nrslots) + emit_nop(fp); + + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + assert(fp->node[fp->cur_node].alu_end >= 0); + + if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { + ERROR("too many levels of texture indirection\n"); + return; + } + + /* Start new node */ + fp->node[fp->cur_node].tex_offset = fp->tex.length; + fp->node[fp->cur_node].alu_offset = cs->nrslots; + fp->node[fp->cur_node].tex_end = -1; + fp->node[fp->cur_node].alu_end = -1; + fp->node[fp->cur_node].flags = 0; + cs->used_in_node = 0; + cs->dest_in_node = 0; + } + + if (fp->cur_node == 0) + fp->first_node_has_tex = 1; + + fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) + | (hwdest << R300_FPITX_DST_SHIFT) + | (unit << R300_FPITX_IMAGE_SHIFT) + /* not entirely sure about this */ + | (opcode << R300_FPITX_OPCODE_SHIFT); + + cs->dest_in_node |= (1 << hwdest); + if (REG_GET_TYPE(coord) != REG_TYPE_CONST) + cs->used_in_node |= (1 << hwsrc); + + fp->node[fp->cur_node].tex_end++; + + /* Copy from temp to output if needed */ + if (REG_GET_VALID(rdest)) { + emit_arith(fp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest, + pfs_one, pfs_zero, 0); + free_temp(fp, dest); + } + + /* Free temp register */ + if (tempreg != 0) + free_temp(fp, tempreg); +} + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. + */ +static int get_earliest_allowed_write(struct r300_fragment_program *fp, + GLuint dest, int mask) +{ + COMPILE_STATE; + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; +} + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. + * + * @return the index of the slot + */ +static int find_and_prepare_slot(struct r300_fragment_program *fp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, GLuint * src, GLuint dest, int mask) +{ + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i, j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(fp, dest, mask); + + if (fp->node[fp->cur_node].alu_offset > pos) + pos = fp->node[fp->cur_node].alu_offset; + for (i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for (;; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } + + fp->alu.inst[pos].inst0 = NOP_INST0; + fp->alu.inst[pos].inst1 = NOP_INST1; + fp->alu.inst[pos].inst2 = NOP_INST2; + fp->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for (i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for (i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for (j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for (i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for (i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = + pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = + pos; + } + } + } + + // Emit the source fetch code + fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code + if (emit_vop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * + v_swiz[REG_GET_VSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI0_ARGC_ZERO; + } + } + + fp->alu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | + R300_FPI0_ARG2C_MASK); + fp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << + R300_FPI0_ARG1C_SHIFT) + | (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * + s_swiz[REG_GET_SSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + fp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | + R300_FPI2_ARG2A_MASK); + fp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << + R300_FPI2_ARG1A_SHIFT) + | (swz[2] << R300_FPI2_ARG2A_SHIFT); + } + + return pos; +} + +/** + * Append an ALU instruction to the instruction list. + */ +static void emit_arith(struct r300_fragment_program *fp, + int op, + GLuint dest, + int mask, + GLuint src0, GLuint src1, GLuint src2, int flags) +{ + COMPILE_STATE; + GLuint src[3] = { src0, src1, src2 }; + int hwdest; + GLboolean emit_vop, emit_sop; + int vop, sop, argc; + int pos; + + vop = r300_fpop[op].v_op; + sop = r300_fpop[op].s_op; + argc = r300_fpop[op].argc; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; + if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) + emit_vop = GL_TRUE; + if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) + emit_sop = GL_TRUE; + + pos = + find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, + mask); + if (pos < 0) + return; + + hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + + if (flags & PFS_FLAG_SAT) { + vop |= R300_FPI0_OUTC_SAT; + sop |= R300_FPI2_OUTA_SAT; + } + + /* Throw the pieces together and get FPI0/1 */ + if (emit_vop) { + fp->alu.inst[pos].inst0 |= vop; + + fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; + } else + assert(0); + } else { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos + 1; + } + } + + /* And now FPI2/3 */ + if (emit_sop) { + fp->alu.inst[pos].inst2 |= sop; + + if (mask & WRITEMASK_W) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_OUTPUT; + } else if (REG_GET_INDEX(dest) == + FRAG_RESULT_DEPR) { + fp->alu.inst[pos].inst3 |= + R300_FPI3_DSTA_DEPTH; + } else + assert(0); + } else { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos + 1; + } + } + } + + return; +} + +#if 0 +static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + GLuint r = undef; + + if (!(mp->Base.InputsRead & (1 << attr))) { + ERROR("Attribute %d was not provided!\n", attr); + return undef; + } + + REG_SET_TYPE(r, REG_TYPE_INPUT); + REG_SET_INDEX(r, attr); + REG_SET_VALID(r, GL_TRUE); + return r; +} +#endif + +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. + */ +static const GLfloat LitConst[4] = + { 127.999999, 127.999999, 127.999999, -127.999999 }; + +static void emit_lit(struct r300_fragment_program *fp, + GLuint dest, int mask, GLuint src, int flags) +{ + COMPILE_STATE; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(fp, LitConst); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(fp)); + } else { + temp = keep(dest); + } + + // Note: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + + // Second slot + emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. + // This does not affect the use as a condition variable in the CMP later + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + + // Fifth slot + emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, + pfs_zero, swizzle(temp, W, W, W, W), + negate(swizzle(temp, Y, Y, Y, Y)), flags); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + pfs_zero, 0); + + if (needTemporary) { + emit_arith(fp, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(fp, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); + } +} + +static GLboolean parse_program(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + const struct prog_instruction *inst = mp->Base.Instructions; + struct prog_instruction *fpi; + GLuint src[3], dest, temp[2]; + int flags, mask = 0; + int const_sin[2]; + + if (!inst || inst[0].Opcode == OPCODE_END) { + ERROR("empty program?\n"); + return GL_FALSE; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + if (fpi->SaturateMode == SATURATE_ZERO_ONE) + flags = PFS_FLAG_SAT; + else + flags = 0; + + if (fpi->Opcode != OPCODE_KIL) { + dest = t_dst(fp, fpi->DstReg); + mask = fpi->DstReg.WriteMask; + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + absolute(src[0]), pfs_one, pfs_zero, flags); + break; + case OPCODE_ADD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, src[1], flags); + break; + case OPCODE_CMP: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c + * r300 - if src2.c < 0.0 ? src1.c : src0.c + */ + emit_arith(fp, PFS_OP_CMP, dest, mask, + src[2], src[1], src[0], flags); + break; + case OPCODE_COS: + /* + * cos using a parabola (see SIN): + * cos(x): + * x = (x/(2*PI))+0.75 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(src[0], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_DP3: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DP4: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DPH: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* src0.xyz1 -> temp + * DP4 dest, temp, src1 + */ +#if 0 + temp[0] = get_temp_reg(fp); + src[0].s_swz = SWIZZLE_ONE; + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_DP4, dest, mask, + temp[0], src[1], undef, flags); + free_temp(fp, temp[0]); +#else + emit_arith(fp, PFS_OP_DP4, dest, mask, + swizzle(src[0], X, Y, Z, ONE), src[1], + undef, flags); +#endif + break; + case OPCODE_DST: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* dest.y = src0.y * src1.y */ + if (mask & WRITEMASK_Y) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + keep(src[0]), keep(src[1]), + pfs_zero, flags); + /* dest.z = src0.z */ + if (mask & WRITEMASK_Z) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + src[0], pfs_one, pfs_zero, flags); + /* result.x = 1.0 + * result.w = src1.w */ + if (mask & WRITEMASK_XW) { + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XW, + src[1], pfs_one, pfs_zero, flags); + } + break; + case OPCODE_EX2: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_FLR: + src[0] = t_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(fp); + /* FRC temp, src0 + * MAD dest, src0, 1.0, -temp + */ + emit_arith(fp, PFS_OP_FRC, temp[0], mask, + keep(src[0]), undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(temp[0]), flags); + free_temp(fp, temp[0]); + break; + case OPCODE_FRC: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_KIL: + emit_tex(fp, fpi, R300_FPITX_OP_KIL); + break; + case OPCODE_LG2: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_LIT: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_lit(fp, dest, mask, src[0], flags); + break; + case OPCODE_LRP: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* result = tmp0tmp1 + (1 - tmp0)tmp2 + * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + * MAD temp, -tmp0, tmp2, tmp2 + * MAD result, tmp0, tmp1, temp + */ + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + negate(keep(src[0])), keep(src[2]), src[2], + 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], temp[0], flags); + free_temp(fp, temp[0]); + break; + case OPCODE_MAD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], src[2], flags); + break; + case OPCODE_MAX: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MIN: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, pfs_zero, flags); + break; + case OPCODE_MUL: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], pfs_zero, flags); + break; + case OPCODE_POW: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + src[1] = t_scalar_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0], undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + temp[0], src[1], pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, + temp[0], undef, undef, 0); + free_temp(fp, temp[0]); + break; + case OPCODE_RCP: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_RSQ: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RSQ, dest, mask, + absolute(src[0]), pfs_zero, pfs_zero, flags); + break; + case OPCODE_SCS: + /* + * scs using a parabola : + * scs(x): + * result.x = sin(-abs(x)+0.5*PI) (cos) + * result.y = sin(x) (sin) + * + */ + temp[0] = get_temp_reg(fp); + temp[1] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* x = -abs(x)+0.5*PI */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + negate(abs + (swizzle(keep(src[0]), X, X, X, X))), + 0); + + /* C*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + swizzle(const_sin[0], Y, Y, Y, Y), + swizzle(keep(src[0]), X, X, X, X), + pfs_zero, 0); + + /* B*x, C*x (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + /* B*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(const_sin[0], X, X, X, X), + keep(src[0]), pfs_zero, 0); + + /* y = B*x + C*x*abs(x) (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + absolute(src[0]), + swizzle(temp[0], W, W, W, W), + swizzle(temp[1], W, W, W, W), 0); + + /* y = B*x + C*x*abs(x) (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], + W, Z, Y, + X), + absolute(swizzle(temp[1], W, Z, Y, X)), + negate(swizzle(temp[1], W, Z, Y, X)), 0); + + /* dest.xy = mad(temp.xy, P, temp2.wz) */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[1], W, Z, Y, X), flags); + + free_temp(fp, temp[0]); + free_temp(fp, temp[1]); + break; + case OPCODE_SGE: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 0 : 1 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_one, pfs_zero, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SIN: + /* + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. + */ + + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + pfs_half, 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_SLT: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 1 : 0 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_zero, pfs_one, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SUB: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(src[1]), flags); + break; + case OPCODE_TEX: + emit_tex(fp, fpi, R300_FPITX_OP_TEX); + break; + case OPCODE_TXB: + emit_tex(fp, fpi, R300_FPITX_OP_TXB); + break; + case OPCODE_TXP: + emit_tex(fp, fpi, R300_FPITX_OP_TXP); + break; + case OPCODE_XPD:{ + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0.zxy * src1.yzx */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_XYZ, swizzle(keep(src[0]), + Z, X, Y, W), + swizzle(keep(src[1]), Y, Z, X, W), + pfs_zero, 0); + /* dest.xyz = src0.yzx * src1.zxy - temp + * dest.w = undefined + * */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XYZ, swizzle(src[0], + Y, Z, + X, W), + swizzle(src[1], Z, X, Y, W), + negate(temp[0]), flags); + /* cleanup */ + free_temp(fp, temp[0]); + break; + } + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } + + if (fp->error) + return GL_FALSE; + + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog) +{ + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); + _mesa_init_instructions(fpi, prog->NumInstructions + 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + _mesa_copy_instructions(&fpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = &prog->Instructions[prog->NumInstructions - 1]; + + assert(fpi->Opcode == OPCODE_END); + + for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) + if (fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + struct gl_fragment_program *mp = &fp->mesa_program; + struct prog_instruction *fpi; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; /* for fp->temps[] */ + int i, j; + + /* New compile, reset tracking data */ + fp->optimization = + driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + fp->translated = GL_FALSE; + fp->error = GL_FALSE; + fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); + fp->tex.length = 0; + fp->cur_node = 0; + fp->first_node_has_tex = 0; + fp->const_nr = 0; + fp->max_temp_idx = 0; + fp->node[0].alu_end = -1; + fp->node[0].tex_end = -1; + + _mesa_memset(cs, 0, sizeof(*fp->cs)); + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0. + */ + + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + get_hw_temp(fp, 0); + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + insert_wpos(&mp->Base); + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. + * That way, we can free up the reg when it's no longer needed + */ + if (!mp->Base.Instructions) { + ERROR("No instructions found in program\n"); + return; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + int idx; + + for (i = 0; i < 3; i++) { + idx = fpi->SrcReg[i].Index; + switch (fpi->SrcReg[i].File) { + case PROGRAM_TEMPORARY: + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + break; + case PROGRAM_INPUT: + cs->inputs[idx].refcount++; + break; + default: + break; + } + } + + idx = fpi->DstReg.Index; + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + } + } + cs->temp_in_use = temps_used; +} + +static void update_params(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); +} + +void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + + if (!fp->translated) { + + init_program(r300, fp); + cs = fp->cs; + + if (parse_program(fp) == GL_FALSE) { + dump_program(fp); + return; + } + + /* Finish off */ + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + if (fp->node[fp->cur_node].tex_end < 0) + fp->node[fp->cur_node].tex_end = 0; + fp->alu_offset = 0; + fp->alu_end = cs->nrslots - 1; + fp->tex_offset = 0; + fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; + assert(fp->node[fp->cur_node].alu_end >= 0); + assert(fp->alu_end >= 0); + + fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) + dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } + + update_params(fp); +} + +/* just some random things... */ +static void dump_program(struct r300_fragment_program *fp) +{ + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (fp->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d\n", n, + fp->node[n].alu_offset, + fp->node[n].tex_offset, + fp->node[n].alu_end, fp->node[n].tex_end); + + if (fp->tex.length) { + fprintf(stderr, " TEX:\n"); + for (i = fp->node[n].tex_offset; + i <= fp->node[n].tex_offset + fp->node[n].tex_end; + ++i) { + const char *instr; + + switch ((fp->tex. + inst[i] >> R300_FPITX_OPCODE_SHIFT) & + 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (fp->tex. + inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (fp->tex. + inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + 't', + (fp->tex. + inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (fp->tex. + inst[i] & R300_FPITX_IMAGE_MASK) >> + R300_FPITX_IMAGE_SHIFT, + fp->tex.inst[i]); + } + } + + for (i = fp->node[n].alu_offset; + i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst1 >> (j * 6); + int rega = fp->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (fp->alu.inst[i]. + inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (fp->alu.inst[i]. + inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + fp->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, fp->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst0 >> (j * 7); + int rega = fp->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + fp->alu.inst[i].inst0, arga[0], arga[1], + arga[2], fp->alu.inst[i].inst2); + } + } +} diff --git a/r300/r300_fragprog.h b/r300/r300_fragprog.h new file mode 100644 index 0000000..72fca77 --- /dev/null +++ b/r300/r300_fragprog.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r300_context.h" + +typedef struct r300_fragment_program_swizzle { + GLuint length; + GLuint src[4]; + GLuint inst[8]; +} r300_fragment_program_swizzle_t; + +/* supported hw opcodes */ +#define PFS_OP_MAD 0 +#define PFS_OP_DP3 1 +#define PFS_OP_DP4 2 +#define PFS_OP_MIN 3 +#define PFS_OP_MAX 4 +#define PFS_OP_CMP 5 +#define PFS_OP_FRC 6 +#define PFS_OP_EX2 7 +#define PFS_OP_LG2 8 +#define PFS_OP_RCP 9 +#define PFS_OP_RSQ 10 +#define PFS_OP_REPL_ALPHA 11 +#define PFS_OP_CMPH 12 +#define MAX_PFS_OP 12 + +#define PFS_FLAG_SAT (1 << 0) +#define PFS_FLAG_ABS (1 << 1) + +#define ARG_NEG (1 << 5) +#define ARG_ABS (1 << 6) +#define ARG_MASK (127 << 0) +#define ARG_STRIDE 7 +#define SRC_CONST (1 << 5) +#define SRC_MASK (63 << 0) +#define SRC_STRIDE 6 + +#define NOP_INST0 ( \ + (R300_FPI0_OUTC_MAD) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) +#define NOP_INST1 ( \ + ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) +#define NOP_INST2 ( \ + (R300_FPI2_OUTA_MAD) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) +#define NOP_INST3 ( \ + ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + +struct r300_fragment_program; + +extern void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp); + +#endif diff --git a/r300/r300_ioctl.c b/r300/r300_ioctl.c new file mode 100644 index 0000000..ea94ce2 --- /dev/null +++ b/r300/r300_ioctl.c @@ -0,0 +1,719 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. +Copyright (C) 2004 Nicolai Haehnle. +All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include <sched.h> +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "macros.h" +#include "context.h" +#include "swrast/swrast.h" + +#include "r300_context.h" +#include "radeon_ioctl.h" +#include "r300_ioctl.h" +#include "r300_cmdbuf.h" +#include "r300_state.h" +#include "r300_program.h" +#include "radeon_reg.h" +#include "r300_emit.h" + +#include "vblank.h" + +#define CLEARBUFFER_COLOR 0x1 +#define CLEARBUFFER_DEPTH 0x2 +#define CLEARBUFFER_STENCIL 0x4 + +static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) +{ + GLcontext *ctx = r300->radeon.glCtx; + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + GLuint cboffset, cbpitch; + drm_r300_cmd_header_t *cmd2; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + r300ContextPtr rmesa = r300; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", + __FUNCTION__, buffer ? "back" : "front", + dPriv->x, dPriv->y, dPriv->w, dPriv->h); + + if (buffer) { + cboffset = r300->radeon.radeonScreen->backOffset; + cbpitch = r300->radeon.radeonScreen->backPitch; + } else { + cboffset = r300->radeon.radeonScreen->frontOffset; + cbpitch = r300->radeon.radeonScreen->frontPitch; + } + + cboffset += r300->radeon.radeonScreen->fbLocation; + + cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + end_3d(rmesa); + + R300_STATECHANGE(r300, cb); + reg_start(R300_RB3D_COLOROFFSET0, 0); + e32(cboffset); + + if (r300->radeon.radeonScreen->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + cbpitch |= R300_COLOR_TILE_ENABLE; + + reg_start(R300_RB3D_COLORPITCH0, 0); + e32(cbpitch); + + R300_STATECHANGE(r300, cmk); + reg_start(R300_RB3D_COLORMASK, 0); + + if (flags & CLEARBUFFER_COLOR) { + e32((ctx->Color.ColorMask[BCOMP] ? R300_COLORMASK0_B : 0) | + (ctx->Color.ColorMask[GCOMP] ? R300_COLORMASK0_G : 0) | + (ctx->Color.ColorMask[RCOMP] ? R300_COLORMASK0_R : 0) | + (ctx->Color.ColorMask[ACOMP] ? R300_COLORMASK0_A : 0)); + } else { + e32(0x0); + } + + R300_STATECHANGE(r300, zs); + reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2); + + { + uint32_t t1, t2; + + t1 = 0x0; + t2 = 0x0; + + if (flags & CLEARBUFFER_DEPTH) { + t1 |= R300_RB3D_Z_WRITE_ONLY; + t2 |= + (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + } else { + t1 |= R300_RB3D_Z_DISABLED_1; // disable + } + + if (flags & CLEARBUFFER_STENCIL) { + t1 |= R300_RB3D_STENCIL_ENABLE; + t2 |= + (R300_ZS_ALWAYS << + R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) | + (R300_ZS_ALWAYS << + R300_RB3D_ZS1_BACK_FUNC_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT); + } + + e32(t1); + e32(t2); + e32(r300->state.stencil.clear); + } + + cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); + cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; + cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; + cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); + cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); + cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); + cmd2[4].u = r300PackFloat32(1.0); + cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); + cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); + cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); + cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); + + reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); + cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); +} + +static void r300EmitClearState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = r300; + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + int i; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + int has_tcl = 1; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and + * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are + * quite complex; see the functions in r300_emit.c. + * + * I believe it would be a good idea to extend the functions in + * r300_emit.c so that they can be used to setup the default values for + * these registers, as well as the actual values used for rendering. + */ + R300_STATECHANGE(r300, vir[0]); + reg_start(R300_VAP_INPUT_ROUTE_0_0, 0); + if (!has_tcl) + e32(0x22030003); + else + e32(0x21030003); + + /* disable fog */ + R300_STATECHANGE(r300, fogs); + reg_start(R300_RE_FOG_STATE, 0); + e32(0x0); + + R300_STATECHANGE(r300, vir[1]); + reg_start(R300_VAP_INPUT_ROUTE_1_0, 0); + e32(0xF688F688); + + /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ + R300_STATECHANGE(r300, vic); + reg_start(R300_VAP_INPUT_CNTL_0, 1); + e32(R300_INPUT_CNTL_0_COLOR); + e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + + if (!has_tcl) { + R300_STATECHANGE(r300, vte); + /* comes from fglrx startup of clear */ + reg_start(R300_SE_VTE_CNTL, 1); + e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + e32(0x8); + + reg_start(0x21dc, 0); + e32(0xaaaaaaaa); + } + + R300_STATECHANGE(r300, vof); + reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); + e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT); + e32(0x0); /* no textures */ + + R300_STATECHANGE(r300, txe); + reg_start(R300_TX_ENABLE, 0); + e32(0x0); + + R300_STATECHANGE(r300, vpt); + reg_start(R300_SE_VPORT_XSCALE, 5); + efloat(1.0); + efloat(dPriv->x); + efloat(1.0); + efloat(dPriv->y); + efloat(1.0); + efloat(0.0); + + R300_STATECHANGE(r300, at); + reg_start(R300_PP_ALPHA_TEST, 0); + e32(0x0); + + R300_STATECHANGE(r300, bld); + reg_start(R300_RB3D_CBLEND, 1); + e32(0x0); + e32(0x0); + + R300_STATECHANGE(r300, vap_clip_cntl); + reg_start(R300_VAP_CLIP_CNTL, 0); + e32(R300_221C_CLEAR); + + R300_STATECHANGE(r300, ps); + reg_start(R300_RE_POINTSIZE, 0); + e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | + ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_INTERP_0, 8); + for (i = 0; i < 8; ++i) { + e32(R300_RS_INTERP_USED); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_CNTL_0, 1); + e32((1 << R300_RS_CNTL_CI_CNT_SHIFT) | R300_RS_CNTL_0_UNKNOWN_18); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_ROUTE_0, 0); + e32(R300_RS_ROUTE_0_COLOR); + + R300_STATECHANGE(r300, fp); + reg_start(R300_PFS_CNTL_0, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_PFS_NODE_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_PFS_NODE_OUTPUT_COLOR); + + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); + + reg_start(R300_PFS_INSTR0_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + + reg_start(R300_PFS_INSTR1_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + + reg_start(R300_PFS_INSTR2_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + + reg_start(R300_PFS_INSTR3_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + + if (has_tcl) { + R300_STATECHANGE(r300, pvs); + reg_start(R300_VAP_PVS_CNTL_1, 2); + e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | + (0 << R300_PVS_CNTL_1_POS_END_SHIFT) | + (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)); + e32(0x0); + e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT); + + R300_STATECHANGE(r300, vpi); + vsf_start_fragment(0x0, 8); + e32(VP_OUT(ADD, OUT, 0, XYZW)); + e32(VP_IN(IN, 0)); + e32(VP_ZERO()); + e32(0x0); + + e32(VP_OUT(ADD, OUT, 1, XYZW)); + e32(VP_IN(IN, 1)); + e32(VP_ZERO()); + e32(0x0); + } +} + +/** + * Buffer clear + */ +static void r300Clear(GLcontext * ctx, GLbitfield mask) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + int flags = 0; + int bits = 0; + int swapped; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r300Clear\n"); + + { + LOCK_HARDWARE(&r300->radeon); + UNLOCK_HARDWARE(&r300->radeon); + if (dPriv->numClipRects == 0) + return; + } + + if (mask & BUFFER_BIT_FRONT_LEFT) { + flags |= BUFFER_BIT_FRONT_LEFT; + mask &= ~BUFFER_BIT_FRONT_LEFT; + } + + if (mask & BUFFER_BIT_BACK_LEFT) { + flags |= BUFFER_BIT_BACK_LEFT; + mask &= ~BUFFER_BIT_BACK_LEFT; + } + + if (mask & BUFFER_BIT_DEPTH) { + bits |= CLEARBUFFER_DEPTH; + mask &= ~BUFFER_BIT_DEPTH; + } + + if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) { + bits |= CLEARBUFFER_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } + + if (mask) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", + __FUNCTION__, mask); + _swrast_Clear(ctx, mask); + } + + swapped = r300->radeon.sarea->pfCurrentPage == 1; + + /* Make sure it fits there. */ + r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); + if (flags || bits) + r300EmitClearState(ctx); + + if (flags & BUFFER_BIT_FRONT_LEFT) { + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); + bits = 0; + } + + if (flags & BUFFER_BIT_BACK_LEFT) { + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); + bits = 0; + } + + if (bits) + r300ClearBuffer(r300, bits, 0); + +} + +void r300Flush(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit) + r300FlushCmdBuf(r300, __FUNCTION__); +} + +#ifdef USER_BUFFERS +#include "r300_mem.h" + +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +{ + struct r300_dma_buffer *dmabuf; + size = MAX2(size, RADEON_BUFFER_SIZE * 16); + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa); + } + + if (rmesa->dma.current.buf) + r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); + + if (rmesa->dma.nr_released_bufs > 4) + r300FlushCmdBuf(rmesa, __FUNCTION__); + + dmabuf = CALLOC_STRUCT(r300_dma_buffer); + dmabuf->buf = (void *)1; /* hack */ + dmabuf->refcount = 1; + + dmabuf->id = r300_mem_alloc(rmesa, 4, size); + if (dmabuf->id == 0) { + LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ + + r300FlushCmdBufLocked(rmesa, __FUNCTION__); + radeonWaitForIdleLocked(&rmesa->radeon); + + dmabuf->id = r300_mem_alloc(rmesa, 4, size); + + UNLOCK_HARDWARE(&rmesa->radeon); + + if (dmabuf->id == 0) { + fprintf(stderr, + "Error: Could not get dma buffer... exiting\n"); + _mesa_exit(-1); + } + } + + rmesa->dma.current.buf = dmabuf; + rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); + rmesa->dma.current.end = size; + rmesa->dma.current.start = 0; + rmesa->dma.current.ptr = 0; +} + +void r300ReleaseDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, const char *caller) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (!region->buf) + return; + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (--region->buf->refcount == 0) { + r300_mem_free(rmesa, region->buf->id); + FREE(region->buf); + rmesa->dma.nr_released_bufs++; + } + + region->buf = 0; + region->start = 0; +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void r300AllocDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, + int bytes, int alignment) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (region->buf) + r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); + + alignment--; + rmesa->dma.current.start = rmesa->dma.current.ptr = + (rmesa->dma.current.ptr + alignment) & ~alignment; + + if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) + r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); + + region->start = rmesa->dma.current.start; + region->ptr = rmesa->dma.current.start; + region->end = rmesa->dma.current.start + bytes; + region->address = rmesa->dma.current.address; + region->buf = rmesa->dma.current.buf; + region->buf->refcount++; + + rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + rmesa->dma.current.start = + rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; + + assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +} + +#else +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +{ + struct r300_dma_buffer *dmabuf; + int fd = rmesa->radeon.dri.fd; + int index = 0; + int size = 0; + drmDMAReq dma; + int ret; + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa); + } + + if (rmesa->dma.current.buf) + r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); + + if (rmesa->dma.nr_released_bufs > 4) + r300FlushCmdBuf(rmesa, __FUNCTION__); + + dma.context = rmesa->radeon.dri.hwContext; + dma.send_count = 0; + dma.send_list = NULL; + dma.send_sizes = NULL; + dma.flags = 0; + dma.request_count = 1; + dma.request_size = RADEON_BUFFER_SIZE; + dma.request_list = &index; + dma.request_sizes = &size; + dma.granted_count = 0; + + LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ + + ret = drmDMA(fd, &dma); + + if (ret != 0) { + /* Try to release some buffers and wait until we can't get any more */ + if (rmesa->dma.nr_released_bufs) { + r300FlushCmdBufLocked(rmesa, __FUNCTION__); + } + + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "Waiting for buffers\n"); + + radeonWaitForIdleLocked(&rmesa->radeon); + ret = drmDMA(fd, &dma); + + if (ret != 0) { + UNLOCK_HARDWARE(&rmesa->radeon); + fprintf(stderr, + "Error: Could not get dma buffer... exiting\n"); + _mesa_exit(-1); + } + } + + UNLOCK_HARDWARE(&rmesa->radeon); + + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "Allocated buffer %d\n", index); + + dmabuf = CALLOC_STRUCT(r300_dma_buffer); + dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; + dmabuf->refcount = 1; + + rmesa->dma.current.buf = dmabuf; + rmesa->dma.current.address = dmabuf->buf->address; + rmesa->dma.current.end = dmabuf->buf->total; + rmesa->dma.current.start = 0; + rmesa->dma.current.ptr = 0; +} + +void r300ReleaseDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, const char *caller) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (!region->buf) + return; + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (--region->buf->refcount == 0) { + drm_radeon_cmd_header_t *cmd; + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) + fprintf(stderr, "%s -- DISCARD BUF %d\n", + __FUNCTION__, region->buf->buf->idx); + cmd = + (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, + sizeof + (*cmd) / 4, + __FUNCTION__); + cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; + cmd->dma.buf_idx = region->buf->buf->idx; + + FREE(region->buf); + rmesa->dma.nr_released_bufs++; + } + + region->buf = 0; + region->start = 0; +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void r300AllocDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, + int bytes, int alignment) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa); + + if (region->buf) + r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); + + alignment--; + rmesa->dma.current.start = rmesa->dma.current.ptr = + (rmesa->dma.current.ptr + alignment) & ~alignment; + + if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) + r300RefillCurrentDmaRegion(rmesa); + + region->start = rmesa->dma.current.start; + region->ptr = rmesa->dma.current.start; + region->end = rmesa->dma.current.start + bytes; + region->address = rmesa->dma.current.address; + region->buf = rmesa->dma.current.buf; + region->buf->refcount++; + + rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + rmesa->dma.current.start = + rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; + + assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); +} + +#endif + +GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, + GLint size) +{ + int offset = + (char *)pointer - + (char *)rmesa->radeon.radeonScreen->gartTextures.map; + int valid = (size >= 0 && offset >= 0 + && offset + size < + rmesa->radeon.radeonScreen->gartTextures.size); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, + valid); + + return valid; +} + +GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) +{ + int offset = + (char *)pointer - + (char *)rmesa->radeon.radeonScreen->gartTextures.map; + + //fprintf(stderr, "offset=%08x\n", offset); + + if (offset < 0 + || offset > rmesa->radeon.radeonScreen->gartTextures.size) + return ~0; + else + return rmesa->radeon.radeonScreen->gart_texture_offset + offset; +} + +void r300InitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = r300Clear; + functions->Finish = radeonFinish; + functions->Flush = r300Flush; +} diff --git a/r300/r300_ioctl.h b/r300/r300_ioctl.h new file mode 100644 index 0000000..7a19a2c --- /dev/null +++ b/r300/r300_ioctl.h @@ -0,0 +1,59 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_IOCTL_H__ +#define __R300_IOCTL_H__ + +#include "r300_context.h" +#include "radeon_drm.h" + +extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, + const GLvoid * pointer, GLint size); + +extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, + const GLvoid * pointer); + +extern void r300Flush(GLcontext * ctx); + +extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, + const char *caller); +extern void r300AllocDmaRegion(r300ContextPtr rmesa, + struct r300_dma_region *region, int bytes, + int alignment); + +extern void r300InitIoctlFuncs(struct dd_function_table *functions); + +#endif /* __R300_IOCTL_H__ */ diff --git a/r300/r300_mem.c b/r300/r300_mem.c new file mode 100644 index 0000000..f8f9d4f --- /dev/null +++ b/r300/r300_mem.c @@ -0,0 +1,385 @@ +/* + * Copyright (C) 2005 Aapo Tahkola. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Aapo Tahkola <aet@rasterburn.org> + */ + +#include <unistd.h> + +#include "r300_context.h" +#include "r300_cmdbuf.h" +#include "r300_ioctl.h" +#include "r300_mem.h" +#include "radeon_ioctl.h" + +#ifdef USER_BUFFERS + +static void resize_u_list(r300ContextPtr rmesa) +{ + void *temp; + int nsize; + + temp = rmesa->rmm->u_list; + nsize = rmesa->rmm->u_size * 2; + + rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); + _mesa_memset(rmesa->rmm->u_list, 0, + nsize * sizeof(*rmesa->rmm->u_list)); + + if (temp) { + r300FlushCmdBuf(rmesa, __FUNCTION__); + + _mesa_memcpy(rmesa->rmm->u_list, temp, + rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); + _mesa_free(temp); + } + + rmesa->rmm->u_size = nsize; +} + +void r300_mem_init(r300ContextPtr rmesa) +{ + rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); + memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager)); + + rmesa->rmm->u_size = 128; + resize_u_list(rmesa); +} + +void r300_mem_destroy(r300ContextPtr rmesa) +{ + _mesa_free(rmesa->rmm->u_list); + rmesa->rmm->u_list = NULL; + + _mesa_free(rmesa->rmm); + rmesa->rmm = NULL; +} + +void *r300_mem_ptr(r300ContextPtr rmesa, int id) +{ + assert(id <= rmesa->rmm->u_last); + return rmesa->rmm->u_list[id].ptr; +} + +int r300_mem_find(r300ContextPtr rmesa, void *ptr) +{ + int i; + + for (i = 1; i < rmesa->rmm->u_size + 1; i++) + if (rmesa->rmm->u_list[i].ptr && + ptr >= rmesa->rmm->u_list[i].ptr && + ptr < + rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) + break; + + if (i < rmesa->rmm->u_size + 1) + return i; + + fprintf(stderr, "%p failed\n", ptr); + return 0; +} + +//#define MM_DEBUG +int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) +{ + drm_radeon_mem_alloc_t alloc; + int offset = 0, ret; + int i, free = -1; + int done_age; + drm_radeon_mem_free_t memfree; + int tries = 0; + static int bytes_wasted = 0, allocated = 0; + + if (size < 4096) + bytes_wasted += 4096 - size; + + allocated += size; + +#if 0 + static int t = 0; + if (t != time(NULL)) { + t = time(NULL); + fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", + rmesa->rmm->u_last, bytes_wasted / 1024, + allocated / 1024); + } +#endif + + memfree.region = RADEON_MEM_REGION_GART; + + again: + + done_age = radeonGetAge((radeonContextPtr) rmesa); + + if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) + resize_u_list(rmesa); + + for (i = rmesa->rmm->u_last + 1; i > 0; i--) { + if (rmesa->rmm->u_list[i].ptr == NULL) { + free = i; + continue; + } + + if (rmesa->rmm->u_list[i].h_pending == 0 && + rmesa->rmm->u_list[i].pending + && rmesa->rmm->u_list[i].age <= done_age) { + memfree.region_offset = + (char *)rmesa->rmm->u_list[i].ptr - + (char *)rmesa->radeon.radeonScreen->gartTextures. + map; + + ret = + drmCommandWrite(rmesa->radeon.radeonScreen-> + driScreen->fd, DRM_RADEON_FREE, + &memfree, sizeof(memfree)); + + if (ret) { + fprintf(stderr, "Failed to free at %p\n", + rmesa->rmm->u_list[i].ptr); + fprintf(stderr, "ret = %s\n", strerror(-ret)); + exit(1); + } else { +#ifdef MM_DEBUG + fprintf(stderr, "really freed %d at age %x\n", + i, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + if (i == rmesa->rmm->u_last) + rmesa->rmm->u_last--; + + if (rmesa->rmm->u_list[i].size < 4096) + bytes_wasted -= + 4096 - rmesa->rmm->u_list[i].size; + + allocated -= rmesa->rmm->u_list[i].size; + rmesa->rmm->u_list[i].pending = 0; + rmesa->rmm->u_list[i].ptr = NULL; + free = i; + } + } + } + rmesa->rmm->u_head = i; + + if (free == -1) { + WARN_ONCE("Ran out of slots!\n"); + //usleep(100); + r300FlushCmdBuf(rmesa, __FUNCTION__); + tries++; + if (tries > 100) { + WARN_ONCE("Ran out of slots!\n"); + exit(1); + } + goto again; + } + + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = alignment; + alloc.size = size; + alloc.region_offset = &offset; + + ret = + drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, + sizeof(alloc)); + if (ret) { +#if 0 + WARN_ONCE("Ran out of mem!\n"); + r300FlushCmdBuf(rmesa, __FUNCTION__); + //usleep(100); + tries2++; + tries = 0; + if (tries2 > 100) { + WARN_ONCE("Ran out of GART memory!\n"); + exit(1); + } + goto again; +#else + WARN_ONCE + ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", + size); + return 0; +#endif + } + + i = free; + + if (i > rmesa->rmm->u_last) + rmesa->rmm->u_last = i; + + rmesa->rmm->u_list[i].ptr = + ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; + rmesa->rmm->u_list[i].size = size; + rmesa->rmm->u_list[i].age = 0; + //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); + +#ifdef MM_DEBUG + fprintf(stderr, "allocated %d at age %x\n", i, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + + return i; +} + +void r300_mem_use(r300ContextPtr rmesa, int id) +{ + uint64_t ull; +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + drm_r300_cmd_header_t *cmd; + + assert(id <= rmesa->rmm->u_last); + + if (id == 0) + return; + + cmd = + (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, + 2 + sizeof(ull) / 4, + __FUNCTION__); + cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; + cmd[0].scratch.reg = R300_MEM_SCRATCH; + cmd[0].scratch.n_bufs = 1; + cmd[0].scratch.flags = 0; + cmd++; + + ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; + _mesa_memcpy(cmd, &ull, sizeof(ull)); + cmd += sizeof(ull) / 4; + + cmd[0].u = /*id */ 0; + + LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */ + rmesa->rmm->u_list[id].h_pending++; + UNLOCK_HARDWARE(&rmesa->radeon); +} + +unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) +{ + unsigned long offset; + + assert(id <= rmesa->rmm->u_last); + + offset = (char *)rmesa->rmm->u_list[id].ptr - + (char *)rmesa->radeon.radeonScreen->gartTextures.map; + offset += rmesa->radeon.radeonScreen->gart_texture_offset; + + return offset; +} + +void *r300_mem_map(r300ContextPtr rmesa, int id, int access) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + void *ptr; + int tries = 0; + + assert(id <= rmesa->rmm->u_last); + + if (access == R300_MEM_R) { + + if (rmesa->rmm->u_list[id].mapped == 1) + WARN_ONCE("buffer %d already mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 1; + ptr = r300_mem_ptr(rmesa, id); + + return ptr; + } + + if (rmesa->rmm->u_list[id].h_pending) + r300FlushCmdBuf(rmesa, __FUNCTION__); + + if (rmesa->rmm->u_list[id].h_pending) { + return NULL; + } + + while (rmesa->rmm->u_list[id].age > + radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) + usleep(10); + + if (tries >= 1000) { + fprintf(stderr, "Idling failed (%x vs %x)\n", + rmesa->rmm->u_list[id].age, + radeonGetAge((radeonContextPtr) rmesa)); + return NULL; + } + + if (rmesa->rmm->u_list[id].mapped == 1) + WARN_ONCE("buffer %d already mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 1; + ptr = r300_mem_ptr(rmesa, id); + + return ptr; +} + +void r300_mem_unmap(r300ContextPtr rmesa, int id) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + + assert(id <= rmesa->rmm->u_last); + + if (rmesa->rmm->u_list[id].mapped == 0) + WARN_ONCE("buffer %d not mapped\n", id); + + rmesa->rmm->u_list[id].mapped = 0; +} + +void r300_mem_free(r300ContextPtr rmesa, int id) +{ +#ifdef MM_DEBUG + fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, + radeonGetAge((radeonContextPtr) rmesa)); +#endif + + assert(id <= rmesa->rmm->u_last); + + if (id == 0) + return; + + if (rmesa->rmm->u_list[id].ptr == NULL) { + WARN_ONCE("Not allocated!\n"); + return; + } + + if (rmesa->rmm->u_list[id].pending) { + WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); + return; + } + + rmesa->rmm->u_list[id].pending = 1; +} +#endif diff --git a/r300/r300_mem.h b/r300/r300_mem.h new file mode 100644 index 0000000..625a7f6 --- /dev/null +++ b/r300/r300_mem.h @@ -0,0 +1,37 @@ +#ifndef __R300_MEM_H__ +#define __R300_MEM_H__ + +//#define R300_MEM_PDL 0 +#define R300_MEM_UL 1 + +#define R300_MEM_R 1 +#define R300_MEM_W 2 +#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) + +#define R300_MEM_SCRATCH 2 + +struct r300_memory_manager { + struct { + void *ptr; + uint32_t size; + uint32_t age; + uint32_t h_pending; + int pending; + int mapped; + } *u_list; + int u_head, u_size, u_last; + +}; + +extern void r300_mem_init(r300ContextPtr rmesa); +extern void r300_mem_destroy(r300ContextPtr rmesa); +extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); +extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); +extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); +extern void r300_mem_use(r300ContextPtr rmesa, int id); +extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); +extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); +extern void r300_mem_unmap(r300ContextPtr rmesa, int id); +extern void r300_mem_free(r300ContextPtr rmesa, int id); + +#endif diff --git a/r300/r300_program.h b/r300/r300_program.h new file mode 100644 index 0000000..eddd783 --- /dev/null +++ b/r300/r300_program.h @@ -0,0 +1,150 @@ +/* +Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_PROGRAM_H__ +#define __R300_PROGRAM_H__ + +#include "r300_reg.h" + +/** + * Vertex program helper macros + */ + +/* Produce out dword */ +#define VP_OUTCLASS_TMP R300_VPI_OUT_REG_CLASS_TEMPORARY +#define VP_OUTCLASS_OUT R300_VPI_OUT_REG_CLASS_RESULT + +#define VP_OUTMASK_X R300_VPI_OUT_WRITE_X +#define VP_OUTMASK_Y R300_VPI_OUT_WRITE_Y +#define VP_OUTMASK_Z R300_VPI_OUT_WRITE_Z +#define VP_OUTMASK_W R300_VPI_OUT_WRITE_W +#define VP_OUTMASK_XY (VP_OUTMASK_X|VP_OUTMASK_Y) +#define VP_OUTMASK_XZ (VP_OUTMASK_X|VP_OUTMASK_Z) +#define VP_OUTMASK_XW (VP_OUTMASK_X|VP_OUTMASK_W) +#define VP_OUTMASK_XYZ (VP_OUTMASK_XY|VP_OUTMASK_Z) +#define VP_OUTMASK_XYW (VP_OUTMASK_XY|VP_OUTMASK_W) +#define VP_OUTMASK_XZW (VP_OUTMASK_XZ|VP_OUTMASK_W) +#define VP_OUTMASK_XYZW (VP_OUTMASK_XYZ|VP_OUTMASK_W) +#define VP_OUTMASK_YZ (VP_OUTMASK_Y|VP_OUTMASK_Z) +#define VP_OUTMASK_YW (VP_OUTMASK_Y|VP_OUTMASK_W) +#define VP_OUTMASK_YZW (VP_OUTMASK_YZ|VP_OUTMASK_W) +#define VP_OUTMASK_ZW (VP_OUTMASK_Z|VP_OUTMASK_W) + +#define VP_OUT(instr,outclass,outidx,outmask) \ + (R300_VPI_OUT_OP_##instr | \ + ((outidx) << R300_VPI_OUT_REG_INDEX_SHIFT) | \ + VP_OUTCLASS_##outclass | \ + VP_OUTMASK_##outmask) + +/* Produce in dword */ +#define VP_INCLASS_TMP R300_VPI_IN_REG_CLASS_TEMPORARY +#define VP_INCLASS_IN R300_VPI_IN_REG_CLASS_ATTRIBUTE +#define VP_INCLASS_CONST R300_VPI_IN_REG_CLASS_PARAMETER + +#define VP_IN(class,idx) \ + (((idx) << R300_VPI_IN_REG_INDEX_SHIFT) | \ + VP_INCLASS_##class | \ + (R300_VPI_IN_SELECT_X << R300_VPI_IN_X_SHIFT) | \ + (R300_VPI_IN_SELECT_Y << R300_VPI_IN_Y_SHIFT) | \ + (R300_VPI_IN_SELECT_Z << R300_VPI_IN_Z_SHIFT) | \ + (R300_VPI_IN_SELECT_W << R300_VPI_IN_W_SHIFT)) +#define VP_ZERO() \ + ((R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_X_SHIFT) | \ + (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Y_SHIFT) | \ + (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_Z_SHIFT) | \ + (R300_VPI_IN_SELECT_ZERO << R300_VPI_IN_W_SHIFT)) +#define VP_ONE() \ + ((R300_VPI_IN_SELECT_ONE << R300_VPI_IN_X_SHIFT) | \ + (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Y_SHIFT) | \ + (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_Z_SHIFT) | \ + (R300_VPI_IN_SELECT_ONE << R300_VPI_IN_W_SHIFT)) + +#define VP_NEG(in,comp) ((in) ^ (R300_VPI_IN_NEG_##comp)) +#define VP_NEGALL(in,comp) VP_NEG(VP_NEG(VP_NEG(VP_NEG((in),X),Y),Z),W) + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_FPI1_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_FPI1_SRC0C_SHIFT) | \ + ((src1) << R300_FPI1_SRC1C_SHIFT) | \ + ((src2) << R300_FPI1_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_FPI3_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_FPI3_SRC0A_SHIFT) | \ + ((src1) << R300_FPI3_SRC1A_SHIFT) | \ + ((src2) << R300_FPI3_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_FPI0_ARGC_##source +#define FP_ARGA(source) R300_FPI2_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_FPI0_OUTC_##opcode | \ + ((arg0) << R300_FPI0_ARG0C_SHIFT) | \ + ((arg1) << R300_FPI0_ARG1C_SHIFT) | \ + ((arg2) << R300_FPI0_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_FPI2_OUTA_##opcode | \ + ((arg0) << R300_FPI2_ARG0A_SHIFT) | \ + ((arg1) << R300_FPI2_ARG1A_SHIFT) | \ + ((arg2) << R300_FPI2_ARG2A_SHIFT)) + +extern void debug_vp(GLcontext * ctx, struct gl_vertex_program *vp); + +#endif /* __R300_PROGRAM_H__ */ diff --git a/r300/r300_reg.h b/r300/r300_reg.h new file mode 100644 index 0000000..e5501b6 --- /dev/null +++ b/r300/r300_reg.h @@ -0,0 +1,1635 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* *INDENT-OFF* */ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + + +/* + * Vertex Array Processing (VAP) Control + * Stolen from r200 code from Christoph Brill (It's a guess!) + */ +#define R300_VAP_CNTL 0x2080 + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL 0x2084 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. */ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +/* BEGIN: Wild guesses */ +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) /* GUESS */ +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) /* GUESS */ + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +/* END: Wild guesses */ + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA 0x00000001 +# define R300_VPORT_X_OFFSET_ENA 0x00000002 +# define R300_VPORT_Y_SCALE_ENA 0x00000004 +# define R300_VPORT_Y_OFFSET_ENA 0x00000008 +# define R300_VPORT_Z_SCALE_ENA 0x00000010 +# define R300_VPORT_Z_OFFSET_ENA 0x00000020 +# define R300_VTX_XY_FMT 0x00000100 +# define R300_VTX_Z_FMT 0x00000200 +# define R300_VTX_W0_FMT 0x00000400 +# define R300_VTX_W0_NORMALIZE 0x00000800 +# define R300_VTX_ST_DENORMALIZED 0x00001000 + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +#define R300_VAP_CNTL_STATUS 0x2140 +# define R300_VC_NO_SWAP (0 << 0) +# define R300_VC_16BIT_SWAP (1 << 0) +# define R300_VC_32BIT_SWAP (2 << 0) +# define R300_VAP_TCL_BYPASS (1 << 8) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ + +#define R300_VAP_INPUT_ROUTE_0_0 0x2150 +# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) +# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 +# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ +# define R300_VAP_INPUT_ROUTE_END (1 << 13) +# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ +# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ +#define R300_VAP_INPUT_ROUTE_0_1 0x2154 +#define R300_VAP_INPUT_ROUTE_0_2 0x2158 +#define R300_VAP_INPUT_ROUTE_0_3 0x215C +#define R300_VAP_INPUT_ROUTE_0_4 0x2160 +#define R300_VAP_INPUT_ROUTE_0_5 0x2164 +#define R300_VAP_INPUT_ROUTE_0_6 0x2168 +#define R300_VAP_INPUT_ROUTE_0_7 0x216C + +/* gap */ + +/* Notes: + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ +#define R300_VAP_INPUT_CNTL_0 0x2180 +# define R300_INPUT_CNTL_0_COLOR 0x00000001 +#define R300_VAP_INPUT_CNTL_1 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only tweo components. + */ +#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 +# define R300_INPUT_ROUTE_SELECT_MASK 7 +# define R300_INPUT_ROUTE_X_SHIFT 0 +# define R300_INPUT_ROUTE_Y_SHIFT 3 +# define R300_INPUT_ROUTE_Z_SHIFT 6 +# define R300_INPUT_ROUTE_W_SHIFT 9 +# define R300_INPUT_ROUTE_ENABLE (15 << 12) +#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 +#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 +#define R300_VAP_INPUT_ROUTE_1_3 0x21EC +#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 +#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 +#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 +#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 +# define R300_PVS_UPLOAD_PROGRAM 0x00000000 +# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 +# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 +# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 +# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 +# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 +# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 +# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 + +# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600 +# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601 +# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602 +# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603 +# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604 +# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605 + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + * + * 2007-11-05: This register is the user clip plane control register, but there + * also seems to be a rendering mode control; the NORMAL/CLEAR defines. + * + * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view + */ +#define R300_VAP_CLIP_CNTL 0x221C +# define R300_221C_NORMAL 0x00000000 +# define R300_221C_CLEAR 0x0001C000 +#define R300_VAP_UCP_ENABLE_0 (1 << 0) + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ + +/* Absolutely no clue what this register is about. */ +#define R300_VAP_UNKNOWN_2288 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * CNTL_1_UNKNOWN points to instruction where last write to position takes + * place. + * Most likely this is used to ignore rest of the program in cases + * where group of verts arent visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + *position calculations. + */ +#define R300_VAP_PVS_CNTL_1 0x22D0 +# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 +# define R300_PVS_CNTL_1_POS_END_SHIFT 10 +# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +/* Addresses are relative the the vertex program parameters area. */ +#define R300_VAP_PVS_CNTL_2 0x22D4 +# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 +# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 +#define R300_VAP_PVS_CNTL_3 0x22D8 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 +# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_ENABLE (1<<0) +# define R300_GB_LINE_STUFF_ENABLE (1<<1) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1<<2) +# define R300_GB_STENCIL_AUTO_ENABLE (1<<4) +# define R300_GB_UNK31 (1<<31) + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 +#define R300_GB_TEX_ST 1 +#define R300_GB_TEX_STR 2 +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + + +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_ENABLE (1<<0) +# define R300_GB_TILE_PIPE_COUNT_RV300 0 +# define R300_GB_TILE_PIPE_COUNT_R300 (3<<1) +# define R300_GB_TILE_PIPE_COUNT_R420 (7<<1) +# define R300_GB_TILE_PIPE_COUNT_RV410 (3<<1) +# define R300_GB_TILE_SIZE_8 0 +# define R300_GB_TILE_SIZE_16 (1<<4) +# define R300_GB_TILE_SIZE_32 (2<<4) +# define R300_GB_SUPER_SIZE_1 (0<<6) +# define R300_GB_SUPER_SIZE_2 (1<<6) +# define R300_GB_SUPER_SIZE_4 (2<<6) +# define R300_GB_SUPER_SIZE_8 (3<<6) +# define R300_GB_SUPER_SIZE_16 (4<<6) +# define R300_GB_SUPER_SIZE_32 (5<<6) +# define R300_GB_SUPER_SIZE_64 (6<<6) +# define R300_GB_SUPER_SIZE_128 (7<<6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A 0 +# define R300_GB_SUPER_TILE_B (1<<15) +# define R300_GB_SUBPIXEL_1_12 0 +# define R300_GB_SUBPIXEL_1_16 (1<<16) + +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define R300_GB_SELECT 0x401C +# define R300_GB_FOG_SELECT_C0A 0 +# define R300_GB_FOG_SELECT_C1A 1 +# define R300_GB_FOG_SELECT_C2A 2 +# define R300_GB_FOG_SELECT_C3A 3 +# define R300_GB_FOG_SELECT_1_1_W 4 +# define R300_GB_FOG_SELECT_Z 5 +# define R300_GB_DEPTH_SELECT_Z 0 +# define R300_GB_DEPTH_SELECT_1_1_W (1<<3) +# define R300_GB_W_SELECT_1_W 0 +# define R300_GB_W_SELECT_1 (1<<4) + +#define R300_GB_AA_CONFIG 0x4020 +# define R300_AA_DISABLE 0x00 +# define R300_AA_ENABLE 0x01 +# define R300_AA_SUBSAMPLES_2 0 +# define R300_AA_SUBSAMPLES_3 (1<<1) +# define R300_AA_SUBSAMPLES_4 (2<<1) +# define R300_AA_SUBSAMPLES_6 (3<<1) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_CNTL 0x4100 +#define R300_TX_FLUSH 0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. */ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +/* The pointsize is given in multiples of 6. The pointsize can be + * enormous: Clear() renders a single point that fills the entire + * framebuffer. + */ +#define R300_RE_POINTSIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK (0xFFFF << 0) /* GUESS */ +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK (0xFFFF << 16) /* GUESS */ +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_RE_LINE_CNT 0x4234 +# define R300_LINESIZE_SHIFT 0 +# define R300_LINESIZE_MASK (0xFFFF << 0) /* GUESS */ +# define R300_LINESIZE_MAX (R300_LINESIZE_MASK / 6) +# define R300_LINE_CNT_HO (1 << 16) +# define R300_LINE_CNT_VE (1 << 17) + +/* Some sort of scale or clamp value for texcoordless textures. */ +#define R300_RE_UNK4238 0x4238 + +/* Something shade related */ +#define R300_RE_SHADE 0x4274 + +#define R300_RE_SHADE_MODEL 0x4278 +# define R300_RE_SHADE_MODEL_SMOOTH 0x3aaaa +# define R300_RE_SHADE_MODEL_FLAT 0x39595 + +/* Dangerous */ +#define R300_RE_POLYGON_MODE 0x4288 +# define R300_PM_ENABLED (1 << 0) +# define R300_PM_FRONT_POINT (0 << 0) +# define R300_PM_BACK_POINT (0 << 0) +# define R300_PM_FRONT_LINE (1 << 4) +# define R300_PM_FRONT_FILL (1 << 5) +# define R300_PM_BACK_LINE (1 << 7) +# define R300_PM_BACK_FILL (1 << 8) + +/* Fog parameters */ +#define R300_RE_FOG_SCALE 0x4294 +#define R300_RE_FOG_START 0x4298 + +/* Not sure why there are duplicate of factor and constant values. + * My best guess so far is that there are seperate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ +#define R300_RE_ZBIAS_T_FACTOR 0x42A4 +#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 +#define R300_RE_ZBIAS_W_FACTOR 0x42AC +#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesnt explain why depth writes work ... + */ +#define R300_RE_OCCLUSION_CNTL 0x42B4 +# define R300_OCCLUSION_ON (1<<1) + +#define R300_RE_CULL_CNTL 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* 0_UNKNOWN_18 has always been set except for clear operations. + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_CNTL_0 0x4300 +# define R300_RS_CNTL_TC_CNT_SHIFT 2 +# define R300_RS_CNTL_TC_CNT_MASK (7 << 2) + /* number of color interpolators used */ +# define R300_RS_CNTL_CI_CNT_SHIFT 7 +# define R300_RS_CNTL_0_UNKNOWN_18 (1 << 18) + /* Guess: RS_CNTL_1 holds the index of the highest used RS_ROUTE_n + register. */ +#define R300_RS_CNTL_1 0x4304 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. + */ +#define R300_RS_INTERP_0 0x4310 +#define R300_RS_INTERP_1 0x4314 +# define R300_RS_INTERP_1_UNKNOWN 0x40 +#define R300_RS_INTERP_2 0x4318 +# define R300_RS_INTERP_2_UNKNOWN 0x80 +#define R300_RS_INTERP_3 0x431C +# define R300_RS_INTERP_3_UNKNOWN 0xC0 +#define R300_RS_INTERP_4 0x4320 +#define R300_RS_INTERP_5 0x4324 +#define R300_RS_INTERP_6 0x4328 +#define R300_RS_INTERP_7 0x432C +# define R300_RS_INTERP_SRC_SHIFT 2 +# define R300_RS_INTERP_SRC_MASK (7 << 2) +# define R300_RS_INTERP_USED 0x00D10000 + +/* These DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. + */ +#define R300_RS_ROUTE_0 0x4330 +#define R300_RS_ROUTE_1 0x4334 +#define R300_RS_ROUTE_2 0x4338 +#define R300_RS_ROUTE_3 0x433C /* GUESS */ +#define R300_RS_ROUTE_4 0x4340 /* GUESS */ +#define R300_RS_ROUTE_5 0x4344 /* GUESS */ +#define R300_RS_ROUTE_6 0x4348 /* GUESS */ +#define R300_RS_ROUTE_7 0x434C /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_0 0 +# define R300_RS_ROUTE_SOURCE_INTERP_1 1 +# define R300_RS_ROUTE_SOURCE_INTERP_2 2 +# define R300_RS_ROUTE_SOURCE_INTERP_3 3 +# define R300_RS_ROUTE_SOURCE_INTERP_4 4 +# define R300_RS_ROUTE_SOURCE_INTERP_5 5 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_6 6 /* GUESS */ +# define R300_RS_ROUTE_SOURCE_INTERP_7 7 /* GUESS */ +# define R300_RS_ROUTE_ENABLE (1 << 3) /* GUESS */ +# define R300_RS_ROUTE_DEST_SHIFT 6 +# define R300_RS_ROUTE_DEST_MASK (31 << 6) /* GUESS */ + +/* Special handling for color: When the fragment program uses color, + * the ROUTE_0_COLOR bit is set and ROUTE_0_COLOR_DEST contains the + * color register index. + * + * Apperently you may set the R300_RS_ROUTE_0_COLOR bit, but not provide any + * R300_RS_ROUTE_0_COLOR_DEST value; this setup is used for clearing the state. + * See r300_ioctl.c:r300EmitClearState. I'm not sure if this setup is strictly + * correct or not. - Oliver. + */ +# define R300_RS_ROUTE_0_COLOR (1 << 14) +# define R300_RS_ROUTE_0_COLOR_DEST_SHIFT 17 +# define R300_RS_ROUTE_0_COLOR_DEST_MASK (31 << 17) /* GUESS */ +/* As above, but for secondary color */ +# define R300_RS_ROUTE_1_COLOR1 (1 << 14) +# define R300_RS_ROUTE_1_COLOR1_DEST_SHIFT 17 +# define R300_RS_ROUTE_1_COLOR1_DEST_MASK (31 << 17) +# define R300_RS_ROUTE_1_UNKNOWN11 (1 << 11) +/* END: Rasterization / Interpolators - many guesses */ + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). + * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ +#define R300_RE_CLIPRECT_TL_0 0x43B0 +#define R300_RE_CLIPRECT_BR_0 0x43B4 +#define R300_RE_CLIPRECT_TL_1 0x43B8 +#define R300_RE_CLIPRECT_BR_1 0x43BC +#define R300_RE_CLIPRECT_TL_2 0x43C0 +#define R300_RE_CLIPRECT_BR_2 0x43C4 +#define R300_RE_CLIPRECT_TL_3 0x43C8 +#define R300_RE_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_RE_CLIPRECT_CNTL 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ + +#define R300_RE_SCISSORS_TL 0x43E0 +#define R300_RE_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER_0 0x4400 +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP 4 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_Q_SHIFT 6 +# define R300_TX_WRAP_Q_MASK (7 << 6) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11) +# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11) +# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) + +/* NOTE: NEAREST doesnt seem to exist. + * Im not seting MAG_FILTER_MASK and (3 << 11) on for all + * anisotropy modes because that would void selected mag filter + */ +# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21) +# define R300_TX_MAX_ANISO_MASK (14 << 21) + +#define R300_TX_FILTER1_0 0x4440 +# define R300_CHROMA_KEY_MODE_DISABLE 0 +# define R300_CHROMA_KEY_FORCE 1 +# define R300_CHROMA_KEY_BLEND 2 +# define R300_MC_ROUND_NORMAL (0<<2) +# define R300_MC_ROUND_MPEG4 (1<<2) +# define R300_LOD_BIAS_MASK 0x1fff +# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) +# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) +# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) +# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) +# define R300_TX_TRI_PERF_0_8 (0<<15) +# define R300_TX_TRI_PERF_1_8 (1<<15) +# define R300_TX_TRI_PERF_1_4 (2<<15) +# define R300_TX_TRI_PERF_3_8 (3<<15) +# define R300_ANISO_THRESHOLD_MASK (7<<17) + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_UNK23 (1 << 23) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. + They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R300_TX_FORMAT_X16 0x1 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + /* 0x16 - some 16 bit green format.. ?? */ +# define R300_TX_FORMAT_UNK25 (1 << 25) /* no swizzle */ +# define R300_TX_FORMAT_CUBIC_MAP (1 << 26) + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? */ +# define R300_TX_FORMAT_CONST_X (1<<5) +# define R300_TX_FORMAT_CONST_Y (2<<5) +# define R300_TX_FORMAT_CONST_Z (4<<5) +# define R300_TX_FORMAT_CONST_W (8<<5) + +# define R300_TX_FORMAT_YUV_MODE 0x00800000 + +#define R300_TX_PITCH_0 0x4500 /* obvious missing in gap */ +#define R300_TX_OFFSET_0 0x4540 + /* BEGIN: Guess from R200 */ +# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) +# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_OFFSET_MASK 0xffffffe0 +# define R300_TXO_OFFSET_SHIFT 5 + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ +#define R300_TX_BORDER_COLOR_0 0x45C0 + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. + * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ +#define R300_PFS_CNTL_0 0x4600 +# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 +# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) +# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) +#define R300_PFS_CNTL_1 0x4604 +/* There is an unshifted value here which has so far always been equal to the + * index of the highest used temporary register. + */ +#define R300_PFS_CNTL_2 0x4608 +# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 +# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_SHIFT 6 +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ +# define R300_PFS_CNTL_TEX_END_SHIFT 18 +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ + +/* gap */ + +/* Nodes are stored backwards. The last active node is always stored in + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + * LAST_NODE is set for the last node, and only for the last node. + */ +#define R300_PFS_NODE_0 0x4610 +#define R300_PFS_NODE_1 0x4614 +#define R300_PFS_NODE_2 0x4618 +#define R300_PFS_NODE_3 0x461C +# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 +# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_NODE_ALU_END_SHIFT 6 +# define R300_PFS_NODE_ALU_END_MASK (63 << 6) +# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 +# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) +# define R300_PFS_NODE_TEX_END_SHIFT 17 +# define R300_PFS_NODE_TEX_END_MASK (31 << 17) +/*# define R300_PFS_NODE_LAST_NODE (1 << 22) */ +# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) +# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) + +/* TEX + * As far as I can tell, texture instructions cannot write into output + * registers directly. A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ +#define R300_PFS_TEXI_0 0x4620 +# define R300_FPITX_SRC_SHIFT 0 +# define R300_FPITX_SRC_MASK (31 << 0) + /* GUESS */ +# define R300_FPITX_SRC_CONST (1 << 5) +# define R300_FPITX_DST_SHIFT 6 +# define R300_FPITX_DST_MASK (31 << 6) +# define R300_FPITX_IMAGE_SHIFT 11 + /* GUESS based on layout and native limits */ +# define R300_FPITX_IMAGE_MASK (15 << 11) +/* Unsure if these are opcodes, or some kind of bitfield, but this is how + * they were set when I checked + */ +# define R300_FPITX_OPCODE_SHIFT 15 +# define R300_FPITX_OP_TEX 1 +# define R300_FPITX_OP_KIL 2 +# define R300_FPITX_OP_TXP 3 +# define R300_FPITX_OP_TXB 4 +# define R300_FPITX_OPCODE_MASK (7 << 15) + +/* ALU + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants + * zero and one. + * Swizzling and negation happens in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this all makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). + * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program. + * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_PFS_INSTR1_0 0x46C0 +# define R300_FPI1_SRC0C_SHIFT 0 +# define R300_FPI1_SRC0C_MASK (31 << 0) +# define R300_FPI1_SRC0C_CONST (1 << 5) +# define R300_FPI1_SRC1C_SHIFT 6 +# define R300_FPI1_SRC1C_MASK (31 << 6) +# define R300_FPI1_SRC1C_CONST (1 << 11) +# define R300_FPI1_SRC2C_SHIFT 12 +# define R300_FPI1_SRC2C_MASK (31 << 12) +# define R300_FPI1_SRC2C_CONST (1 << 17) +# define R300_FPI1_SRC_MASK 0x0003ffff +# define R300_FPI1_DSTC_SHIFT 18 +# define R300_FPI1_DSTC_MASK (31 << 18) +# define R300_FPI1_DSTC_REG_MASK_SHIFT 23 +# define R300_FPI1_DSTC_REG_X (1 << 23) +# define R300_FPI1_DSTC_REG_Y (1 << 24) +# define R300_FPI1_DSTC_REG_Z (1 << 25) +# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_FPI1_DSTC_OUTPUT_X (1 << 26) +# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) +# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28) + +#define R300_PFS_INSTR3_0 0x47C0 +# define R300_FPI3_SRC0A_SHIFT 0 +# define R300_FPI3_SRC0A_MASK (31 << 0) +# define R300_FPI3_SRC0A_CONST (1 << 5) +# define R300_FPI3_SRC1A_SHIFT 6 +# define R300_FPI3_SRC1A_MASK (31 << 6) +# define R300_FPI3_SRC1A_CONST (1 << 11) +# define R300_FPI3_SRC2A_SHIFT 12 +# define R300_FPI3_SRC2A_MASK (31 << 12) +# define R300_FPI3_SRC2A_CONST (1 << 17) +# define R300_FPI3_SRC_MASK 0x0003ffff +# define R300_FPI3_DSTA_SHIFT 18 +# define R300_FPI3_DSTA_MASK (31 << 18) +# define R300_FPI3_DSTA_REG (1 << 23) +# define R300_FPI3_DSTA_OUTPUT (1 << 24) +# define R300_FPI3_DSTA_DEPTH (1 << 27) + +#define R300_PFS_INSTR0_0 0x48C0 +# define R300_FPI0_ARGC_SRC0C_XYZ 0 +# define R300_FPI0_ARGC_SRC0C_XXX 1 +# define R300_FPI0_ARGC_SRC0C_YYY 2 +# define R300_FPI0_ARGC_SRC0C_ZZZ 3 +# define R300_FPI0_ARGC_SRC1C_XYZ 4 +# define R300_FPI0_ARGC_SRC1C_XXX 5 +# define R300_FPI0_ARGC_SRC1C_YYY 6 +# define R300_FPI0_ARGC_SRC1C_ZZZ 7 +# define R300_FPI0_ARGC_SRC2C_XYZ 8 +# define R300_FPI0_ARGC_SRC2C_XXX 9 +# define R300_FPI0_ARGC_SRC2C_YYY 10 +# define R300_FPI0_ARGC_SRC2C_ZZZ 11 +# define R300_FPI0_ARGC_SRC0A 12 +# define R300_FPI0_ARGC_SRC1A 13 +# define R300_FPI0_ARGC_SRC2A 14 +# define R300_FPI0_ARGC_SRC1C_LRP 15 +# define R300_FPI0_ARGC_ZERO 20 +# define R300_FPI0_ARGC_ONE 21 + /* GUESS */ +# define R300_FPI0_ARGC_HALF 22 +# define R300_FPI0_ARGC_SRC0C_YZX 23 +# define R300_FPI0_ARGC_SRC1C_YZX 24 +# define R300_FPI0_ARGC_SRC2C_YZX 25 +# define R300_FPI0_ARGC_SRC0C_ZXY 26 +# define R300_FPI0_ARGC_SRC1C_ZXY 27 +# define R300_FPI0_ARGC_SRC2C_ZXY 28 +# define R300_FPI0_ARGC_SRC0CA_WZY 29 +# define R300_FPI0_ARGC_SRC1CA_WZY 30 +# define R300_FPI0_ARGC_SRC2CA_WZY 31 + +# define R300_FPI0_ARG0C_SHIFT 0 +# define R300_FPI0_ARG0C_MASK (31 << 0) +# define R300_FPI0_ARG0C_NEG (1 << 5) +# define R300_FPI0_ARG0C_ABS (1 << 6) +# define R300_FPI0_ARG1C_SHIFT 7 +# define R300_FPI0_ARG1C_MASK (31 << 7) +# define R300_FPI0_ARG1C_NEG (1 << 12) +# define R300_FPI0_ARG1C_ABS (1 << 13) +# define R300_FPI0_ARG2C_SHIFT 14 +# define R300_FPI0_ARG2C_MASK (31 << 14) +# define R300_FPI0_ARG2C_NEG (1 << 19) +# define R300_FPI0_ARG2C_ABS (1 << 20) +# define R300_FPI0_SPECIAL_LRP (1 << 21) +# define R300_FPI0_OUTC_MAD (0 << 23) +# define R300_FPI0_OUTC_DP3 (1 << 23) +# define R300_FPI0_OUTC_DP4 (2 << 23) +# define R300_FPI0_OUTC_MIN (4 << 23) +# define R300_FPI0_OUTC_MAX (5 << 23) +# define R300_FPI0_OUTC_CMPH (7 << 23) +# define R300_FPI0_OUTC_CMP (8 << 23) +# define R300_FPI0_OUTC_FRC (9 << 23) +# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23) +# define R300_FPI0_OUTC_SAT (1 << 30) +# define R300_FPI0_INSERT_NOP (1 << 31) + +#define R300_PFS_INSTR2_0 0x49C0 +# define R300_FPI2_ARGA_SRC0C_X 0 +# define R300_FPI2_ARGA_SRC0C_Y 1 +# define R300_FPI2_ARGA_SRC0C_Z 2 +# define R300_FPI2_ARGA_SRC1C_X 3 +# define R300_FPI2_ARGA_SRC1C_Y 4 +# define R300_FPI2_ARGA_SRC1C_Z 5 +# define R300_FPI2_ARGA_SRC2C_X 6 +# define R300_FPI2_ARGA_SRC2C_Y 7 +# define R300_FPI2_ARGA_SRC2C_Z 8 +# define R300_FPI2_ARGA_SRC0A 9 +# define R300_FPI2_ARGA_SRC1A 10 +# define R300_FPI2_ARGA_SRC2A 11 +# define R300_FPI2_ARGA_SRC1A_LRP 15 +# define R300_FPI2_ARGA_ZERO 16 +# define R300_FPI2_ARGA_ONE 17 + /* GUESS */ +# define R300_FPI2_ARGA_HALF 18 +# define R300_FPI2_ARG0A_SHIFT 0 +# define R300_FPI2_ARG0A_MASK (31 << 0) +# define R300_FPI2_ARG0A_NEG (1 << 5) + /* GUESS */ +# define R300_FPI2_ARG0A_ABS (1 << 6) +# define R300_FPI2_ARG1A_SHIFT 7 +# define R300_FPI2_ARG1A_MASK (31 << 7) +# define R300_FPI2_ARG1A_NEG (1 << 12) + /* GUESS */ +# define R300_FPI2_ARG1A_ABS (1 << 13) +# define R300_FPI2_ARG2A_SHIFT 14 +# define R300_FPI2_ARG2A_MASK (31 << 14) +# define R300_FPI2_ARG2A_NEG (1 << 19) + /* GUESS */ +# define R300_FPI2_ARG2A_ABS (1 << 20) +# define R300_FPI2_SPECIAL_LRP (1 << 21) +# define R300_FPI2_OUTA_MAD (0 << 23) +# define R300_FPI2_OUTA_DP4 (1 << 23) +# define R300_FPI2_OUTA_MIN (2 << 23) +# define R300_FPI2_OUTA_MAX (3 << 23) +# define R300_FPI2_OUTA_CMP (6 << 23) +# define R300_FPI2_OUTA_FRC (7 << 23) +# define R300_FPI2_OUTA_EX2 (8 << 23) +# define R300_FPI2_OUTA_LG2 (9 << 23) +# define R300_FPI2_OUTA_RCP (10 << 23) +# define R300_FPI2_OUTA_RSQ (11 << 23) +# define R300_FPI2_OUTA_SAT (1 << 30) +# define R300_FPI2_UNKNOWN_31 (1 << 31) +/* END: Fragment program instruction set */ + +/* Fog state and color */ +#define R300_RE_FOG_STATE 0x4BC0 +# define R300_FOG_ENABLE (1 << 0) +# define R300_FOG_MODE_LINEAR (0 << 1) +# define R300_FOG_MODE_EXP (1 << 1) +# define R300_FOG_MODE_EXP2 (2 << 1) +# define R300_FOG_MODE_MASK (3 << 1) +#define R300_FOG_COLOR_R 0x4BC8 +#define R300_FOG_COLOR_G 0x4BCC +#define R300_FOG_COLOR_B 0x4BD0 + +#define R300_PP_ALPHA_TEST 0x4BD4 +# define R300_REF_ALPHA_MASK 0x000000ff +# define R300_ALPHA_TEST_FAIL (0 << 8) +# define R300_ALPHA_TEST_LESS (1 << 8) +# define R300_ALPHA_TEST_LEQUAL (3 << 8) +# define R300_ALPHA_TEST_EQUAL (2 << 8) +# define R300_ALPHA_TEST_GEQUAL (6 << 8) +# define R300_ALPHA_TEST_GREATER (4 << 8) +# define R300_ALPHA_TEST_NEQUAL (5 << 8) +# define R300_ALPHA_TEST_PASS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_ALPHA_TEST_ENABLE (1 << 11) + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X 0x4C00 +#define R300_PFS_PARAM_0_Y 0x4C04 +#define R300_PFS_PARAM_0_Z 0x4C08 +#define R300_PFS_PARAM_0_W 0x4C0C +/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */ +#define R300_PFS_PARAM_31_X 0x4DF0 +#define R300_PFS_PARAM_31_Y 0x4DF4 +#define R300_PFS_PARAM_31_Z 0x4DF8 +#define R300_PFS_PARAM_31_W 0x4DFC + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ +#define R300_RB3D_CBLEND 0x4E04 +#define R300_RB3D_ABLEND 0x4E08 +/* the following only appear in CBLEND */ +# define R300_BLEND_ENABLE (1 << 0) +# define R300_BLEND_UNKNOWN (3 << 1) +# define R300_BLEND_NO_SEPARATE (1 << 3) +/* the following are shared between CBLEND and ABLEND */ +# define R300_FCN_MASK (3 << 12) +# define R300_COMB_FCN_ADD_CLAMP (0 << 12) +# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define R300_COMB_FCN_SUB_CLAMP (2 << 12) +# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT (24) +#define R300_RB3D_BLEND_COLOR 0x4E10 +#define R300_RB3D_COLORMASK 0x4E0C +# define R300_COLORMASK0_B (1<<0) +# define R300_COLORMASK0_G (1<<1) +# define R300_COLORMASK0_R (1<<2) +# define R300_COLORMASK0_A (1<<3) + +/* gap */ + +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ +#define R300_RB3D_COLOROFFSET1 0x4E2C /* GUESS */ +#define R300_RB3D_COLOROFFSET2 0x4E30 /* GUESS */ +#define R300_RB3D_COLOROFFSET3 0x4E34 /* GUESS */ + +/* gap */ + +/* Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ +#define R300_RB3D_COLORPITCH0 0x4E38 +# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */ +# define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */ +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ +# define R300_COLOR_FORMAT_RGB565 (2 << 22) +# define R300_COLOR_FORMAT_ARGB8888 (3 << 22) +#define R300_RB3D_COLORPITCH1 0x4E3C /* GUESS */ +#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */ +#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */ + +/* gap */ + +/* Guess by Vladimir. + * Set to 0A before 3D operations, set to 02 afterwards. + */ +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C +# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 +# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. so plain write is 6 - vd + */ +#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00 +# define R300_RB3D_Z_DISABLED_1 0x00000010 +# define R300_RB3D_Z_DISABLED_2 0x00000014 +# define R300_RB3D_Z_TEST 0x00000012 +# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 +# define R300_RB3D_Z_WRITE_ONLY 0x00000006 + +# define R300_RB3D_Z_TEST 0x00000012 +# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 +# define R300_RB3D_Z_WRITE_ONLY 0x00000006 +# define R300_RB3D_STENCIL_ENABLE 0x00000001 + +#define R300_RB3D_ZSTENCIL_CNTL_1 0x4F04 + /* functions */ +# define R300_ZS_NEVER 0 +# define R300_ZS_LESS 1 +# define R300_ZS_LEQUAL 2 +# define R300_ZS_EQUAL 3 +# define R300_ZS_GEQUAL 4 +# define R300_ZS_GREATER 5 +# define R300_ZS_NOTEQUAL 6 +# define R300_ZS_ALWAYS 7 +# define R300_ZS_MASK 7 + /* operations */ +# define R300_ZS_KEEP 0 +# define R300_ZS_ZERO 1 +# define R300_ZS_REPLACE 2 +# define R300_ZS_INCR 3 +# define R300_ZS_DECR 4 +# define R300_ZS_INVERT 5 +# define R300_ZS_INCR_WRAP 6 +# define R300_ZS_DECR_WRAP 7 + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ +# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0 +# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3 +# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6 +# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9 +# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15 +# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18 +# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21 +# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_RB3D_ZSTENCIL_CNTL_2 0x4F08 +# define R300_RB3D_ZS2_STENCIL_REF_SHIFT 0 +# define R300_RB3D_ZS2_STENCIL_MASK 0xFF +# define R300_RB3D_ZS2_STENCIL_MASK_SHIFT 8 +# define R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT 16 + +/* gap */ + +#define R300_RB3D_ZSTENCIL_FORMAT 0x4F10 +# define R300_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) + /* 16 bit format or some aditional bit ? */ +# define R300_DEPTH_FORMAT_UNK32 (32 << 0) + +#define R300_RB3D_EARLY_Z 0x4F14 +# define R300_EARLY_Z_DISABLE (0 << 0) +# define R300_EARLY_Z_ENABLE (1 << 0) + +/* gap */ + +#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ +# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1 +# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3 + +/* gap */ + +#define R300_RB3D_DEPTHOFFSET 0x4F20 +#define R300_RB3D_DEPTHPITCH 0x4F24 +# define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */ +# define R300_DEPTH_TILE_ENABLE (1 << 16) /* GUESS */ +# define R300_DEPTH_MICROTILE_ENABLE (1 << 17) /* GUESS */ +# define R300_DEPTH_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ +# define R300_DEPTH_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ +# define R300_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ + +/* BEGIN: Vertex program instruction set */ + +/* Every instruction is four dwords long: + * DWORD 0: output and opcode + * DWORD 1: first argument + * DWORD 2: second argument + * DWORD 3: third argument + * + * Notes: + * - ABS r, a is implemented as MAX r, a, -a + * - MOV is implemented as ADD to zero + * - XPD is implemented as MUL + MAD + * - FLR is implemented as FRC + ADD + * - apparently, fglrx tries to schedule instructions so that there is at + * least one instruction between the write to a temporary and the first + * read from said temporary; however, violations of this scheduling are + * allowed + * - register indices seem to be unrelated with OpenGL aliasing to + * conventional state + * - only one attribute and one parameter can be loaded at a time; however, + * the same attribute/parameter can be used for more than one argument + * - the second software argument for POW is the third hardware argument + * (no idea why) + * - MAD with only temporaries as input seems to use VPI_OUT_SELECT_MAD_2 + * + * There is some magic surrounding LIT: + * The single argument is replicated across all three inputs, but swizzled: + * First argument: xyzy + * Second argument: xyzx + * Third argument: xyzw + * Whenever the result is used later in the fragment program, fglrx forces + * x and w to be 1.0 in the input selection; I don't know whether this is + * strictly necessary + */ +#define R300_VPI_OUT_OP_DOT (1 << 0) +#define R300_VPI_OUT_OP_MUL (2 << 0) +#define R300_VPI_OUT_OP_ADD (3 << 0) +#define R300_VPI_OUT_OP_MAD (4 << 0) +#define R300_VPI_OUT_OP_DST (5 << 0) +#define R300_VPI_OUT_OP_FRC (6 << 0) +#define R300_VPI_OUT_OP_MAX (7 << 0) +#define R300_VPI_OUT_OP_MIN (8 << 0) +#define R300_VPI_OUT_OP_SGE (9 << 0) +#define R300_VPI_OUT_OP_SLT (10 << 0) + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, vector(scalar, vector) */ +#define R300_VPI_OUT_OP_UNK12 (12 << 0) +#define R300_VPI_OUT_OP_ARL (13 << 0) +#define R300_VPI_OUT_OP_EXP (65 << 0) +#define R300_VPI_OUT_OP_LOG (66 << 0) + /* Used in fog computations, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK67 (67 << 0) +#define R300_VPI_OUT_OP_LIT (68 << 0) +#define R300_VPI_OUT_OP_POW (69 << 0) +#define R300_VPI_OUT_OP_RCP (70 << 0) +#define R300_VPI_OUT_OP_RSQ (72 << 0) + /* Used in GL_POINT_DISTANCE_ATTENUATION_ARB, scalar(scalar) */ +#define R300_VPI_OUT_OP_UNK73 (73 << 0) +#define R300_VPI_OUT_OP_EX2 (75 << 0) +#define R300_VPI_OUT_OP_LG2 (76 << 0) +#define R300_VPI_OUT_OP_MAD_2 (128 << 0) + /* all temps, vector(scalar, vector, vector) */ +#define R300_VPI_OUT_OP_UNK129 (129 << 0) + +#define R300_VPI_OUT_REG_CLASS_TEMPORARY (0 << 8) +#define R300_VPI_OUT_REG_CLASS_ADDR (1 << 8) +#define R300_VPI_OUT_REG_CLASS_RESULT (2 << 8) +#define R300_VPI_OUT_REG_CLASS_MASK (31 << 8) + +#define R300_VPI_OUT_REG_INDEX_SHIFT 13 + /* GUESS based on fglrx native limits */ +#define R300_VPI_OUT_REG_INDEX_MASK (31 << 13) + +#define R300_VPI_OUT_WRITE_X (1 << 20) +#define R300_VPI_OUT_WRITE_Y (1 << 21) +#define R300_VPI_OUT_WRITE_Z (1 << 22) +#define R300_VPI_OUT_WRITE_W (1 << 23) + +#define R300_VPI_IN_REG_CLASS_TEMPORARY (0 << 0) +#define R300_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0) +#define R300_VPI_IN_REG_CLASS_PARAMETER (2 << 0) +#define R300_VPI_IN_REG_CLASS_NONE (9 << 0) +#define R300_VPI_IN_REG_CLASS_MASK (31 << 0) + +#define R300_VPI_IN_REG_INDEX_SHIFT 5 + /* GUESS based on fglrx native limits */ +#define R300_VPI_IN_REG_INDEX_MASK (255 << 5) + +/* The R300 can select components from the input register arbitrarily. + * Use the following constants, shifted by the component shift you + * want to select + */ +#define R300_VPI_IN_SELECT_X 0 +#define R300_VPI_IN_SELECT_Y 1 +#define R300_VPI_IN_SELECT_Z 2 +#define R300_VPI_IN_SELECT_W 3 +#define R300_VPI_IN_SELECT_ZERO 4 +#define R300_VPI_IN_SELECT_ONE 5 +#define R300_VPI_IN_SELECT_MASK 7 + +#define R300_VPI_IN_X_SHIFT 13 +#define R300_VPI_IN_Y_SHIFT 16 +#define R300_VPI_IN_Z_SHIFT 19 +#define R300_VPI_IN_W_SHIFT 22 + +#define R300_VPI_IN_NEG_X (1 << 25) +#define R300_VPI_IN_NEG_Y (1 << 26) +#define R300_VPI_IN_NEG_Z (1 << 27) +#define R300_VPI_IN_NEG_W (1 << 28) +/* END: Vertex program instruction set */ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. */ +#define R300_PRIM_TYPE_NONE (0 << 0) +#define R300_PRIM_TYPE_POINT (1 << 0) +#define R300_PRIM_TYPE_LINE (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) +#define R300_PRIM_TYPE_QUADS (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) +#define R300_PRIM_TYPE_POLYGON (15 << 0) +#define R300_PRIM_TYPE_MASK 0xF +#define R300_PRIM_WALK_IND (1 << 4) +#define R300_PRIM_WALK_LIST (2 << 4) +#define R300_PRIM_WALK_RING (3 << 4) +#define R300_PRIM_WALK_MASK (3 << 4) + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT 16 +#define R300_PRIM_NUM_VERTICES_MASK 0xffff + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. The first parameter appears to be always 0 + * 1. The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ +#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 + +#define R300_PACKET3_INDX_BUFFER 0x00003300 +# define R300_EB_UNK1_SHIFT 24 +# define R300_EB_UNK1 (0x80<<24) +# define R300_EB_UNK2 0x0810 +#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + +#endif /* _R300_REG_H */ + +/* *INDENT-ON* */ diff --git a/r300/r300_render.c b/r300/r300_render.c new file mode 100644 index 0000000..cc13e9a --- /dev/null +++ b/r300/r300_render.c @@ -0,0 +1,536 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \brief R300 Render (Vertex Buffer Implementation) + * + * The immediate implementation has been removed from CVS in favor of the vertex + * buffer implementation. + * + * The render functions are called by the pipeline manager to render a batch of + * primitives. They return TRUE to pass on to the next stage (i.e. software + * rasterization) or FALSE to indicate that the pipeline has finished after + * rendering something. + * + * When falling back to software TCL still attempt to use hardware + * rasterization. + * + * I am not sure that the cache related registers are setup correctly, but + * obviously this does work... Further investigation is needed. + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "glheader.h" +#include "state.h" +#include "imports.h" +#include "enums.h" +#include "macros.h" +#include "context.h" +#include "dd.h" +#include "simple_list.h" +#include "api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "radeon_reg.h" +#include "radeon_macros.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "r300_context.h" +#include "r300_ioctl.h" +#include "r300_state.h" +#include "r300_reg.h" +#include "r300_tex.h" +#include "r300_emit.h" +extern int future_hw_tcl_on; + +/** + * \brief Convert a OpenGL primitive type into a R300 primitive type. + */ +static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) +{ + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + return R300_VAP_VF_CNTL__PRIM_POINTS; + break; + case GL_LINES: + return R300_VAP_VF_CNTL__PRIM_LINES; + break; + case GL_LINE_STRIP: + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; + case GL_LINE_LOOP: + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case GL_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case GL_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case GL_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; + case GL_QUADS: + return R300_VAP_VF_CNTL__PRIM_QUADS; + break; + case GL_QUAD_STRIP: + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; + case GL_POLYGON: + return R300_VAP_VF_CNTL__PRIM_POLYGON; + break; + default: + assert(0); + return -1; + break; + } +} + +static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +{ + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} + +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, + int elt_size) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_dma_region *rvb = &rmesa->state.elt_dma; + void *out; + + assert(elt_size == 2 || elt_size == 4); + + if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) { + rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; + rvb->start = ((char *)elts) - rvb->address; + rvb->aos_offset = + rmesa->radeon.radeonScreen->gart_texture_offset + + rvb->start; + return; + } else if (r300IsGartMemory(rmesa, elts, 1)) { + WARN_ONCE("Pointer not within GART memory!\n"); + _mesa_exit(-1); + } + + r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size); + rvb->aos_offset = GET_START(rvb); + + out = rvb->address + rvb->start; + memcpy(out, elts, n_elts * elt_size); +} + +static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, + int vertex_count, int type, int elt_size) +{ + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + unsigned long t_addr; + unsigned long magic_1, magic_2; + + assert(elt_size == 2 || elt_size == 4); + + if (addr & (elt_size - 1)) { + WARN_ONCE("Badly aligned buffer\n"); + return; + } + + magic_1 = (addr % 32) / 4; + t_addr = addr & ~0x1d; + magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if (elt_size == 4) { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } + + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); +#ifdef OPTIMIZE_ELTS + if (elt_size == 4) { + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr); + } else { + e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); + e32(t_addr); + } +#else + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr); +#endif + + if (elt_size == 4) { + e32(vertex_count); + } else { +#ifdef OPTIMIZE_ELTS + e32(magic_2); +#else + e32((vertex_count + 1) / 2); +#endif + } +} + +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); + e32(nr); + for (i = 0; i + 1 < nr; i += 2) { + e32((rmesa->state.aos[i].aos_size << 0) + | (rmesa->state.aos[i].aos_stride << 8) + | (rmesa->state.aos[i + 1].aos_size << 16) + | (rmesa->state.aos[i + 1].aos_stride << 24) + ); + e32(rmesa->state.aos[i].aos_offset + + offset * 4 * rmesa->state.aos[i].aos_stride); + e32(rmesa->state.aos[i + 1].aos_offset + + offset * 4 * rmesa->state.aos[i + 1].aos_stride); + } + + if (nr & 1) { + e32((rmesa->state.aos[nr - 1].aos_size << 0) + | (rmesa->state.aos[nr - 1].aos_stride << 8) + ); + e32(rmesa->state.aos[nr - 1].aos_offset + + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + } +} + +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) +{ + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) + | type); +} + +static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + int start, int end, int prim) +{ + int type, num_verts; + + type = r300PrimitiveType(rmesa, ctx, prim); + num_verts = r300NumVerts(rmesa, end - start, prim); + + if (type < 0 || num_verts <= 0) + return; + + if (rmesa->state.VB.Elts) { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + if (num_verts > 65535) { + /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, + rmesa->state.VB.elt_size); + r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, + num_verts, type, rmesa->state.VB.elt_size); + } else { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); + } +} + +#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \ + rvb->AttribPtr[(a)].type = GL_FLOAT, \ + rvb->AttribPtr[(a)].stride = vb->b->stride, \ + rvb->AttribPtr[(a)].data = vb->b->data + +static void radeon_vb_to_rvb(r300ContextPtr rmesa, + struct radeon_vertex_buffer *rvb, + struct vertex_buffer *vb) +{ + int i; + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + + memset(rvb, 0, sizeof(*rvb)); + + rvb->Elts = vb->Elts; + rvb->elt_size = 4; + rvb->elt_min = 0; + rvb->elt_max = vb->Count; + + rvb->Count = vb->Count; + + if (hw_tcl_on) { + CONV_VB(VERT_ATTRIB_POS, ObjPtr); + } else { + assert(vb->ClipPtr); + CONV_VB(VERT_ATTRIB_POS, ClipPtr); + } + + CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr); + CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]); + CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]); + CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr); + + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) + CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]); + + for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++) + CONV_VB(VERT_ATTRIB_GENERIC0 + i, + AttribPtr[VERT_ATTRIB_GENERIC0 + i]); + + rvb->Primitive = vb->Primitive; + rvb->PrimitiveCount = vb->PrimitiveCount; + rvb->LockFirst = rvb->LockCount = 0; + rvb->lock_uptodate = GL_FALSE; +} + +static GLboolean r300RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + int i; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (stage) { + TNLcontext *tnl = TNL_CONTEXT(ctx); + radeon_vb_to_rvb(rmesa, VB, &tnl->vb); + } + + r300UpdateShaders(rmesa); + if (r300EmitArrays(ctx)) + return GL_TRUE; + + r300UpdateShaderStates(rmesa); + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); + + reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); + + r300EmitState(rmesa); + + for (i = 0; i < VB->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); + GLuint start = VB->Primitive[i].start; + GLuint end = VB->Primitive[i].start + VB->Primitive[i].count; + r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + } + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); + + reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); + +#ifdef USER_BUFFERS + r300UseArrays(ctx); +#endif + + r300ReleaseArrays(ctx); + + return GL_FALSE; +} + +#define FALLBACK_IF(expr) \ + do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", \ + #expr); \ + return R300_FALLBACK_RAST; \ + } \ + } while(0) + +static int r300Fallback(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + if (fp) { + if (!fp->translated) + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + + FALLBACK_IF(ctx->RenderMode != GL_RENDER); + + FALLBACK_IF(ctx->Stencil._TestTwoSide + && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] + || ctx->Stencil.ValueMask[0] != + ctx->Stencil.ValueMask[1] + || ctx->Stencil.WriteMask[0] != + ctx->Stencil.WriteMask[1])); + + FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); + + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); + + if (!r300->disable_lowimpact_fallback) { + FALLBACK_IF(ctx->Polygon.OffsetPoint); + FALLBACK_IF(ctx->Polygon.OffsetLine); + FALLBACK_IF(ctx->Polygon.StippleFlag); + FALLBACK_IF(ctx->Multisample.Enabled); + FALLBACK_IF(ctx->Line.StippleFlag); + FALLBACK_IF(ctx->Line.SmoothFlag); + FALLBACK_IF(ctx->Point.SmoothFlag); + } + + return R300_FALLBACK_NONE; +} + +static GLboolean r300RunNonTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + return GL_TRUE; + + return r300RunRender(ctx, stage); +} + +static GLboolean r300RunTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program *vp; + + hw_tcl_on = future_hw_tcl_on; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (hw_tcl_on == GL_FALSE) + return GL_TRUE; + + if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + + r300UpdateShaders(rmesa); + + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if (vp->native == GL_FALSE) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + + return r300RunRender(ctx, stage); +} + +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 Hardware Rasterization", + NULL, + NULL, + NULL, + NULL, + r300RunNonTCLRender +}; + +const struct tnl_pipeline_stage _r300_tcl_stage = { + "r300 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r300RunTCLRender +}; diff --git a/r300/r300_shader.c b/r300/r300_shader.c new file mode 100644 index 0000000..59fe17b --- /dev/null +++ b/r300/r300_shader.c @@ -0,0 +1,73 @@ +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "program.h" +#include "tnl/tnl.h" +#include "r300_context.h" +#include "r300_fragprog.h" + +static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, + GLuint id) +{ + struct r300_vertex_program_cont *vp; + struct r300_fragment_program *fp; + + switch (target) { + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r300_vertex_program_cont); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, + target, id); + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r300_fragment_program); + fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &fp->mesa_program, + target, id); + case GL_FRAGMENT_PROGRAM_NV: + fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &fp->mesa_program, + target, id); + default: + _mesa_problem(ctx, "Bad target in r300NewProgram"); + } + + return NULL; +} + +static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) +{ + _mesa_delete_program(ctx, prog); +} + +static void +r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + struct r300_vertex_program_cont *vp = (void *)prog; + struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + vp->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + fp->translated = GL_FALSE; + break; + } + /* need this for tcl fallbacks */ + _tnl_program_string(ctx, target, prog); +} + +static GLboolean +r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + return 1; +} + +void r300InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r300NewProgram; + functions->DeleteProgram = r300DeleteProgram; + functions->ProgramStringNotify = r300ProgramStringNotify; + functions->IsProgramNative = r300IsProgramNative; +} diff --git a/r300/r300_state.c b/r300/r300_state.c new file mode 100644 index 0000000..2aaf041 --- /dev/null +++ b/r300/r300_state.c @@ -0,0 +1,2375 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. +Copyright (C) 2004 Nicolai Haehnle. +All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "glheader.h" +#include "state.h" +#include "imports.h" +#include "enums.h" +#include "macros.h" +#include "context.h" +#include "dd.h" +#include "simple_list.h" + +#include "api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "texformat.h" + +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "r300_context.h" +#include "r300_ioctl.h" +#include "r300_state.h" +#include "r300_reg.h" +#include "r300_emit.h" +#include "r300_fragprog.h" +#include "r300_tex.h" + +#include "drirenderbuffer.h" + +static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) +{ + GLubyte color[4]; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, blend_color); + + CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + + rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], + color[1], color[2]); +} + +/** + * Calculate the hardware blend factor setting. This same function is used + * for source and destination of both alpha and RGB. + * + * \returns + * The hardware register value for the specified blend factor. This value + * will need to be shifted into the correct position for either source or + * destination factor. + * + * \todo + * Since the two cases where source and destination are handled differently + * are essentially error cases, they should never happen. Determine if these + * cases can be removed. + */ +static int blend_factor(GLenum factor, GLboolean is_src) +{ + switch (factor) { + case GL_ZERO: + return R300_BLEND_GL_ZERO; + break; + case GL_ONE: + return R300_BLEND_GL_ONE; + break; + case GL_DST_COLOR: + return R300_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + return R300_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + return R300_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + return R300_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + return R300_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + return (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE : + R300_BLEND_GL_ZERO; + break; + case GL_CONSTANT_COLOR: + return R300_BLEND_GL_CONST_COLOR; + break; + case GL_ONE_MINUS_CONSTANT_COLOR: + return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; + break; + case GL_CONSTANT_ALPHA: + return R300_BLEND_GL_CONST_ALPHA; + break; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; + break; + default: + fprintf(stderr, "unknown blend factor %x\n", factor); + return (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO; + break; + } +} + +/** + * Sets both the blend equation and the blend function. + * This is done in a single + * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) + * change the interpretation of the blend function. + * Also, make sure that blend function and blend equation are set to their + * default value if color blending is not enabled, since at least blend + * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results + * otherwise for unknown reasons. + */ + +/* helper function */ +static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, + int cbits, int funcA, int eqnA) +{ + GLuint new_ablend, new_cblend; + +#if 0 + fprintf(stderr, + "eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n", + eqnA, funcA, eqn, func, cbits); +#endif + new_ablend = eqnA | funcA; + new_cblend = eqn | func; + + /* Some blend factor combinations don't seem to work when the + * BLEND_NO_SEPARATE bit is set. + * + * Especially problematic candidates are the ONE_MINUS_* flags, + * but I can't see a real pattern. + */ +#if 0 + if (new_ablend == new_cblend) { + new_cblend |= R300_BLEND_NO_SEPARATE; + } +#endif + new_cblend |= cbits; + + if ((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) || + (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) { + R300_STATECHANGE(r300, bld); + r300->hw.bld.cmd[R300_BLD_ABLEND] = new_ablend; + r300->hw.bld.cmd[R300_BLD_CBLEND] = new_cblend; + } +} + +static void r300SetBlendState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqn = R300_COMB_FCN_ADD_CLAMP; + int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqnA = R300_COMB_FCN_ADD_CLAMP; + + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + r300SetBlendCntl(r300, func, eqn, 0, func, eqn); + return; + } + + func = + (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationRGB) { + case GL_FUNC_ADD: + eqn = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqn = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqn = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqn = R300_COMB_FCN_MIN; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqn = R300_COMB_FCN_MAX; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + return; + } + + funcA = + (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationA) { + case GL_FUNC_ADD: + eqnA = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqnA = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqnA = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqnA = R300_COMB_FCN_MIN; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqnA = R300_COMB_FCN_MAX; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid A blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + return; + } + + r300SetBlendCntl(r300, + func, eqn, + R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA); +} + +static void r300BlendEquationSeparate(GLcontext * ctx, + GLenum modeRGB, GLenum modeA) +{ + r300SetBlendState(ctx); +} + +static void r300BlendFuncSeparate(GLcontext * ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) +{ + r300SetBlendState(ctx); +} + +/** + * Update our tracked culling state based on Mesa's state. + */ +static void r300UpdateCulling(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t val = 0; + + R300_STATECHANGE(r300, cul); + if (ctx->Polygon.CullFlag) { + if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + val = R300_CULL_FRONT | R300_CULL_BACK; + else if (ctx->Polygon.CullFaceMode == GL_FRONT) + val = R300_CULL_FRONT; + else + val = R300_CULL_BACK; + + if (ctx->Polygon.FrontFace == GL_CW) + val |= R300_FRONT_FACE_CW; + else + val |= R300_FRONT_FACE_CCW; + } + r300->hw.cul.cmd[R300_CUL_CULL] = val; +} + +static void r300SetEarlyZState(GLcontext * ctx) +{ + /* updates register R300_RB3D_EARLY_Z (0x4F14) + if depth test is not enabled it should be R300_EARLY_Z_DISABLE + if depth is enabled and alpha not it should be R300_EARLY_Z_ENABLE + if depth and alpha is enabled it should be R300_EARLY_Z_DISABLE + */ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, zstencil_format); + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) + /* disable early Z */ + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + else { + if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) + /* enable early Z */ + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; + else + /* disable early Z */ + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + } +} + +static void r300SetAlphaState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLubyte refByte; + uint32_t pp_misc = 0x0; + GLboolean really_enabled = ctx->Color.AlphaEnabled; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef); + + switch (ctx->Color.AlphaFunc) { + case GL_NEVER: + pp_misc |= R300_ALPHA_TEST_FAIL; + break; + case GL_LESS: + pp_misc |= R300_ALPHA_TEST_LESS; + break; + case GL_EQUAL: + pp_misc |= R300_ALPHA_TEST_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= R300_ALPHA_TEST_LEQUAL; + break; + case GL_GREATER: + pp_misc |= R300_ALPHA_TEST_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= R300_ALPHA_TEST_NEQUAL; + break; + case GL_GEQUAL: + pp_misc |= R300_ALPHA_TEST_GEQUAL; + break; + case GL_ALWAYS: + /*pp_misc |= R300_ALPHA_TEST_PASS; */ + really_enabled = GL_FALSE; + break; + } + + if (really_enabled) { + pp_misc |= R300_ALPHA_TEST_ENABLE; + pp_misc |= (refByte & R300_REF_ALPHA_MASK); + } else { + pp_misc = 0x0; + } + + R300_STATECHANGE(r300, at); + r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + + r300SetEarlyZState(ctx); +} + +static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +{ + (void)func; + (void)ref; + r300SetAlphaState(ctx); +} + +static int translate_func(int func) +{ + switch (func) { + case GL_NEVER: + return R300_ZS_NEVER; + case GL_LESS: + return R300_ZS_LESS; + case GL_EQUAL: + return R300_ZS_EQUAL; + case GL_LEQUAL: + return R300_ZS_LEQUAL; + case GL_GREATER: + return R300_ZS_GREATER; + case GL_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case GL_GEQUAL: + return R300_ZS_GEQUAL; + case GL_ALWAYS: + return R300_ZS_ALWAYS; + } + return 0; +} + +static void r300SetDepthState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, zs); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= + ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + + if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { + if (ctx->Depth.Mask) + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_RB3D_Z_TEST_AND_WRITE; + else + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST; + + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= + translate_func(ctx->Depth. + Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= + translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + } + + r300SetEarlyZState(ctx); +} + +static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ); + +/** + * Handle glEnable()/glDisable(). + * + * \note Mesa already filters redundant calls to glEnable/glDisable. + */ +static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint p; + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), + state ? "GL_TRUE" : "GL_FALSE"); + + switch (cap) { + /* Fast track this one... + */ + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + break; + + case GL_FOG: + R300_STATECHANGE(r300, fogs); + if (state) { + r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FOG_ENABLE; + + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); + ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, + &ctx->Fog.Density); + ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + } else { + r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FOG_ENABLE; + } + + break; + + case GL_ALPHA_TEST: + r300SetAlphaState(ctx); + break; + + case GL_BLEND: + case GL_COLOR_LOGIC_OP: + r300SetBlendState(ctx); + break; + + + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + /* no VAP UCP on non-TCL chipsets */ + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return; + + p = cap-GL_CLIP_PLANE0; + R300_STATECHANGE( r300, vap_clip_cntl ); + if (state) { + r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0<<p); + r300ClipPlane( ctx, cap, NULL ); + } + else { + r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0<<p); + } + break; + case GL_DEPTH_TEST: + r300SetDepthState(ctx); + break; + + case GL_STENCIL_TEST: + if (r300->state.stencil.hw_stencil) { + R300_STATECHANGE(r300, zs); + if (state) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_RB3D_STENCIL_ENABLE; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= + ~R300_RB3D_STENCIL_ENABLE; + } + } else { +#if R200_MERGED + FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); +#endif + } + break; + + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; + + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + break; + + case GL_POLYGON_OFFSET_FILL: + R300_STATECHANGE(r300, occlusion_cntl); + if (state) { + r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); + } else { + r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); + } + break; + default: + radeonEnable(ctx, cap, state); + return; + } +} + +static void r300UpdatePolygonMode(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t hw_mode = 0; + + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) { + GLenum f, b; + + if (ctx->Polygon.FrontFace == GL_CCW) { + f = ctx->Polygon.FrontMode; + b = ctx->Polygon.BackMode; + } else { + f = ctx->Polygon.BackMode; + b = ctx->Polygon.FrontMode; + } + + hw_mode |= R300_PM_ENABLED; + + switch (f) { + case GL_LINE: + hw_mode |= R300_PM_FRONT_LINE; + break; + case GL_POINT: /* noop */ + hw_mode |= R300_PM_FRONT_POINT; + break; + case GL_FILL: + hw_mode |= R300_PM_FRONT_FILL; + break; + } + + switch (b) { + case GL_LINE: + hw_mode |= R300_PM_BACK_LINE; + break; + case GL_POINT: /* noop */ + hw_mode |= R300_PM_BACK_POINT; + break; + case GL_FILL: + hw_mode |= R300_PM_BACK_FILL; + break; + } + } + + if (r300->hw.polygon_mode.cmd[1] != hw_mode) { + R300_STATECHANGE(r300, polygon_mode); + r300->hw.polygon_mode.cmd[1] = hw_mode; + } +} + +/** + * Change the culling mode. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300CullFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); +} + +/** + * Change the polygon orientation. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300FrontFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); + r300UpdatePolygonMode(ctx); +} + +/** + * Change the depth testing function. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthFunc(GLcontext * ctx, GLenum func) +{ + (void)func; + r300SetDepthState(ctx); +} + +/** + * Enable/Disable depth writing. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthMask(GLcontext * ctx, GLboolean mask) +{ + (void)mask; + r300SetDepthState(ctx); +} + +/** + * Handle glColorMask() + */ +static void r300ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int mask = (r ? R300_COLORMASK0_R : 0) | + (g ? R300_COLORMASK0_G : 0) | + (b ? R300_COLORMASK0_B : 0) | (a ? R300_COLORMASK0_A : 0); + + if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) { + R300_STATECHANGE(r300, cmk); + r300->hw.cmk.cmd[R300_CMK_COLORMASK] = mask; + } +} + +/* ============================================================= + * Fog + */ +static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + union { + int i; + float f; + } fogScale, fogStart; + + (void)param; + + fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE]; + fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START]; + + switch (pname) { + case GL_FOG_MODE: + if (!ctx->Fog.Enabled) + return; + switch (ctx->Fog.Mode) { + case GL_LINEAR: + R300_STATECHANGE(r300, fogs); + r300->hw.fogs.cmd[R300_FOGS_STATE] = + (r300->hw.fogs. + cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | + R300_FOG_MODE_LINEAR; + + if (ctx->Fog.Start == ctx->Fog.End) { + fogScale.f = -1.0; + fogStart.f = 1.0; + } else { + fogScale.f = + 1.0 / (ctx->Fog.End - ctx->Fog.Start); + fogStart.f = + -ctx->Fog.Start / (ctx->Fog.End - + ctx->Fog.Start); + } + break; + case GL_EXP: + R300_STATECHANGE(r300, fogs); + r300->hw.fogs.cmd[R300_FOGS_STATE] = + (r300->hw.fogs. + cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | + R300_FOG_MODE_EXP; + fogScale.f = 0.0933 * ctx->Fog.Density; + fogStart.f = 0.0; + break; + case GL_EXP2: + R300_STATECHANGE(r300, fogs); + r300->hw.fogs.cmd[R300_FOGS_STATE] = + (r300->hw.fogs. + cmd[R300_FOGS_STATE] & ~R300_FOG_MODE_MASK) | + R300_FOG_MODE_EXP2; + fogScale.f = 0.3 * ctx->Fog.Density; + fogStart.f = 0.0; + default: + return; + } + break; + case GL_FOG_DENSITY: + switch (ctx->Fog.Mode) { + case GL_EXP: + fogScale.f = 0.0933 * ctx->Fog.Density; + fogStart.f = 0.0; + break; + case GL_EXP2: + fogScale.f = 0.3 * ctx->Fog.Density; + fogStart.f = 0.0; + default: + break; + } + break; + case GL_FOG_START: + case GL_FOG_END: + if (ctx->Fog.Mode == GL_LINEAR) { + if (ctx->Fog.Start == ctx->Fog.End) { + fogScale.f = -1.0; + fogStart.f = 1.0; + } else { + fogScale.f = + 1.0 / (ctx->Fog.End - ctx->Fog.Start); + fogStart.f = + -ctx->Fog.Start / (ctx->Fog.End - + ctx->Fog.Start); + } + } + break; + case GL_FOG_COLOR: + R300_STATECHANGE(r300, fogc); + r300->hw.fogc.cmd[R300_FOGC_R] = + (GLuint) (ctx->Fog.Color[0] * 1023.0F) & 0x3FF; + r300->hw.fogc.cmd[R300_FOGC_G] = + (GLuint) (ctx->Fog.Color[1] * 1023.0F) & 0x3FF; + r300->hw.fogc.cmd[R300_FOGC_B] = + (GLuint) (ctx->Fog.Color[2] * 1023.0F) & 0x3FF; + break; + case GL_FOG_COORD_SRC: + break; + default: + return; + } + + if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] || + fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) { + R300_STATECHANGE(r300, fogp); + r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i; + r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i; + } +} + +/* ============================================================= + * Point state + */ +static void r300PointSize(GLcontext * ctx, GLfloat size) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + size = ctx->Point._Size; + + R300_STATECHANGE(r300, ps); + r300->hw.ps.cmd[R300_PS_POINTSIZE] = + ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) | + ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT); +} + +/* ============================================================= + * Line state + */ +static void r300LineWidth(GLcontext * ctx, GLfloat widthf) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + widthf = ctx->Line._Width; + + R300_STATECHANGE(r300, lcntl); + r300->hw.lcntl.cmd[1] = (int)(widthf * 6.0); + r300->hw.lcntl.cmd[1] |= R300_LINE_CNT_VE; +} + +static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) +{ + (void)face; + (void)mode; + + r300UpdatePolygonMode(ctx); +} + +/* ============================================================= + * Stencil + */ + +static int translate_stencil_op(int op) +{ + switch (op) { + case GL_KEEP: + return R300_ZS_KEEP; + case GL_ZERO: + return R300_ZS_ZERO; + case GL_REPLACE: + return R300_ZS_REPLACE; + case GL_INCR: + return R300_ZS_INCR; + case GL_DECR: + return R300_ZS_DECR; + case GL_INCR_WRAP_EXT: + return R300_ZS_INCR_WRAP; + case GL_DECR_WRAP_EXT: + return R300_ZS_DECR_WRAP; + case GL_INVERT: + return R300_ZS_INVERT; + default: + WARN_ONCE("Do not know how to translate stencil op"); + return R300_ZS_KEEP; + } + return 0; +} + +static void r300ShadeModel(GLcontext * ctx, GLenum mode) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, shade); + switch (mode) { + case GL_FLAT: + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + break; + case GL_SMOOTH: + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + break; + default: + return; + } +} + +static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, + GLenum func, GLint ref, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint refmask = + (((ctx->Stencil. + Ref[0] & 0xff) << R300_RB3D_ZS2_STENCIL_REF_SHIFT) | ((ctx-> + Stencil. + ValueMask + [0] & + 0xff) + << + R300_RB3D_ZS2_STENCIL_MASK_SHIFT)); + + GLuint flag; + + R300_STATECHANGE(rmesa, zs); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << + R300_RB3D_ZS1_FRONT_FUNC_SHIFT) + | (R300_ZS_MASK << + R300_RB3D_ZS1_BACK_FUNC_SHIFT)); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~((R300_RB3D_ZS2_STENCIL_MASK << + R300_RB3D_ZS2_STENCIL_REF_SHIFT) | + (R300_RB3D_ZS2_STENCIL_MASK << R300_RB3D_ZS2_STENCIL_MASK_SHIFT)); + + flag = translate_func(ctx->Stencil.Function[0]); + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT); + + if (ctx->Stencil._TestTwoSide) + flag = translate_func(ctx->Stencil.Function[1]); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; +} + +static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, zs); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~(R300_RB3D_ZS2_STENCIL_MASK << + R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= + (ctx->Stencil. + WriteMask[0] & 0xff) << R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT; +} + +static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, zs); + /* It is easier to mask what's left.. */ + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= + (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | + (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[0]) << + R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << + R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << + R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT); + + if (ctx->Stencil._TestTwoSide) { + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[1]) << + R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << + R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << + R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + } else { + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[0]) << + R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << + R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << + R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + } +} + +static void r300ClearStencil(GLcontext * ctx, GLint s) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + rmesa->state.stencil.clear = + ((GLuint) (ctx->Stencil.Clear & 0xff) | + (R300_RB3D_ZS2_STENCIL_MASK << + R300_RB3D_ZS2_STENCIL_MASK_SHIFT) | ((ctx->Stencil. + WriteMask[0] & 0xff) << + R300_RB3D_ZS2_STENCIL_WRITE_MASK_SHIFT)); +} + +/* ============================================================= + * Window position and viewport transformation + */ + +/* + * To correctly position primitives: + */ +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +static void r300UpdateWindow(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + GLfloat sx = v[MAT_SX]; + GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat sy = -v[MAT_SY]; + GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale; + GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale; + + R300_FIREVERTICES(rmesa); + R300_STATECHANGE(rmesa, vpt); + + rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(sy); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(sz); + rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(tz); +} + +static void r300Viewport(GLcontext * ctx, GLint x, GLint y, + GLsizei width, GLsizei height) +{ + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ + r300UpdateWindow(ctx); +} + +static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + r300UpdateWindow(ctx); +} + +void r300UpdateViewportOffset(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable; + GLfloat xoffset = (GLfloat) dPriv->x; + GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + + if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + R300_STATECHANGE(rmesa, vpt); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + + } + + radeonUpdateScissor(ctx); +} + +/** + * Tell the card where to render (offset, pitch). + * Effected by glDrawBuffer, etc + */ +void r300UpdateDrawBuffer(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = rmesa; + struct gl_framebuffer *fb = ctx->DrawBuffer; + driRenderbuffer *drb; + + if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + /* draw to front */ + drb = + (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. + Renderbuffer; + } else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + /* draw to back */ + drb = + (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. + Renderbuffer; + } else { + /* drawing to multiple buffers, or none */ + return; + } + + assert(drb); + assert(drb->flippedPitch); + + R300_STATECHANGE(rmesa, cb); + + r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + + r300->radeon.radeonScreen->fbLocation; + r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; + + if (r300->radeon.radeonScreen->cpp == 4) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; + else + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; +#if 0 + R200_STATECHANGE(rmesa, ctx); + + /* Note: we used the (possibly) page-flipped values */ + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] + = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) + & R200_COLOROFFSET_MASK); + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; + + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= + R200_COLOR_TILE_ENABLE; + } +#endif +} + +static void +r300FetchStateParameter(GLcontext * ctx, + const gl_state_index state[STATE_LENGTH], + GLfloat * value) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + switch (state[0]) { + case STATE_INTERNAL: + switch (state[1]) { + case STATE_R300_WINDOW_DIMENSION: + value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */ + value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */ + value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + value[3] = 1.0F; /* not used */ + break; + + case STATE_R300_TEXRECT_FACTOR:{ + struct gl_texture_object *t = + ctx->Texture.Unit[state[2]].CurrentRect; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + value[0] = 1.0 / image->Width2; + value[1] = 1.0 / image->Height2; + } else { + value[0] = 1.0; + value[1] = 1.0; + } + value[2] = 1.0; + value[3] = 1.0; + break; + } + + default: + break; + } + break; + + default: + break; + } +} + +/** + * Update R300's own internal state parameters. + * For now just STATE_R300_WINDOW_DIMENSION + */ +void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +{ + struct r300_fragment_program *fp; + struct gl_program_parameter_list *paramList; + GLuint i; + + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + return; + + fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; + if (!fp) + return; + + paramList = fp->mesa_program.Base.Parameters; + + if (!paramList) + return; + + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { + r300FetchStateParameter(ctx, + paramList->Parameters[i]. + StateIndexes, + paramList->ParameterValues[i]); + } + } +} + +/* ============================================================= + * Polygon state + */ +static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLfloat constant = units; + + switch (ctx->Visual.depthBits) { + case 16: + constant *= 4.0; + break; + case 24: + constant *= 2.0; + break; + } + + factor *= 12.0; + +/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */ + + R300_STATECHANGE(rmesa, zbs); + rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant); + rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant); +} + +/* Routing and texture-related */ + +/* r300 doesnt handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when filter is NEAREST. + * Since texwrap produces same results for GL_CLAMP and GL_CLAMP_TO_EDGE we use them instead. + * We need to recalculate wrap modes whenever filter mode is changed because someone might do: + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + * Since r300 completely ignores R300_TX_CLAMP when either min or mag is nearest it cant handle + * combinations where only one of them is nearest. + */ +static unsigned long gen_fixed_filter(unsigned long f) +{ + unsigned long mag, min, needs_fixing = 0; + //return f; + + /* We ignore MIRROR bit so we dont have to do everything twice */ + if ((f & ((7 - 1) << R300_TX_WRAP_S_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_S_SHIFT)) { + needs_fixing |= 1; + } + if ((f & ((7 - 1) << R300_TX_WRAP_T_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) { + needs_fixing |= 2; + } + if ((f & ((7 - 1) << R300_TX_WRAP_Q_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_Q_SHIFT)) { + needs_fixing |= 4; + } + + if (!needs_fixing) + return f; + + mag = f & R300_TX_MAG_FILTER_MASK; + min = f & R300_TX_MIN_FILTER_MASK; + + /* TODO: Check for anisto filters too */ + if ((mag != R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) + return f; + + /* r300 cant handle these modes hence we force nearest to linear */ + if ((mag == R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) { + f &= ~R300_TX_MAG_FILTER_NEAREST; + f |= R300_TX_MAG_FILTER_LINEAR; + return f; + } + + if ((min == R300_TX_MIN_FILTER_NEAREST) + && (mag != R300_TX_MAG_FILTER_NEAREST)) { + f &= ~R300_TX_MIN_FILTER_NEAREST; + f |= R300_TX_MIN_FILTER_LINEAR; + return f; + } + + /* Both are nearest */ + if (needs_fixing & 1) { + f &= ~((7 - 1) << R300_TX_WRAP_S_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT; + } + if (needs_fixing & 2) { + f &= ~((7 - 1) << R300_TX_WRAP_T_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT; + } + if (needs_fixing & 4) { + f &= ~((7 - 1) << R300_TX_WRAP_Q_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_Q_SHIFT; + } + return f; +} + +static void r300SetupTextures(GLcontext * ctx) +{ + int i, mtu; + struct r300_tex_obj *t; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int hw_tmu = 0; + int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ + int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, }; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, tex.filter); + R300_STATECHANGE(r300, tex.filter_1); + R300_STATECHANGE(r300, tex.size); + R300_STATECHANGE(r300, tex.format); + R300_STATECHANGE(r300, tex.pitch); + R300_STATECHANGE(r300, tex.offset); + R300_STATECHANGE(r300, tex.chroma_key); + R300_STATECHANGE(r300, tex.border_color); + + r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0; + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "mtu=%d\n", mtu); + + if (mtu > R300_MAX_TEXTURE_UNITS) { + fprintf(stderr, + "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n", + mtu, R300_MAX_TEXTURE_UNITS); + _mesa_exit(-1); + } + + /* We cannot let disabled tmu offsets pass DRM */ + for (i = 0; i < mtu; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + +#if 0 /* Enables old behaviour */ + hw_tmu = i; +#endif + tmu_mappings[i] = hw_tmu; + + t = r300->state.texture.unit[i].texobj; + /* XXX questionable fix for bug 9170: */ + if (!t) + continue; + + if ((t->format & 0xffffff00) == 0xffffff00) { + WARN_ONCE + ("unknown texture format (entry %x) encountered. Help me !\n", + t->format & 0xff); + } + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, + "Activating texture unit %d\n", i); + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu); + + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + gen_fixed_filter(t->filter) | (hw_tmu << 28); + /* Currently disabled! */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->size; + r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + + hw_tmu] = t->format; + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pitch_reg; + r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + + hw_tmu] = t->offset; + + if (t->offset & R300_TXO_MACRO_TILE) { + WARN_ONCE("macro tiling enabled!\n"); + } + + if (t->offset & R300_TXO_MICRO_TILE) { + WARN_ONCE("micro tiling enabled!\n"); + } + + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + + hw_tmu] = 0x0; + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + t->pp_border_color; + + last_hw_tmu = hw_tmu; + + hw_tmu++; + } + } + + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); + + if (!fp) /* should only happenen once, just after context is created */ + return; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_FPITX_IMAGE_MASK; + + opcode = + (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; + if (opcode == R300_FPITX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_FPITX_IMAGE_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", + r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); +} + +union r300_outputs_written { + GLuint vp_outputs; /* hw_tcl_on */ + DECLARE_RENDERINPUTS(index_bitset); /* !hw_tcl_on */ +}; + +#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \ + ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \ + RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) )) + +static void r300SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* I'm still unsure if these are needed */ + GLuint interp_magic[8] = { + 0x00, + R300_RS_INTERP_1_UNKNOWN, + R300_RS_INTERP_2_UNKNOWN, + R300_RS_INTERP_3_UNKNOWN, + 0x00, + 0x00, + 0x00, + 0x00 + }; + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int in_texcoords, col_interp_nr; + int i; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = + CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, + r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. */ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + + r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found...\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + InputsRead &= ~FRAG_BIT_WPOS; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 + | R300_RS_INTERP_USED + | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + | interp_magic[i]; + + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + //assert(r300->state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ + | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + high_rr = fp_reg; + + if (!R300_OUTPUTS_WRITTEN_TEST + (OutputsWritten, VERT_RESULT_TEX0 + i, + _TNL_ATTRIB_TEX(i))) { + /* Passing invalid data here can lock the GPU. */ + WARN_ONCE + ("fragprog wants coords for tex%d, vp doesn't provide them!\n", + i); + //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); + //_mesa_exit(-1); + } + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } + /* Need to count all coords enabled at vof */ + if (R300_OUTPUTS_WRITTEN_TEST + (OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) + in_texcoords++; + } + + if (InputsRead & FRAG_BIT_COL0) { + if (!R300_OUTPUTS_WRITTEN_TEST + (OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + WARN_ONCE + ("fragprog wants col0, vp doesn't provide it\n"); + goto out; /* FIXME */ + //_mesa_print_program(&CURRENT_VERTEX_SHADER(ctx)->Base); + //_mesa_exit(-1); + } + + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 + | R300_RS_ROUTE_0_COLOR + | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } + out: + + if (InputsRead & FRAG_BIT_COL1) { + if (!R300_OUTPUTS_WRITTEN_TEST + (OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + WARN_ONCE + ("fragprog wants col1, vp doesn't provide it\n"); + //_mesa_exit(-1); + } + + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= + R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | + (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } + + /* Need at least one. This might still lock as the values are undefined... */ + if (in_texcoords == 0 && col_interp_nr == 0) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 + | R300_RS_ROUTE_0_COLOR + | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + col_interp_nr++; + } + + r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT) + | (col_interp_nr << R300_RS_CNTL_CI_CNT_SHIFT) + | R300_RS_CNTL_0_UNKNOWN_18; + + assert(high_rr >= 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = + cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rc.cmd[2] = 0xC0 | high_rr; + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", + InputsRead); +} + +#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) + +#define bump_vpu_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ + }while(0) + +void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct + r300_vertex_shader_fragment + *vsf) +{ + int i; + + if (vsf->length == 0) + return; + + if (vsf->length & 0x3) { + fprintf(stderr, + "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); + _mesa_exit(-1); + } + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < vsf->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, + vsf->length + 4 * (dest & 0xff)); + break; + + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < vsf->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, + vsf->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < vsf->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = + (vsf->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, + vsf->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, + "%s:%s don't know how to handle dest %04x\n", + __FILE__, __FUNCTION__, dest); + _mesa_exit(-1); + } +} + +/* just a skeleton for now.. */ + +/* Generate a vertex shader that simply transforms vertex and texture coordinates, + while leaving colors intact. Nothing fancy (like lights) + + If implementing lights make a copy first, so it is easy to switch between the two versions */ +static void r300GenerateSimpleVertexShader(r300ContextPtr r300) +{ + int i; + GLuint o_reg = 0; + + /* Allocate parameters */ + r300->state.vap_param.transform_offset = 0x0; /* transform matrix */ + r300->state.vertex_shader.param_offset = 0x0; + r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */ + + r300->state.vertex_shader.program_start = 0x0; + r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */ + r300->state.vertex_shader.program_end = 0x0; + + r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */ + r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */ + + r300->state.vertex_shader.unknown1.length = 0; + r300->state.vertex_shader.unknown2.length = 0; + +#define WRITE_OP(oper,source1,source2,source3) {\ + r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \ + r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \ + r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \ + r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \ + r300->state.vertex_shader.program_end++; \ + } + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) + if (r300->state.sw_tcl_inputs[i] != -1) { + WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT), + VSF_REG(r300->state.sw_tcl_inputs[i]), + VSF_ATTR_UNITY(r300->state. + sw_tcl_inputs[i]), + VSF_UNITY(r300->state.sw_tcl_inputs[i]) + ) + + } + + r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */ + r300->state.vertex_shader.program.length = + (r300->state.vertex_shader.program_end + 1) * 4; + + r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */ + r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */ + r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */ + +} + +static void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + int inst_count; + int param_count; + struct r300_vertex_program *prog = + (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + R300_STATECHANGE(rmesa, vpp); + param_count = + r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current /*prog */ , + (float *)&rmesa->hw.vpp. + cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); + +#if 0 + setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, + &(rmesa->state.vertex_shader.unknown1)); + setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, + &(rmesa->state.vertex_shader.unknown2)); +#endif + + inst_count = prog->program.length / 4 - 1; + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) + | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT) + | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) + | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ << + R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) + | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ << + 0); + + /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, + so I leave it as a reminder */ +#if 0 + reg_start(R300_VAP_PVS_WAITIDLE, 0); + e32(0x00000000); +#endif +} + +static void r300SetupVertexShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + /* Not sure why this doesnt work... + 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. + 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ + //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); + if (hw_tcl_on + && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))-> + translated) { + r300SetupVertexProgram(rmesa); + return; + } + + /* This needs to be replaced by vertex shader generation code */ + r300GenerateSimpleVertexShader(rmesa); + + setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, + &(rmesa->state.vertex_shader.program)); + +#if 0 + setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, + &(rmesa->state.vertex_shader.unknown1)); + setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, + &(rmesa->state.vertex_shader.unknown2)); +#endif + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (rmesa->state.vertex_shader. + program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) + | (rmesa->state.vertex_shader. + unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT) + | (rmesa->state.vertex_shader. + program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (rmesa->state.vertex_shader. + param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) + | (rmesa->state.vertex_shader. + param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (rmesa->state.vertex_shader. + unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) + | (rmesa->state.vertex_shader.unknown_ptr3 << 0); + + /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, + so I leave it as a reminder */ +#if 0 + reg_start(R300_VAP_PVS_WAITIDLE, 0); + e32(0x00000000); +#endif +} + +/** + * Completely recalculates hardware state based on the Mesa state. + */ +static void r300ResetHwState(r300ContextPtr r300) +{ + GLcontext *ctx = r300->radeon.glCtx; + int has_tcl = 1; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + /* This is a place to initialize registers which + have bitfields accessed by different functions + and not all bits are used */ + + /* go and compute register values from GL state */ + + r300UpdateWindow(ctx); + + r300ColorMask(ctx, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], ctx->Color.ColorMask[ACOMP]); + + r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); + r300DepthMask(ctx, ctx->Depth.Mask); + r300DepthFunc(ctx, ctx->Depth.Func); + + /* stencil */ + r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); + r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], + ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]); + r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0], + ctx->Stencil.ZFailFunc[0], + ctx->Stencil.ZPassFunc[0]); + + r300UpdateCulling(ctx); + + r300UpdateTextureState(ctx); + + r300SetBlendState(ctx); + + r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); + r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); + + /* Initialize magic registers + TODO : learn what they really do, or get rid of + those we don't have to touch */ + if (!has_tcl) + r300->hw.vap_cntl.cmd[1] = 0x0014045a; + else + r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA + | R300_VPORT_X_OFFSET_ENA + | R300_VPORT_Y_SCALE_ENA + | R300_VPORT_Y_OFFSET_ENA + | R300_VPORT_Z_SCALE_ENA + | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT; + r300->hw.vte.cmd[2] = 0x00000008; + + r300->hw.unk2134.cmd[1] = 0x00FFFFFF; + r300->hw.unk2134.cmd[2] = 0x00000000; + if (_mesa_little_endian()) + r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; + else + r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; + + /* disable VAP/TCL on non-TCL capable chips */ + if (!has_tcl) + r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; + + r300->hw.unk21DC.cmd[1] = 0xAAAAAAAA; + + r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL; + + r300->hw.unk2220.cmd[1] = r300PackFloat32(1.0); + r300->hw.unk2220.cmd[2] = r300PackFloat32(1.0); + r300->hw.unk2220.cmd[3] = r300PackFloat32(1.0); + r300->hw.unk2220.cmd[4] = r300PackFloat32(1.0); + + /* what about other chips than r300 or rv350??? */ + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) + r300->hw.unk2288.cmd[1] = R300_2288_R300; + else + r300->hw.unk2288.cmd[1] = R300_2288_RV350; + + r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE + | R300_GB_LINE_STUFF_ENABLE + | R300_GB_TRIANGLE_STUFF_ENABLE /*| R300_GB_UNK31 */ ; + + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; + if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350)) + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R300 | + R300_GB_TILE_SIZE_16; + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV410 | + R300_GB_TILE_SIZE_16; + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_R420 | + R300_GB_TILE_SIZE_16; + else + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_PIPE_COUNT_RV300 | + R300_GB_TILE_SIZE_16; + /* set to 0 when fog is disabled? */ + r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W; + r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = R300_AA_DISABLE; /* No antialiasing */ + + r300->hw.unk4200.cmd[1] = r300PackFloat32(0.0); + r300->hw.unk4200.cmd[2] = r300PackFloat32(0.0); + r300->hw.unk4200.cmd[3] = r300PackFloat32(1.0); + r300->hw.unk4200.cmd[4] = r300PackFloat32(1.0); + + r300->hw.unk4214.cmd[1] = 0x00050005; + + r300PointSize(ctx, 0.0); + + r300->hw.unk4230.cmd[1] = 0x18000006; + r300->hw.unk4230.cmd[2] = 0x00020006; + r300->hw.unk4230.cmd[3] = r300PackFloat32(1.0 / 192.0); + + r300LineWidth(ctx, 0.0); + + r300->hw.unk4260.cmd[1] = 0; + r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); + r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); + + r300->hw.shade.cmd[1] = 0x00000002; + r300ShadeModel(ctx, ctx->Light.ShadeModel); + r300->hw.shade.cmd[3] = 0x00000000; + r300->hw.shade.cmd[4] = 0x00000000; + + r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); + r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; + r300->hw.zbias_cntl.cmd[1] = 0x00000000; + + r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, + ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); + + r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF; + r300->hw.unk42C0.cmd[2] = 0x00000000; + + r300->hw.unk43A4.cmd[1] = 0x0000001C; + r300->hw.unk43A4.cmd[2] = 0x2DA49525; + + r300->hw.unk43E8.cmd[1] = 0x00FFFFFF; + + r300->hw.unk46A4.cmd[1] = 0x00001B01; + r300->hw.unk46A4.cmd[2] = 0x00001B0F; + r300->hw.unk46A4.cmd[3] = 0x00001B0F; + r300->hw.unk46A4.cmd[4] = 0x00001B0F; + r300->hw.unk46A4.cmd[5] = 0x00000001; + + r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, NULL); + ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); + ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); + ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); + ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); + ctx->Driver.Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); + + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; + r300->hw.unk4BD8.cmd[1] = 0; + + r300->hw.unk4E00.cmd[1] = 0; + + r300BlendColor(ctx, ctx->Color.BlendColor); + r300->hw.blend_color.cmd[2] = 0; + r300->hw.blend_color.cmd[3] = 0; + + /* Again, r300ClearBuffer uses this */ + r300->hw.cb.cmd[R300_CB_OFFSET] = + r300->radeon.state.color.drawOffset + + r300->radeon.radeonScreen->fbLocation; + r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; + + if (r300->radeon.radeonScreen->cpp == 4) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; + else + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; + + r300->hw.unk4E50.cmd[1] = 0; + r300->hw.unk4E50.cmd[2] = 0; + r300->hw.unk4E50.cmd[3] = 0; + r300->hw.unk4E50.cmd[4] = 0; + r300->hw.unk4E50.cmd[5] = 0; + r300->hw.unk4E50.cmd[6] = 0; + r300->hw.unk4E50.cmd[7] = 0; + r300->hw.unk4E50.cmd[8] = 0; + r300->hw.unk4E50.cmd[9] = 0; + + r300->hw.unk4E88.cmd[1] = 0; + + r300->hw.unk4EA0.cmd[1] = 0x00000000; + r300->hw.unk4EA0.cmd[2] = 0xffffffff; + + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits); + _mesa_exit(-1); + + } + /* z compress? */ + //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; + + r300->hw.zb.cmd[R300_ZB_OFFSET] = + r300->radeon.radeonScreen->depthOffset + + r300->radeon.radeonScreen->fbLocation; + r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; + + if (r300->radeon.sarea->tiling_enabled) { + /* Turn off when clearing buffers ? */ + r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTH_TILE_ENABLE; + + if (ctx->Visual.depthBits == 24) + r300->hw.zb.cmd[R300_ZB_PITCH] |= + R300_DEPTH_MICROTILE_ENABLE; + } + + r300->hw.unk4F28.cmd[1] = 0; + + r300->hw.unk4F30.cmd[1] = 0; + r300->hw.unk4F30.cmd[2] = 0; + + r300->hw.unk4F44.cmd[1] = 0; + + r300->hw.unk4F54.cmd[1] = 0; + + if (has_tcl) { + r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; + r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; + r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0); + r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; + } +//END: TODO + r300->hw.all_dirty = GL_TRUE; +} + + +extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); + +extern int future_hw_tcl_on; +void r300UpdateShaders(r300ContextPtr rmesa) +{ + GLcontext *ctx; + struct r300_vertex_program *vp; + int i; + + ctx = rmesa->radeon.glCtx; + + if (rmesa->NewGLState && hw_tcl_on) { + rmesa->NewGLState = 0; + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = + TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + &rmesa->dummy_attrib[i]; + } + + _tnl_UpdateFixedFunctionProgram(ctx); + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + rmesa->temp_attrib[i]; + } + + r300SelectVertexShader(rmesa); + vp = (struct r300_vertex_program *) + CURRENT_VERTEX_SHADER(ctx); + /*if (vp->translated == GL_FALSE) + r300TranslateVertexShader(vp); */ + if (vp->translated == GL_FALSE) { + fprintf(stderr, "Failing back to sw-tcl\n"); + hw_tcl_on = future_hw_tcl_on = 0; + r300ResetHwState(rmesa); + + return; + } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); + } + +} + +static void r300SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i, k; + + if (!fp) /* should only happenen once, just after context is created */ + return; + + r300TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } +#define OUTPUT_FIELD(st, reg, field) \ + R300_STATECHANGE(rmesa, st); \ + for(i=0;i<=fp->alu_end;i++) \ + rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\ + rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1); + + OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0); + OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1); + OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2); + OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3); +#undef OUTPUT_FIELD + + R300_STATECHANGE(rmesa, fp); + /* I just want to say, the way these nodes are stored.. weird.. */ + for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { + if (i < (fp->cur_node + 1)) { + rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = + (fp->node[i]. + alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) + | (fp->node[i]. + alu_end << R300_PFS_NODE_ALU_END_SHIFT) + | (fp->node[i]. + tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) + | (fp->node[i]. + tex_end << R300_PFS_NODE_TEX_END_SHIFT) + | fp->node[i].flags; /* ( (k==3) ? R300_PFS_NODE_LAST_NODE : 0); */ + } else { + rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; + } + } + + /* PFS_CNTL_0 */ + rmesa->hw.fp.cmd[R300_FP_CNTL0] = + fp->cur_node | (fp->first_node_has_tex << 3); + /* PFS_CNTL_1 */ + rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; + /* PFS_CNTL_2 */ + rmesa->hw.fp.cmd[R300_FP_CNTL2] = + (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) + | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); + + R300_STATECHANGE(rmesa, fpp); + for (i = 0; i < fp->const_nr; i++) { + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = + r300PackFloat24(fp->constant[i][0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = + r300PackFloat24(fp->constant[i][1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = + r300PackFloat24(fp->constant[i][2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = + r300PackFloat24(fp->constant[i][3]); + } + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = + cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); +} + +void r300UpdateShaderStates(r300ContextPtr rmesa) +{ + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + + r300UpdateTextureState(ctx); + + r300SetupPixelShader(rmesa); + r300SetupTextures(ctx); + + if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + r300SetupVertexShader(rmesa); + r300SetupRSUnit(ctx); +} + +/** + * Called by Mesa after an internal state update. + */ +static void r300InvalidateState(GLcontext * ctx, GLuint new_state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _ae_invalidate_state(ctx, new_state); + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + r300UpdateDrawBuffer(ctx); + } + + r300UpdateStateParameters(ctx, new_state); + + r300->NewGLState |= new_state; +} + +/** + * Calculate initial hardware state and register state functions. + * Assumes that the command buffer and state atoms have been + * initialized already. + */ +void r300InitState(r300ContextPtr r300) +{ + GLcontext *ctx = r300->radeon.glCtx; + GLuint depth_fmt; + + radeonInitState(&r300->radeon); + + switch (ctx->Visual.depthBits) { + case 16: + r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; + depth_fmt = R300_DEPTH_FORMAT_16BIT_INT_Z; + r300->state.stencil.clear = 0x00000000; + break; + case 24: + r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; + depth_fmt = R300_DEPTH_FORMAT_24BIT_INT_Z; + r300->state.stencil.clear = 0x00ff0000; + break; + + + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits); + _mesa_exit(-1); + } + + /* Only have hw stencil when depth buffer is 24 bits deep */ + r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 && + ctx->Visual.depthBits == 24); + + memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); + + r300ResetHwState(r300); +} + +static void r300RenderMode(GLcontext * ctx, GLenum mode) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + (void)rmesa; + (void)mode; +} + +static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; +} + + +void r300UpdateClipPlanes( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; + } + } +} + +/** + * Initialize driver's state callback functions + */ +void r300InitStateFuncs(struct dd_function_table *functions) +{ + radeonInitStateFuncs(functions); + + functions->UpdateState = r300InvalidateState; + functions->AlphaFunc = r300AlphaFunc; + functions->BlendColor = r300BlendColor; + functions->BlendEquationSeparate = r300BlendEquationSeparate; + functions->BlendFuncSeparate = r300BlendFuncSeparate; + functions->Enable = r300Enable; + functions->ColorMask = r300ColorMask; + functions->DepthFunc = r300DepthFunc; + functions->DepthMask = r300DepthMask; + functions->CullFace = r300CullFace; + functions->Fogfv = r300Fogfv; + functions->FrontFace = r300FrontFace; + functions->ShadeModel = r300ShadeModel; + + /* Stencil related */ + functions->ClearStencil = r300ClearStencil; + functions->StencilFuncSeparate = r300StencilFuncSeparate; + functions->StencilMaskSeparate = r300StencilMaskSeparate; + functions->StencilOpSeparate = r300StencilOpSeparate; + + /* Viewport related */ + functions->Viewport = r300Viewport; + functions->DepthRange = r300DepthRange; + functions->PointSize = r300PointSize; + functions->LineWidth = r300LineWidth; + + functions->PolygonOffset = r300PolygonOffset; + functions->PolygonMode = r300PolygonMode; + + functions->RenderMode = r300RenderMode; + + functions->ClipPlane = r300ClipPlane; +} diff --git a/r300/r300_state.h b/r300/r300_state.h new file mode 100644 index 0000000..21a49b7 --- /dev/null +++ b/r300/r300_state.h @@ -0,0 +1,70 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_STATE_H__ +#define __R300_STATE_H__ + +#include "r300_context.h" + +#define R300_STATECHANGE(r300, atom) \ + do { \ + r300->hw.atom.dirty = GL_TRUE; \ + r300->hw.is_dirty = GL_TRUE; \ + } while(0) + +#define R300_PRINT_STATE(r300, atom) \ + r300PrintStateAtom(r300, &r300->hw.atom) + +/* Fire the buffered vertices no matter what. + TODO: This has not been implemented yet + */ +#define R300_FIREVERTICES( r300 ) \ +do { \ + \ + if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ + r300Flush( (r300)->radeon.glCtx ); \ + } \ + \ +} while (0) + +extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state); +extern void r300InitState(r300ContextPtr r300); +extern void r300InitStateFuncs(struct dd_function_table *functions); +extern void r300UpdateViewportOffset(GLcontext * ctx); +extern void r300UpdateDrawBuffer(GLcontext * ctx); + +extern void r300UpdateShaders(r300ContextPtr rmesa); +extern void r300UpdateShaderStates(r300ContextPtr rmesa); + +#endif /* __R300_STATE_H__ */ diff --git a/r300/r300_tex.c b/r300/r300_tex.c new file mode 100644 index 0000000..2a21c61 --- /dev/null +++ b/r300/r300_tex.c @@ -0,0 +1,1166 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "context.h" +#include "enums.h" +#include "image.h" +#include "simple_list.h" +#include "texformat.h" +#include "texstore.h" +#include "texmem.h" +#include "teximage.h" +#include "texobj.h" + +#include "r300_context.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "r300_tex.h" + +#include "xmlpool.h" + +/** + * Set the texture wrap modes. + * + * \param t Texture object whose wrap modes are to be set + * \param swrap Wrap mode for the \a s texture coordinate + * \param twrap Wrap mode for the \a t texture coordinate + */ + +static void r300SetTexWrap(r300TexObjPtr t, GLenum swrap, GLenum twrap, + GLenum rwrap) +{ + unsigned long hw_swrap = 0, hw_twrap = 0, hw_qwrap = 0; + + t->filter &= + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_Q_MASK); + + switch (swrap) { + case GL_REPEAT: + hw_swrap |= R300_TX_REPEAT; + break; + case GL_CLAMP: + hw_swrap |= R300_TX_CLAMP; + break; + case GL_CLAMP_TO_EDGE: + hw_swrap |= R300_TX_CLAMP_TO_EDGE; + break; + case GL_CLAMP_TO_BORDER: + hw_swrap |= R300_TX_CLAMP_TO_BORDER; + break; + case GL_MIRRORED_REPEAT: + hw_swrap |= R300_TX_REPEAT | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_EXT: + hw_swrap |= R300_TX_CLAMP | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + hw_swrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + hw_swrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + break; + default: + _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + } + + switch (twrap) { + case GL_REPEAT: + hw_twrap |= R300_TX_REPEAT; + break; + case GL_CLAMP: + hw_twrap |= R300_TX_CLAMP; + break; + case GL_CLAMP_TO_EDGE: + hw_twrap |= R300_TX_CLAMP_TO_EDGE; + break; + case GL_CLAMP_TO_BORDER: + hw_twrap |= R300_TX_CLAMP_TO_BORDER; + break; + case GL_MIRRORED_REPEAT: + hw_twrap |= R300_TX_REPEAT | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_EXT: + hw_twrap |= R300_TX_CLAMP | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + hw_twrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + hw_twrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + break; + default: + _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + } + + switch (rwrap) { + case GL_REPEAT: + hw_qwrap |= R300_TX_REPEAT; + break; + case GL_CLAMP: + hw_qwrap |= R300_TX_CLAMP; + break; + case GL_CLAMP_TO_EDGE: + hw_qwrap |= R300_TX_CLAMP_TO_EDGE; + break; + case GL_CLAMP_TO_BORDER: + hw_qwrap |= R300_TX_CLAMP_TO_BORDER; + break; + case GL_MIRRORED_REPEAT: + hw_qwrap |= R300_TX_REPEAT | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_EXT: + hw_qwrap |= R300_TX_CLAMP | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + hw_qwrap |= R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + hw_qwrap |= R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + break; + default: + _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__); + } + + t->filter |= hw_swrap << R300_TX_WRAP_S_SHIFT; + t->filter |= hw_twrap << R300_TX_WRAP_T_SHIFT; + t->filter |= hw_qwrap << R300_TX_WRAP_Q_SHIFT; +} + +static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) +{ + + t->filter &= ~R300_TX_MAX_ANISO_MASK; + + if (max <= 1.0) { + t->filter |= R300_TX_MAX_ANISO_1_TO_1; + } else if (max <= 2.0) { + t->filter |= R300_TX_MAX_ANISO_2_TO_1; + } else if (max <= 4.0) { + t->filter |= R300_TX_MAX_ANISO_4_TO_1; + } else if (max <= 8.0) { + t->filter |= R300_TX_MAX_ANISO_8_TO_1; + } else { + t->filter |= R300_TX_MAX_ANISO_16_TO_1; + } +} + +/** + * Set the texture magnification and minification modes. + * + * \param t Texture whose filter modes are to be set + * \param minf Texture minification mode + * \param magf Texture magnification mode + */ + +static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) +{ + GLuint anisotropy = (t->filter & R300_TX_MAX_ANISO_MASK); + + t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MAG_FILTER_MASK); + + if (anisotropy == R300_TX_MAX_ANISO_1_TO_1) { + switch (minf) { + case GL_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR; + break; + } + } else { + switch (minf) { + case GL_NEAREST: + t->filter |= R300_TX_MIN_FILTER_ANISO_NEAREST; + break; + case GL_LINEAR: + t->filter |= R300_TX_MIN_FILTER_ANISO_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + case GL_LINEAR_MIPMAP_NEAREST: + t->filter |= + R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + case GL_LINEAR_MIPMAP_LINEAR: + t->filter |= + R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR; + break; + } + } + + /* Note we don't have 3D mipmaps so only use the mag filter setting + * to set the 3D texture filter mode. + */ + switch (magf) { + case GL_NEAREST: + t->filter |= R300_TX_MAG_FILTER_NEAREST; + break; + case GL_LINEAR: + t->filter |= R300_TX_MAG_FILTER_LINEAR; + break; + } +} + +static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) +{ + t->pp_border_color = PACK_COLOR_8888(c[0], c[1], c[2], c[3]); +} + +/** + * Allocate space for and load the mesa images into the texture memory block. + * This will happen before drawing with a new texture, or drawing with a + * texture after it was swapped out or teximaged again. + */ + +static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) +{ + r300TexObjPtr t; + + t = CALLOC_STRUCT(r300_tex_obj); + texObj->DriverData = t; + if (t != NULL) { + if (RADEON_DEBUG & DEBUG_TEXTURE) { + fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, + (void *)texObj, (void *)t); + } + + /* Initialize non-image-dependent parts of the state: + */ + t->base.tObj = texObj; + t->border_fallback = GL_FALSE; + + make_empty_list(&t->base); + + r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); + r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + r300SetTexBorderColor(t, texObj->_BorderChan); + } + + return t; +} + +/* try to find a format which will only need a memcopy */ +static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, + GLenum srcType) +{ + const GLuint ui = 1; + const GLubyte littleEndian = *((const GLubyte *)&ui); + + if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE + && !littleEndian) || (srcFormat == GL_ABGR_EXT + && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) + || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE + && littleEndian)) { + return &_mesa_texformat_rgba8888; + } else + if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) + || (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE + && littleEndian) || (srcFormat == GL_ABGR_EXT + && srcType == GL_UNSIGNED_INT_8_8_8_8) + || (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE + && !littleEndian)) { + return &_mesa_texformat_rgba8888_rev; + } else if (srcFormat == GL_BGRA && + ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8)) { + return &_mesa_texformat_argb8888_rev; + } else if (srcFormat == GL_BGRA && + ((srcType == GL_UNSIGNED_BYTE && littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { + return &_mesa_texformat_argb8888; + } else + return _dri_texformat_argb8888; +} + +static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, + GLint + internalFormat, + GLenum format, + GLenum type) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + const GLboolean do32bpt = + (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32); + const GLboolean force16bpt = + (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); + (void)format; + +#if 0 + fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n", + _mesa_lookup_enum_by_nr(internalFormat), internalFormat, + _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); + fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt); +#endif + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + switch (type) { + case GL_UNSIGNED_INT_10_10_10_2: + case GL_UNSIGNED_INT_2_10_10_10_REV: + return do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + default: + return do32bpt ? r300Choose8888TexFormat(format, type) : + _dri_texformat_argb4444; + } + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + switch (type) { + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_5_6_5: + case GL_UNSIGNED_SHORT_5_6_5_REV: + return _dri_texformat_rgb565; + default: + return do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + } + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return !force16bpt ? + r300Choose8888TexFormat(format, + type) : _dri_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return _dri_texformat_argb4444; + + case GL_RGB5_A1: + return _dri_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return !force16bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return _dri_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + return _dri_texformat_a8; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return _dri_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return _dri_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return _dri_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + case GL_ALPHA16F_ARB: + return &_mesa_texformat_alpha_float16; + case GL_ALPHA32F_ARB: + return &_mesa_texformat_alpha_float32; + case GL_LUMINANCE16F_ARB: + return &_mesa_texformat_luminance_float16; + case GL_LUMINANCE32F_ARB: + return &_mesa_texformat_luminance_float32; + case GL_LUMINANCE_ALPHA16F_ARB: + return &_mesa_texformat_luminance_alpha_float16; + case GL_LUMINANCE_ALPHA32F_ARB: + return &_mesa_texformat_luminance_alpha_float32; + case GL_INTENSITY16F_ARB: + return &_mesa_texformat_intensity_float16; + case GL_INTENSITY32F_ARB: + return &_mesa_texformat_intensity_float32; + case GL_RGB16F_ARB: + return &_mesa_texformat_rgba_float16; + case GL_RGB32F_ARB: + return &_mesa_texformat_rgba_float32; + case GL_RGBA16F_ARB: + return &_mesa_texformat_rgba_float16; + case GL_RGBA32F_ARB: + return &_mesa_texformat_rgba_float32; + + default: + _mesa_problem(ctx, + "unexpected internalFormat 0x%x in r300ChooseTextureFormat", + (int)internalFormat); + return NULL; + } + + return NULL; /* never get here */ +} + +static GLboolean +r300ValidateClientStorage(GLcontext * ctx, GLenum target, + GLint internalFormat, + GLint srcWidth, GLint srcHeight, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "intformat %s format %s type %s\n", + _mesa_lookup_enum_by_nr(internalFormat), + _mesa_lookup_enum_by_nr(format), + _mesa_lookup_enum_by_nr(type)); + + if (!ctx->Unpack.ClientStorage) + return 0; + + if (ctx->_ImageTransferState || + texImage->IsCompressed || texObj->GenerateMipmap) + return 0; + + /* This list is incomplete, may be different on ppc??? + */ + switch (internalFormat) { + case GL_RGBA: + if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { + texImage->TexFormat = _dri_texformat_argb8888; + } else + return 0; + break; + + case GL_RGB: + if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { + texImage->TexFormat = _dri_texformat_rgb565; + } else + return 0; + break; + + case GL_YCBCR_MESA: + if (format == GL_YCBCR_MESA && + type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { + texImage->TexFormat = &_mesa_texformat_ycbcr_rev; + } else if (format == GL_YCBCR_MESA && + (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE)) { + texImage->TexFormat = &_mesa_texformat_ycbcr; + } else + return 0; + break; + + default: + return 0; + } + + /* Could deal with these packing issues, but currently don't: + */ + if (packing->SkipPixels || + packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { + return 0; + } + + { + GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, + format, type); + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: srcRowStride %d/%x\n", + __FUNCTION__, srcRowStride, srcRowStride); + + /* Could check this later in upload, pitch restrictions could be + * relaxed, but would need to store the image pitch somewhere, + * as packing details might change before image is uploaded: + */ + if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) + || (srcRowStride & 63)) + return 0; + + /* Have validated that _mesa_transfer_teximage would be a straight + * memcpy at this point. NOTE: future calls to TexSubImage will + * overwrite the client data. This is explicitly mentioned in the + * extension spec. + */ + texImage->Data = (void *)pixels; + texImage->IsClientData = GL_TRUE; + texImage->RowStride = + srcRowStride / texImage->TexFormat->TexelBytes; + + return 1; + } +} + +static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); + return; + } + } + + /* Note, this will call ChooseTextureFormat */ + _mesa_store_teximage1d(ctx, target, level, internalFormat, + width, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[0] |= (1 << level); +} + +static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + + assert(t); /* this _should_ be true */ + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); + return; + } + } + + _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, + format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[0] |= (1 << level); +} + +static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = + (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if (t != NULL) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + + if (r300ValidateClientStorage(ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", + __FUNCTION__); + } else { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. + */ + _mesa_store_teximage2d(ctx, target, level, internalFormat, + width, height, border, format, type, + pixels, &ctx->Unpack, texObj, texImage); + + t->dirty_images[face] |= (1 << level); + } +} + +static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = + (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert(t); /* this _should_ be true */ + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); + return; + } + } + + _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[face] |= (1 << level); +} + +static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = + (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if (t != NULL) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "glCompressedTexImage2D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + + /* can't call this, different parameters. Would never evaluate to true anyway currently */ +#if 0 + if (r300ValidateClientStorage(ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", + __FUNCTION__); + } else +#endif + { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. + */ + _mesa_store_compressed_teximage2d(ctx, target, level, + internalFormat, width, height, + border, imageSize, data, + texObj, texImage); + + t->dirty_images[face] |= (1 << level); + } +} + +static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint xoffset, + GLint yoffset, GLsizei width, + GLsizei height, GLenum format, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = + (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert(t); /* this _should_ be true */ + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "glCompressedTexSubImage3D"); + return; + } + } + + _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, + yoffset, width, height, format, + imageSize, data, texObj, texImage); + + t->dirty_images[face] |= (1 << level); +} + +static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); + return; + } + } + + texImage->IsClientData = GL_FALSE; + +#if 0 + if (r300ValidateClientStorage(ctx, target, + internalFormat, + width, height, + format, type, pixels, + packing, texObj, texImage)) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using client storage\n", + __FUNCTION__); + } else +#endif + { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Using normal storage\n", + __FUNCTION__); + + /* Normal path: copy (to cached memory) and eventually upload + * via another copy to GART memory and then a blit... Could + * eliminate one copy by going straight to (permanent) GART. + * + * Note, this will call r300ChooseTextureFormat. + */ + _mesa_store_teximage3d(ctx, target, level, internalFormat, + width, height, depth, border, + format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[0] |= (1 << level); + } +} + +static void +r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + driTextureObject *t = (driTextureObject *) texObj->DriverData; + +/* fprintf(stderr, "%s\n", __FUNCTION__); */ + + assert(t); /* this _should_ be true */ + if (t) { + driSwapOutTextureObject(t); + } else { + t = (driTextureObject *) r300AllocTexObj(texObj); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); + return; + } + texObj->DriverData = t; + } + + _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[0] |= (1 << level); +} + +static void r300TexEnv(GLcontext * ctx, GLenum target, + GLenum pname, const GLfloat * param) +{ + if (RADEON_DEBUG & DEBUG_STATE) { + fprintf(stderr, "%s( %s )\n", + __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); + } + + /* This is incorrect: Need to maintain this data for each of + * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch + * between them according to _ReallyEnabled. + */ + switch (pname) { + case GL_TEXTURE_LOD_BIAS_EXT:{ +#if 0 /* Needs to be relocated in order to make sure we got the right tmu */ + GLfloat bias, min; + GLuint b; + + /* The R300's LOD bias is a signed 2's complement value with a + * range of -16.0 <= bias < 16.0. + * + * NOTE: Add a small bias to the bias for conform mipsel.c test. + */ + bias = *param + .01; + min = + driQueryOptionb(&rmesa->radeon.optionCache, + "no_neg_lod_bias") ? 0.0 : -16.0; + bias = CLAMP(bias, min, 16.0); + + /* 0.0 - 16.0 == 0x0 - 0x1000 */ + /* 0.0 - -16.0 == 0x1001 - 0x1fff */ + b = 0x1000 / 16.0 * bias; + b &= R300_LOD_BIAS_MASK; + + if (b != + (rmesa->hw.tex.unknown1. + cmd[R300_TEX_VALUE_0 + + unit] & R300_LOD_BIAS_MASK)) { + R300_STATECHANGE(rmesa, tex.unknown1); + rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + + unit] &= + ~R300_LOD_BIAS_MASK; + rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + + unit] |= b; + } +#endif + break; + } + + default: + return; + } +} + +/** + * Changes variables and flags for a state update, which will happen at the + * next UpdateTextureState + */ + +static void r300TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) +{ + r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(pname)); + } + + switch (pname) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + case GL_TEXTURE_WRAP_R: + r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); + break; + + case GL_TEXTURE_BORDER_COLOR: + r300SetTexBorderColor(t, texObj->_BorderChan); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + /* This isn't the most efficient solution but there doesn't appear to + * be a nice alternative. Since there's no LOD clamping, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. + */ + driSwapOutTextureObject((driTextureObject *) t); + break; + + default: + return; + } + + /* Mark this texobj as dirty (one bit per tex unit) + */ + t->dirty_state = TEX_ALL; +} + +static void r300BindTexture(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj) +{ + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, + (void *)texObj, ctx->Texture.CurrentUnit); + } + + if ((target == GL_TEXTURE_1D) + || (target == GL_TEXTURE_2D) + || (target == GL_TEXTURE_3D) + || (target == GL_TEXTURE_CUBE_MAP) + || (target == GL_TEXTURE_RECTANGLE_NV)) { + assert(texObj->DriverData != NULL); + } +} + +static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + driTextureObject *t = (driTextureObject *) texObj->DriverData; + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); + } + + if (t != NULL) { + if (rmesa) { + R300_FIREVERTICES(rmesa); + } + + driDestroyTextureObject(t); + } + /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); +} + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. + * Note: we could use containment here to 'derive' the driver-specific + * texture object from the core mesa gl_texture_object. Not done at this time. + * Fixup MaxAnisotropy according to user preference. + */ +static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_object *obj; + obj = _mesa_new_texture_object(ctx, name, target); + if (!obj) + return NULL; + obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + + r300AllocTexObj(obj); + return obj; +} + +void r300InitTextureFuncs(struct dd_function_table *functions) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->ChooseTextureFormat = r300ChooseTextureFormat; + functions->TexImage1D = r300TexImage1D; + functions->TexImage2D = r300TexImage2D; + functions->TexImage3D = r300TexImage3D; + functions->TexSubImage1D = r300TexSubImage1D; + functions->TexSubImage2D = r300TexSubImage2D; + functions->TexSubImage3D = r300TexSubImage3D; + functions->NewTextureObject = r300NewTextureObject; + functions->BindTexture = r300BindTexture; + functions->DeleteTexture = r300DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexEnv = r300TexEnv; + functions->TexParameter = r300TexParameter; + + functions->CompressedTexImage2D = r300CompressedTexImage2D; + functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D; + + driInitTextureFormats(); +} diff --git a/r300/r300_tex.h b/r300/r300_tex.h new file mode 100644 index 0000000..f67a8e6 --- /dev/null +++ b/r300/r300_tex.h @@ -0,0 +1,51 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __r300_TEX_H__ +#define __r300_TEX_H__ + +extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + +extern void r300UpdateTextureState(GLcontext * ctx); + +extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, + GLuint face); + +extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); + +extern void r300InitTextureFuncs(struct dd_function_table *functions); + +#endif /* __r300_TEX_H__ */ diff --git a/r300/r300_texmem.c b/r300/r300_texmem.c new file mode 100644 index 0000000..e2e8355 --- /dev/null +++ b/r300/r300_texmem.c @@ -0,0 +1,584 @@ +/************************************************************************** + +Copyright (C) Tungsten Graphics 2002. All Rights Reserved. +The Weather Channel, Inc. funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 +license. This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation on the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Gareth Hughes <gareth@valinux.com> + * + * \author Kevin E. Martin <martin@valinux.com> + */ + +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "colormac.h" +#include "macros.h" +#include "simple_list.h" +#include "radeon_reg.h" /* gets definition for usleep */ +#include "r300_context.h" +#include "r300_state.h" +#include "r300_cmdbuf.h" +#include "radeon_ioctl.h" +#include "r300_tex.h" +#include "r300_ioctl.h" +#include <unistd.h> /* for usleep() */ + +#ifdef USER_BUFFERS +#include "r300_mem.h" +#endif + +/** + * Destroy any device-dependent state associated with the texture. This may + * include NULLing out hardware state that points to the texture. + */ +void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) +{ + if (RADEON_DEBUG & DEBUG_TEXTURE) { + fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, + (void *)t, (void *)t->base.tObj); + } + + if (rmesa != NULL) { + unsigned i; + + for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { + if (t == rmesa->state.texture.unit[i].texobj) { + rmesa->state.texture.unit[i].texobj = NULL; + /* This code below is meant to shorten state + pushed to the hardware by not programming + unneeded units. + + This does not appear to be worthwhile on R300 */ +#if 0 + remove_from_list(&rmesa->hw.tex[i]); + make_empty_list(&rmesa->hw.tex[i]); + remove_from_list(&rmesa->hw.cube[i]); + make_empty_list(&rmesa->hw.cube[i]); +#endif + } + } + } +} + +/* ------------------------------------------------------------ + * Texture image conversions + */ + +static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, + r300TexObjPtr t, + struct gl_texture_image *texImage, + GLint hwlevel, + GLint x, GLint y, + GLint width, GLint height) +{ + const struct gl_texture_format *texFormat = texImage->TexFormat; + GLuint srcPitch, dstPitch; + int blit_format; + int srcOffset; + + /* + * XXX it appears that we always upload the full image, not a subimage. + * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever + * changed, the src pitch will have to change. + */ + switch (texFormat->TexelBytes) { + case 1: + blit_format = R300_CP_COLOR_FORMAT_CI8; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + case 2: + blit_format = R300_CP_COLOR_FORMAT_RGB565; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + case 4: + blit_format = R300_CP_COLOR_FORMAT_ARGB8888; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + case 8: + case 16: + blit_format = R300_CP_COLOR_FORMAT_CI8; + srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes; + break; + default: + return; + } + + t->image[0][hwlevel].data = texImage->Data; + srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); + + assert(srcOffset != ~0); + + /* Don't currently need to cope with small pitches? + */ + width = texImage->Width; + height = texImage->Height; + + if (texFormat->TexelBytes > 4) { + width *= texFormat->TexelBytes; + } + + r300EmitWait(rmesa, R300_WAIT_3D); + + r300EmitBlit(rmesa, blit_format, + srcPitch, + srcOffset, + dstPitch, + t->bufAddr, + x, + y, + t->image[0][hwlevel].x + x, + t->image[0][hwlevel].y + y, width, height); + + r300EmitWait(rmesa, R300_WAIT_2D); +} + +static void r300UploadRectSubImage(r300ContextPtr rmesa, + r300TexObjPtr t, + struct gl_texture_image *texImage, + GLint x, GLint y, GLint width, GLint height) +{ + const struct gl_texture_format *texFormat = texImage->TexFormat; + int blit_format, dstPitch, done; + + switch (texFormat->TexelBytes) { + case 1: + blit_format = R300_CP_COLOR_FORMAT_CI8; + break; + case 2: + blit_format = R300_CP_COLOR_FORMAT_RGB565; + break; + case 4: + blit_format = R300_CP_COLOR_FORMAT_ARGB8888; + break; + case 8: + case 16: + blit_format = R300_CP_COLOR_FORMAT_CI8; + break; + default: + return; + } + + t->image[0][0].data = texImage->Data; + + /* Currently don't need to cope with small pitches. + */ + width = texImage->Width; + height = texImage->Height; + dstPitch = t->pitch; + + if (texFormat->TexelBytes > 4) { + width *= texFormat->TexelBytes; + } + + if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { + /* In this case, could also use GART texturing. This is + * currently disabled, but has been tested & works. + */ + t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); + t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "Using GART texturing for rectangular client texture\n"); + + /* Release FB memory allocated for this image: + */ + /* FIXME This may not be correct as driSwapOutTextureObject sets + * FIXME dirty_images. It may be fine, though. + */ + if (t->base.memBlock) { + driSwapOutTextureObject((driTextureObject *) t); + } + } else if (texImage->IsClientData) { + /* Data already in GART memory, with usable pitch. + */ + GLuint srcPitch; + srcPitch = texImage->RowStride * texFormat->TexelBytes; + r300EmitBlit(rmesa, + blit_format, + srcPitch, + r300GartOffsetFromVirtual(rmesa, texImage->Data), + dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); + } else { + /* Data not in GART memory, or bad pitch. + */ + for (done = 0; done < height;) { + struct r300_dma_region region; + int lines = + MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); + int src_pitch; + char *tex; + + src_pitch = texImage->RowStride * texFormat->TexelBytes; + + tex = (char *)texImage->Data + done * src_pitch; + + memset(®ion, 0, sizeof(region)); + r300AllocDmaRegion(rmesa, ®ion, lines * dstPitch, + 1024); + + /* Copy texdata to dma: + */ + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "%s: src_pitch %d dst_pitch %d\n", + __FUNCTION__, src_pitch, dstPitch); + + if (src_pitch == dstPitch) { + memcpy(region.address + region.start, tex, + lines * src_pitch); + } else { + char *buf = region.address + region.start; + int i; + for (i = 0; i < lines; i++) { + memcpy(buf, tex, src_pitch); + buf += dstPitch; + tex += src_pitch; + } + } + + r300EmitWait(rmesa, R300_WAIT_3D); + + /* Blit to framebuffer + */ + r300EmitBlit(rmesa, + blit_format, + dstPitch, GET_START(®ion), + dstPitch | (t->tile_bits >> 16), + t->bufAddr, 0, 0, 0, done, width, lines); + + r300EmitWait(rmesa, R300_WAIT_2D); +#ifdef USER_BUFFERS + r300_mem_use(rmesa, region.buf->id); +#endif + + r300ReleaseDmaRegion(rmesa, ®ion, __FUNCTION__); + done += lines; + } + } +} + +/** + * Upload the texture image associated with texture \a t at the specified + * level at the address relative to \a start. + */ +static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, + GLint hwlevel, + GLint x, GLint y, GLint width, GLint height, + GLuint face) +{ + struct gl_texture_image *texImage = NULL; + GLuint offset; + GLint imageWidth, imageHeight; + GLint ret; + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + const int level = hwlevel + t->base.firstLevel; + + if (RADEON_DEBUG & DEBUG_TEXTURE) { + fprintf(stderr, + "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", + __FUNCTION__, (void *)t, (void *)t->base.tObj, level, + width, height, face); + } + + ASSERT(face < 6); + + /* Ensure we have a valid texture to upload */ + if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) { + _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); + return; + } + + texImage = t->base.tObj->Image[face][level]; + + if (!texImage) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: texImage %d is NULL!\n", + __FUNCTION__, level); + return; + } + if (!texImage->Data) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: image data is NULL!\n", + __FUNCTION__); + return; + } + + if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + assert(level == 0); + assert(hwlevel == 0); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: image data is rectangular\n", + __FUNCTION__); + r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); + return; + } else if (texImage->IsClientData) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "%s: image data is in GART client storage\n", + __FUNCTION__); + r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, + width, height); + return; + } else if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: image data is in normal memory\n", + __FUNCTION__); + + imageWidth = texImage->Width; + imageHeight = texImage->Height; + + offset = t->bufAddr + t->base.totalSize / 6 * face; + + if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { + GLint imageX = 0; + GLint imageY = 0; + GLint blitX = t->image[face][hwlevel].x; + GLint blitY = t->image[face][hwlevel].y; + GLint blitWidth = t->image[face][hwlevel].width; + GLint blitHeight = t->image[face][hwlevel].height; + fprintf(stderr, " upload image: %d,%d at %d,%d\n", + imageWidth, imageHeight, imageX, imageY); + fprintf(stderr, " upload blit: %d,%d at %d,%d\n", + blitWidth, blitHeight, blitX, blitY); + fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", + (GLuint) offset, hwlevel, level); + } + + t->image[face][hwlevel].data = texImage->Data; + + /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. + * NOTE: we're always use a 1KB-wide blit and I8 texture format. + * We used to use 1, 2 and 4-byte texels and used to use the texture + * width to dictate the blit width - but that won't work for compressed + * textures. (Brian) + * NOTE: can't do that with texture tiling. (sroland) + */ + tex.offset = offset; + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); + + if (texImage->TexFormat->TexelBytes > 4) { + const int log2TexelBytes = + (3 + (texImage->TexFormat->TexelBytes >> 4)); + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = + MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / + 64, 1); + tex.height = imageHeight; + tex.width = imageWidth << log2TexelBytes; + tex.offset += (tmp.x << log2TexelBytes) & ~1023; + tmp.x = tmp.x % (1024 >> log2TexelBytes); + tmp.width = tmp.width << log2TexelBytes; + } else if (texImage->TexFormat->TexelBytes) { + /* use multi-byte upload scheme */ + tex.height = imageHeight; + tex.width = imageWidth; + switch (texImage->TexFormat->TexelBytes) { + case 1: + tex.format = RADEON_TXFORMAT_I8; + break; + case 2: + tex.format = RADEON_TXFORMAT_AI88; + break; + case 4: + tex.format = RADEON_TXFORMAT_ARGB8888; + break; + } + tex.pitch = + MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / + 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + + if (t->tile_bits & R300_TXO_MICRO_TILE) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = + tmp.x % (tex.pitch * 128) / 2 / + texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } +#if 1 + if ((t->tile_bits & R300_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256) + && ((!(t->tile_bits & R300_TXO_MICRO_TILE) + && (texImage->Height >= 8)) + || (texImage->Height >= 16))) { + /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, + OR if height is smaller than 8 automatically, but if micro tiling is active + the limit is height 16 instead ? */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } +#endif + } else { + /* In case of for instance 8x8 texture (2x2 dxt blocks), + padding after the first two blocks is needed (only + with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ + /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) + has 4 real pixels. Needed so the kernel module reads + the right amount of data. */ + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); + tex.height = (imageHeight + 3) / 4; + tex.width = (imageWidth + 3) / 4; + if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { + tex.width *= 8; + } else { + tex.width *= 16; + } + } + + LOCK_HARDWARE(&rmesa->radeon); + do { + ret = + drmCommandWriteRead(rmesa->radeon.dri.fd, + DRM_RADEON_TEXTURE, &tex, + sizeof(drm_radeon_texture_t)); + if (ret) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while (ret == -EAGAIN); + + UNLOCK_HARDWARE(&rmesa->radeon); + + if (ret) { + fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); + fprintf(stderr, " offset=0x%08x\n", offset); + fprintf(stderr, " image width=%d height=%d\n", + imageWidth, imageHeight); + fprintf(stderr, " blit width=%d height=%d data=%p\n", + t->image[face][hwlevel].width, + t->image[face][hwlevel].height, + t->image[face][hwlevel].data); + _mesa_exit(-1); + } +} + +/** + * Upload the texture images associated with texture \a t. This might + * require the allocation of texture memory. + * + * \param rmesa Context pointer + * \param t Texture to be uploaded + * \param face Cube map face to be uploaded. Zero for non-cube maps. + */ + +int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) +{ + const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + if (t->image_override) + return 0; + + if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { + fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, + (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, + t->base.totalSize, t->base.firstLevel, + t->base.lastLevel); + } + + if (!t || t->base.totalSize == 0) + return 0; + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + radeonFinish(rmesa->radeon.glCtx); + } + + LOCK_HARDWARE(&rmesa->radeon); + + if (t->base.memBlock == NULL) { + int heap; + + heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, + (driTextureObject *) t); + if (heap == -1) { + UNLOCK_HARDWARE(&rmesa->radeon); + return -1; + } + + /* Set the base offset of the texture image */ + t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] + + t->base.memBlock->ofs; + t->offset = t->bufAddr; + + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->offset |= t->tile_bits; + } + + /* Mark this texobj as dirty on all units: + */ + t->dirty_state = TEX_ALL; + } + + /* Let the world know we've used this memory recently. + */ + driUpdateTextureLRU((driTextureObject *) t); + UNLOCK_HARDWARE(&rmesa->radeon); + + /* Upload any images that are new */ + if (t->base.dirty_images[face]) { + int i; + for (i = 0; i < numLevels; i++) { + if ((t->base. + dirty_images[face] & (1 << + (i + t->base.firstLevel))) != + 0) { + r300UploadSubImage(rmesa, t, i, 0, 0, + t->image[face][i].width, + t->image[face][i].height, + face); + } + } + t->base.dirty_images[face] = 0; + } + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + radeonFinish(rmesa->radeon.glCtx); + } + + return 0; +} diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c new file mode 100644 index 0000000..8203189 --- /dev/null +++ b/r300/r300_texstate.c @@ -0,0 +1,620 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * + * \todo Enable R300 texture tiling code? + */ + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "macros.h" +#include "texformat.h" +#include "teximage.h" +#include "texobj.h" +#include "enums.h" + +#include "r300_context.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "radeon_ioctl.h" +#include "r300_tex.h" +#include "r300_reg.h" + +#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ + || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ + (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ + && tx_table_le[f].flag ) + +#define _ASSIGN(entry, format) \ + [ MESA_FORMAT_ ## entry ] = { format, 0, 1} + +/* + * Note that the _REV formats are the same as the non-REV formats. This is + * because the REV and non-REV formats are identical as a byte string, but + * differ when accessed as 16-bit or 32-bit words depending on the endianness of + * the host. Since the textures are transferred to the R300 as a byte string + * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats + * identically. -- paulus + */ + +static const struct tx_table { + GLuint format, filter, flag; +} tx_table_be[] = { + /* *INDENT-OFF* */ + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), + _ASSIGN(RGB888, 0xffffffff), + _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), + _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), + _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), + _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), + _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), + _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), + _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), + _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), + _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), + _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), + _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), + _ASSIGN(RGB_FLOAT32, 0xffffffff), + _ASSIGN(RGB_FLOAT16, 0xffffffff), + _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), + _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), + _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), + _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), + _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), + _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), + /* *INDENT-ON* */ +}; + +static const struct tx_table tx_table_le[] = { + /* *INDENT-OFF* */ + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), + _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), + _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), + _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), + _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE ), + _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8)|R300_TX_FORMAT_YUV_MODE), + _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), + _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), + _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), + _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), + _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), + _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), + _ASSIGN(RGB_FLOAT32, 0xffffffff), + _ASSIGN(RGB_FLOAT16, 0xffffffff), + _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), + _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), + _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), + _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), + _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), + _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), + /* *INDENT-ON* */ +}; + +#undef _ASSIGN + +/** + * This function computes the number of bytes of storage needed for + * the given texture object (all mipmap levels, all cube faces). + * The \c image[face][level].x/y/width/height parameters for upload/blitting + * are computed here. \c filter, \c format, etc. will be set here + * too. + * + * \param rmesa Context pointer + * \param tObj GL texture object whose images are to be posted to + * hardware state. + */ +static void r300SetTexImages(r300ContextPtr rmesa, + struct gl_texture_object *tObj) +{ + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + const struct gl_texture_image *baseImage = + tObj->Image[0][tObj->BaseLevel]; + GLint curOffset, blitWidth; + GLint i, texelBytes; + GLint numLevels; + GLint log2Width, log2Height, log2Depth; + + /* Set the hardware texture format + */ + if (!t->image_override && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { + if (_mesa_little_endian()) { + t->format = + tx_table_le[baseImage->TexFormat->MesaFormat]. + format; + t->filter |= + tx_table_le[baseImage->TexFormat->MesaFormat]. + filter; + } else { + t->format = + tx_table_be[baseImage->TexFormat->MesaFormat]. + format; + t->filter |= + tx_table_be[baseImage->TexFormat->MesaFormat]. + filter; + } + } else if (!t->image_override) { + _mesa_problem(NULL, "unexpected texture format in %s", + __FUNCTION__); + return; + } + + texelBytes = baseImage->TexFormat->TexelBytes; + + /* Compute which mipmap levels we really want to send to the hardware. + */ + driCalculateTextureFirstLastLevel((driTextureObject *) t); + log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; + log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; + log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; + + numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + + /* Calculate mipmap offsets and dimensions for blitting (uploading) + * The idea is that we lay out the mipmap levels within a block of + * memory organized as a rectangle of width BLIT_WIDTH_BYTES. + */ + curOffset = 0; + blitWidth = R300_BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ +#if 0 /* Disabled for now */ + if (texelBytes) { + if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && + /* texrect might be able to use micro tiling too in theory? */ + (baseImage->Height > 1)) { + + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / + baseImage->Height) <= 32) + && (baseImage->Width * texelBytes > 64)) + || + ((baseImage->Width * texelBytes / + baseImage->Height) <= 16)) { + t->tile_bits |= R300_TXO_MICRO_TILE; + } + } + + if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { + /* we can set macro tiling even for small textures, they will be untiled anyway */ + t->tile_bits |= R300_TXO_MACRO_TILE; + } + } +#endif + + for (i = 0; i < numLevels; i++) { + const struct gl_texture_image *texImage; + GLuint size; + + texImage = tObj->Image[0][i + t->base.firstLevel]; + if (!texImage) + break; + + /* find image size in bytes */ + if (texImage->IsCompressed) { + if ((t->format & R300_TX_FORMAT_DXT1) == + R300_TX_FORMAT_DXT1) { + // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); + if ((texImage->Width + 3) < 8) /* width one block */ + size = texImage->CompressedSize * 4; + else if ((texImage->Width + 3) < 16) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } else { + /* DXT3/5, 16 bytes per block */ + WARN_ONCE + ("DXT 3/5 suffers from multitexturing problems!\n"); + // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); + if ((texImage->Width + 3) < 8) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } + } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + size = + ((texImage->Width * texelBytes + + 63) & ~63) * texImage->Height; + blitWidth = 64 / texelBytes; + } else if (t->tile_bits & R300_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = + (w * ((texImage->Height + 1) / 2)) * + texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } else { + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + assert(size > 0); + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", + texImage->Width, texImage->Height, + texImage->Depth, + texImage->TexFormat->TexelBytes, + texImage->InternalFormat); + + /* Align to 32-byte offset. It is faster to do this unconditionally + * (no branch penalty). + */ + + curOffset = (curOffset + 0x1f) & ~0x1f; + + if (texelBytes) { + /* fix x and y coords up later together with offset */ + t->image[0][i].x = curOffset; + t->image[0][i].y = 0; + t->image[0][i].width = + MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = + (size / texelBytes) / t->image[0][i].width; + } else { + t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES; + t->image[0][i].width = + MIN2(size, R300_BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + i, texImage->Width, texImage->Height, + t->image[0][i].x, t->image[0][i].y, + t->image[0][i].width, t->image[0][i].height, + size, curOffset); + + curOffset += size; + } + + /* Align the total size of texture memory block. + */ + t->base.totalSize = + (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + /* Setup remaining cube face blits, if needed */ + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + GLuint face; + for (face = 1; face < 6; face++) { + for (i = 0; i < numLevels; i++) { + t->image[face][i].x = t->image[0][i].x; + t->image[face][i].y = t->image[0][i].y; + t->image[face][i].width = t->image[0][i].width; + t->image[face][i].height = + t->image[0][i].height; + } + } + t->base.totalSize *= 6; /* total texmem needed */ + } + + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->format |= R300_TX_FORMAT_CUBIC_MAP; + } + + t->size = + (((tObj->Image[0][t->base.firstLevel]->Width - + 1) << R300_TX_WIDTHMASK_SHIFT) + | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << + R300_TX_HEIGHTMASK_SHIFT)) + | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); + + /* Only need to round to nearest 32 for textures, but the blitter + * requires 64-byte aligned pitches, and we may/may not need the + * blitter. NPOT only! + */ + if (baseImage->IsCompressed) { + t->pitch = + (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); + } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + unsigned int align = blitWidth - 1; + t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width * + texelBytes) + 63) & ~(63); + t->size |= R300_TX_SIZE_TXPITCH_EN; + if (!t->image_override) + t->pitch_reg = + (((tObj->Image[0][t->base.firstLevel]->Width) + + align) & ~align) - 1; + } else { + t->pitch = + ((tObj->Image[0][t->base.firstLevel]->Width * + texelBytes) + 63) & ~(63); + } + + t->dirty_state = TEX_ALL; + + /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */ +} + +/* ================================================================ + * Texture unit state management + */ + +static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + + ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); + + if (t->base.dirty_images[0]) { + R300_FIREVERTICES(rmesa); + + r300SetTexImages(rmesa, tObj); + r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); + if (!t->base.memBlock && !t->image_override) + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + + ASSERT(tObj->Target == GL_TEXTURE_3D); + + /* r300 does not support mipmaps for 3D textures. */ + if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) { + return GL_FALSE; + } + + if (t->base.dirty_images[0]) { + R300_FIREVERTICES(rmesa); + r300SetTexImages(rmesa, tObj); + r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); + if (!t->base.memBlock) + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + GLuint face; + + ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + + if (t->base.dirty_images[0] || t->base.dirty_images[1] || + t->base.dirty_images[2] || t->base.dirty_images[3] || + t->base.dirty_images[4] || t->base.dirty_images[5]) { + /* flush */ + R300_FIREVERTICES(rmesa); + /* layout memory space, once for all faces */ + r300SetTexImages(rmesa, tObj); + } + + /* upload (per face) */ + for (face = 0; face < 6; face++) { + if (t->base.dirty_images[face]) { + r300UploadTexImages(rmesa, + (r300TexObjPtr) tObj->DriverData, + face); + } + } + + if (!t->base.memBlock) { + /* texmem alloc failed, use s/w fallback */ + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + + ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); + + if (t->base.dirty_images[0]) { + R300_FIREVERTICES(rmesa); + + r300SetTexImages(rmesa, tObj); + r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); + if (!t->base.memBlock && !t->image_override && + !rmesa->prefer_gart_client_texturing) + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + + /* Fallback if there's a texture border */ + if (tObj->Image[0][tObj->BaseLevel]->Border > 0) + return GL_FALSE; + + /* Update state if this is a different texture object to last + * time. + */ + if (rmesa->state.texture.unit[unit].texobj != t) { + if (rmesa->state.texture.unit[unit].texobj != NULL) { + /* The old texture is no longer bound to this texture unit. + * Mark it as such. + */ + + rmesa->state.texture.unit[unit].texobj->base.bound &= + ~(1UL << unit); + } + + rmesa->state.texture.unit[unit].texobj = t; + t->base.bound |= (1UL << unit); + t->dirty_state |= 1 << unit; + driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ + } + + return !t->border_fallback; +} + +void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + r300ContextPtr rmesa = + (r300ContextPtr)((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + r300TexObjPtr t; + int idx; + + if (!tObj) + return; + + t = (r300TexObjPtr) tObj->DriverData; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + t->offset = offset; + t->pitch_reg = pitch; + + switch (depth) { + case 32: + idx = 2; + t->pitch_reg /= 4; + break; + case 24: + default: + idx = 4; + t->pitch_reg /= 4; + break; + case 16: + idx = 5; + t->pitch_reg /= 2; + break; + } + + t->pitch_reg--; + + t->format = tx_table_le[idx].format; + t->filter |= tx_table_le[idx].filter; +} + +static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) +{ + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) { + return (r300EnableTextureRect(ctx, unit) && + r300UpdateTexture(ctx, unit)); + } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) { + return (r300EnableTexture2D(ctx, unit) && + r300UpdateTexture(ctx, unit)); + } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) { + return (r300EnableTexture3D(ctx, unit) && + r300UpdateTexture(ctx, unit)); + } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) { + return (r300EnableTextureCube(ctx, unit) && + r300UpdateTexture(ctx, unit)); + } else if (texUnit->_ReallyEnabled) { + return GL_FALSE; + } else { + return GL_TRUE; + } +} + +void r300UpdateTextureState(GLcontext * ctx) +{ + int i; + + for (i = 0; i < 8; i++) { + if (!r300UpdateTextureUnit(ctx, i)) { + _mesa_warning(ctx, + "failed to update texture state for unit %d.\n", + i); + } + } +} diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c new file mode 100644 index 0000000..1d90ade --- /dev/null +++ b/r300/r300_vertprog.c @@ -0,0 +1,1305 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Aapo Tahkola <aet@rasterburn.org> + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "tnl/tnl.h" + +#include "r300_context.h" + +#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ + SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ + SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ + SWIZZLE_W != VSF_IN_COMPONENT_W || \ + SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ + SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ + WRITEMASK_X != VSF_FLAG_X || \ + WRITEMASK_Y != VSF_FLAG_Y || \ + WRITEMASK_Z != VSF_FLAG_Z || \ + WRITEMASK_W != VSF_FLAG_W +#error Cannot change these! +#endif + +#define SCALAR_FLAG (1<<31) +#define FLAG_MASK (1<<31) +#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ +#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} + +static struct { + char *name; + int opcode; + unsigned long ip; /* number of input operands and flags */ +} op_names[] = { + /* *INDENT-OFF* */ + OPN(ABS, 1), + OPN(ADD, 2), + OPN(ARL, 1 | SCALAR_FLAG), + OPN(DP3, 2), + OPN(DP4, 2), + OPN(DPH, 2), + OPN(DST, 2), + OPN(EX2, 1 | SCALAR_FLAG), + OPN(EXP, 1 | SCALAR_FLAG), + OPN(FLR, 1), + OPN(FRC, 1), + OPN(LG2, 1 | SCALAR_FLAG), + OPN(LIT, 1), + OPN(LOG, 1 | SCALAR_FLAG), + OPN(MAD, 3), + OPN(MAX, 2), + OPN(MIN, 2), + OPN(MOV, 1), + OPN(MUL, 2), + OPN(POW, 2 | SCALAR_FLAG), + OPN(RCP, 1 | SCALAR_FLAG), + OPN(RSQ, 1 | SCALAR_FLAG), + OPN(SGE, 2), + OPN(SLT, 2), + OPN(SUB, 2), + OPN(SWZ, 1), + OPN(XPD, 2), + OPN(RCC, 0), //extra + OPN(PRINT, 0), + OPN(END, 0) + /* *INDENT-ON* */ +}; + +#undef OPN + +int r300VertexProgUpdateParams(GLcontext * ctx, + struct r300_vertex_program_cont *vp, float *dst) +{ + int pi; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + float *dst_o = dst; + struct gl_program_parameter_list *paramList; + + if (mesa_vp->IsNVProgram) { + _mesa_load_tracked_matrices(ctx); + + for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { + *dst++ = ctx->VertexProgram.Parameters[pi][0]; + *dst++ = ctx->VertexProgram.Parameters[pi][1]; + *dst++ = ctx->VertexProgram.Parameters[pi][2]; + *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } + return dst - dst_o; + } + + assert(mesa_vp->Base.Parameters); + _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + + if (mesa_vp->Base.Parameters->NumParameters * 4 > + VSF_MAX_FRAGMENT_LENGTH) { + fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + _mesa_exit(-1); + } + + paramList = mesa_vp->Base.Parameters; + for (pi = 0; pi < paramList->NumParameters; pi++) { + switch (paramList->Parameters[pi].Type) { + + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); + case PROGRAM_CONSTANT: + *dst++ = paramList->ParameterValues[pi][0]; + *dst++ = paramList->ParameterValues[pi][1]; + *dst++ = paramList->ParameterValues[pi][2]; + *dst++ = paramList->ParameterValues[pi][3]; + break; + + default: + _mesa_problem(NULL, "Bad param type in %s", + __FUNCTION__); + } + + } + + return dst - dst_o; +} + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; +} + +static unsigned long t_dst_class(enum register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return VSF_OUT_CLASS_TMP; + case PROGRAM_OUTPUT: + return VSF_OUT_CLASS_RESULT; + case PROGRAM_ADDRESS: + return VSF_OUT_CLASS_ADDR; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(enum register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return VSF_IN_CLASS_TMP; + + case PROGRAM_INPUT: + return VSF_IN_CLASS_ATTR; + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + return VSF_IN_CLASS_PARAM; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static __inline unsigned long t_swizzle(GLubyte swizzle) +{ +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +#if 0 +static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +{ + int i; + + if (vp == NULL) { + fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, + caller); + return; + } + + fprintf(stderr, "%s:<", caller); + for (i = 0; i < VERT_ATTRIB_MAX; i++) + fprintf(stderr, "%d ", vp->inputs[i]); + fprintf(stderr, ">\n"); + +} +#endif + +static unsigned long t_src_index(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + int i; + int max_reg = -1; + + if (src->File == PROGRAM_INPUT) { + if (vp->inputs[src->Index] != -1) + return vp->inputs[src->Index]; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + if (vp->inputs[i] > max_reg) + max_reg = vp->inputs[i]; + + vp->inputs[src->Index] = max_reg + 1; + + //vp_dump_inputs(vp, __FUNCTION__); + + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +static unsigned long t_src(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->NegateBase) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + + return MAKE_VSF_SOURCE(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src-> + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src->RelAddr << 4); +} + +static unsigned long t_opcode(enum prog_opcode opcode) +{ + + switch (opcode) { + /* *INDENT-OFF* */ + case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; + case OPCODE_DST: return R300_VPI_OUT_OP_DST; + case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; + case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; + case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; + case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; + case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; + case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; + case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; + case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; + case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; + case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; + case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; + case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; + case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; + /* *INDENT-ON* */ + + default: + fprintf(stderr, "%s: Should not be called with opcode %d!", + __FUNCTION__, opcode); + } + _mesa_exit(-1); + return 0; +} + +static unsigned long op_operands(enum prog_opcode opcode) +{ + int i; + + /* Can we trust mesas opcodes to be in order ? */ + for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++) + if (op_names[i].opcode == opcode) + return op_names[i].ip; + + fprintf(stderr, "op %d not found in op_names\n", opcode); + _mesa_exit(-1); + return 0; +} + +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ + +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +#define FREE_TEMPS() \ + do { \ + if(u_temp_i < vp->num_temporaries) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i, cur_reg = 0; + VERTEX_SHADER_INSTRUCTION *o_inst; + unsigned long operands; + int are_srcs_scalar; + unsigned long hw_op; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + vp->inputs[i] = -1; + + for (i = 0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + + /* Assign outputs */ + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + +#if 0 /* Not supported yet */ + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; +#endif + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) + if (vp->key.OutputsWritten & (1 << i)) + vp->outputs[i] = cur_reg++; + + vp->translated = GL_TRUE; + vp->native = GL_TRUE; + + o_inst = vp->program.body.i; + for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) { + FREE_TEMPS(); + + if (!valid_dst(vp, &vpi->DstReg)) { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + operands = op_operands(vpi->Opcode); + are_srcs_scalar = operands & SCALAR_FLAG; + operands &= OP_MASK; + + for (i = 0; i < operands; i++) + src[i] = vpi->SrcReg[i]; + + if (operands == 3) { /* TODO: scalars */ + if (CMP_SRCS(src[1], src[2]) + || CMP_SRCS(src[0], src[2])) { + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, + VSF_FLAG_ALL, + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[2].File), + VSF_FLAG_NONE) | (src[2]. + RelAddr << + 4); + + o_inst->src[1] = ZERO_SRC_2; + o_inst->src[2] = ZERO_SRC_2; + o_inst++; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + + } + + if (operands >= 2) { + if (CMP_SRCS(src[1], src[0])) { + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, + VSF_FLAG_ALL, + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_Z, SWIZZLE_W, + t_src_class(src[0].File), + VSF_FLAG_NONE) | (src[0]. + RelAddr << + 4); + + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + o_inst++; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + /* These ops need special handling. */ + switch (vpi->Opcode) { + case OPCODE_POW: + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_POW, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = t_src_scalar(vp, &src[1]); + goto next; + + case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + case OPCODE_SWZ: +#if 1 + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->op = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ONE_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#endif + + goto next; + + case OPCODE_ADD: +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY && + src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->op = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = ONE_SRC_0; + o_inst->src[1] = t_src(vp, &src[0]); + o_inst->src[2] = t_src(vp, &src[1]); +#else + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; + +#endif + goto next; + + case OPCODE_MAD: + hw_op = (src[0].File == PROGRAM_TEMPORARY && + src[1].File == PROGRAM_TEMPORARY && + src[2].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->op = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = t_src(vp, &src[2]); + goto next; + + case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ + hw_op = (src[0].File == PROGRAM_TEMPORARY && + src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->op = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + + o_inst->src[2] = ZERO_SRC_1; + goto next; + + case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ + (src[1].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : + VSF_FLAG_NONE) | (src[1]. + RelAddr << 4); + + o_inst->src[2] = ZERO_SRC_1; + goto next; + + case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY && + src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->op = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ONE_SRC_0; + o_inst->src[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ + (src[1].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 2)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1]. + RelAddr << 4); +#else + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ + (src[1].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 2)), + t_swizzle(GET_SWZ + (src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1]. + RelAddr << 4); + o_inst->src[2] = 0; +#endif + goto next; + + case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 2)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + o_inst->src[2] = 0; + goto next; + + case OPCODE_FLR: + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, + t_dst_mask(vpi->DstReg. + WriteMask), + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + o_inst++; + + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? + VSF_FLAG_ALL : + VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + + o_inst->src[2] = ZERO_SRC_0; + u_temp_i--; + goto next; + + case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + goto next; + + case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_src_class(src[0]. + File), + src[0]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_src_class(src[0]. + File), + src[0]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0]. + File), + src[0]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + goto next; + + case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ + (src[0].Swizzle, 0)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 1)), + t_swizzle(GET_SWZ + (src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; + goto next; + + case OPCODE_XPD: + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, + t_dst_mask(vpi->DstReg. + WriteMask), + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0]. + File), + src[0]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1]. + File), + src[1]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + + o_inst->src[2] = ZERO_SRC_1; + o_inst++; + u_temp_i--; + + o_inst->op = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1]. + File), + (!src[1]. + NegateBase) ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0]. + File), + src[0]. + NegateBase ? + VSF_FLAG_ALL : + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1, + VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, + VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, + VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + + goto next; + + case OPCODE_RCC: + fprintf(stderr, "Dont know how to handle op %d yet\n", + vpi->Opcode); + _mesa_exit(-1); + break; + case OPCODE_END: + break; + default: + break; + } + + o_inst->op = + MAKE_VSF_OP(t_opcode(vpi->Opcode), + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + if (are_srcs_scalar) { + switch (operands) { + case 1: + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + break; + + case 2: + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = t_src_scalar(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; + break; + + case 3: + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = t_src_scalar(vp, &src[1]); + o_inst->src[2] = t_src_scalar(vp, &src[2]); + break; + + default: + fprintf(stderr, + "scalars and op RCC not handled yet"); + _mesa_exit(-1); + break; + } + } else { + switch (operands) { + case 1: + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + break; + + case 2: + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; + break; + + case 3: + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = t_src(vp, &src[2]); + break; + + default: + fprintf(stderr, + "scalars and op RCC not handled yet"); + _mesa_exit(-1); + break; + } + } + next:; + } + + /* Will most likely segfault before we get here... fix later. */ + if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) { + vp->program.length = 0; + vp->native = GL_FALSE; + return; + } + vp->program.length = (o_inst - vp->program.body.i) * 4; +#if 0 + fprintf(stderr, "hw program:\n"); + for (i = 0; i < vp->program.length; i++) + fprintf(stderr, "%08x\n", vp->program.body.d[i]); +#endif +} + +static void position_invariant(struct gl_program *prog) +{ + struct prog_instruction *vpi; + struct gl_program_parameter_list *paramList; + int i; + + gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + + /* tokens[4] = matrix modifier */ +#ifdef PREFER_DP4 + tokens[4] = 0; /* not transposed or inverted */ +#else + tokens[4] = STATE_MATRIX_TRANSPOSE; +#endif + paramList = prog->Parameters; + + vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); + _mesa_init_instructions(vpi, prog->NumInstructions + 4); + + for (i = 0; i < 4; i++) { + GLint idx; + tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ + idx = _mesa_add_state_reference(paramList, tokens); +#ifdef PREFER_DP4 + vpi[i].Opcode = OPCODE_DP4; + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + vpi[i].DstReg.File = PROGRAM_OUTPUT; + vpi[i].DstReg.Index = VERT_RESULT_HPOS; + vpi[i].DstReg.WriteMask = 1 << i; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; +#else + if (i == 0) + vpi[i].Opcode = OPCODE_MUL; + else + vpi[i].Opcode = OPCODE_MAD; + + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + if (i == 3) + vpi[i].DstReg.File = PROGRAM_OUTPUT; + else + vpi[i].DstReg.File = PROGRAM_TEMPORARY; + vpi[i].DstReg.Index = 0; + vpi[i].DstReg.WriteMask = 0xf; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); + + if (i > 0) { + vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; + vpi[i].SrcReg[2].Index = 0; + vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; + } +#endif + } + + _mesa_copy_instructions(&vpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += 4; + vpi = &prog->Instructions[prog->NumInstructions - 1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void insert_wpos(struct r300_vertex_program *vp, + struct gl_program *prog, GLuint temp_index) +{ + struct prog_instruction *vpi; + struct prog_instruction *vpi_insert; + int i = 0; + + vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); + _mesa_init_instructions(vpi, prog->NumInstructions + 2); + /* all but END */ + _mesa_copy_instructions(vpi, prog->Instructions, + prog->NumInstructions - 1); + /* END */ + _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], + &prog->Instructions[prog->NumInstructions - 1], + 1); + vpi_insert = &vpi[prog->NumInstructions - 1]; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += i; + vpi = &prog->Instructions[prog->NumInstructions - 1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void pos_as_texcoord(struct r300_vertex_program *vp, + struct gl_program *prog) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && + vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + insert_wpos(vp, prog, tempregi); +} + +static struct r300_vertex_program *build_program(struct r300_vertex_program_key + *wanted_key, struct gl_vertex_program + *mesa_vp, GLint wpos_idx) +{ + struct r300_vertex_program *vp; + + vp = _mesa_calloc(sizeof(*vp)); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); + + vp->wpos_idx = wpos_idx; + + if (mesa_vp->IsPositionInvariant) { + position_invariant(&mesa_vp->Base); + } + + if (wpos_idx > -1) + pos_as_texcoord(vp, &mesa_vp->Base); + + assert(mesa_vp->Base.NumInstructions); + + vp->num_temporaries = mesa_vp->Base.NumTemporaries; + + r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); + + return vp; +} + +void r300SelectVertexShader(r300ContextPtr r300) +{ + GLcontext *ctx = ctx = r300->radeon.glCtx; + GLuint InputsRead; + struct r300_vertex_program_key wanted_key = { 0 }; + GLint i; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + GLint wpos_idx; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + + wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; + + wpos_idx = -1; + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + wpos_idx = i; + } + + if (InputsRead & FRAG_BIT_COL0) + wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + + if ((InputsRead & FRAG_BIT_COL1) /*|| + (InputsRead & FRAG_BIT_FOGC) */ ) + wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (FRAG_BIT_TEX0 << i)) + wanted_key.OutputsWritten |= + 1 << (VERT_RESULT_TEX0 + i); + + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + if (vpc->mesa_program.IsPositionInvariant) { + /* we wan't position don't we ? */ + wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + } + + for (vp = vpc->progs; vp; vp = vp->next) + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == + 0) { + r300->selected_vp = vp; + return; + } + //_mesa_print_program(&vpc->mesa_program.Base); + + vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp->next = vpc->progs; + vpc->progs = vp; + r300->selected_vp = vp; +} diff --git a/r300/r300_vertprog.h b/r300/r300_vertprog.h new file mode 100644 index 0000000..252d5a9 --- /dev/null +++ b/r300/r300_vertprog.h @@ -0,0 +1,89 @@ +#ifndef __R300_VERTPROG_H_ +#define __R300_VERTPROG_H_ + +#include "r300_reg.h" + +typedef struct { + GLuint op; + GLuint src[3]; +} VERTEX_SHADER_INSTRUCTION; + +#define VSF_FLAG_X 1 +#define VSF_FLAG_Y 2 +#define VSF_FLAG_Z 4 +#define VSF_FLAG_W 8 +#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) +#define VSF_FLAG_ALL 0xf +#define VSF_FLAG_NONE 0 + +#define VSF_OUT_CLASS_TMP 0 +#define VSF_OUT_CLASS_ADDR 1 +#define VSF_OUT_CLASS_RESULT 2 + +/* first DWORD of an instruction */ + +/* possible operations: + DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2, + LG2, MAD_2 */ + +#define MAKE_VSF_OP(op, out_reg_index, out_reg_fields, class) \ + ((op) \ + | ((out_reg_index) << R300_VPI_OUT_REG_INDEX_SHIFT) \ + | ((out_reg_fields) << 20) \ + | ( (class) << 8 ) ) + +#define EASY_VSF_OP(op, out_reg_index, out_reg_fields, class) \ + MAKE_VSF_OP(R300_VPI_OUT_OP_##op, out_reg_index, VSF_FLAG_##out_reg_fields, VSF_OUT_CLASS_##class) \ + +/* according to Nikolai, the subsequent 3 DWORDs are sources, use same define for each */ + +#define VSF_IN_CLASS_TMP 0 +#define VSF_IN_CLASS_ATTR 1 +#define VSF_IN_CLASS_PARAM 2 +#define VSF_IN_CLASS_NONE 9 + +#define VSF_IN_COMPONENT_X 0 +#define VSF_IN_COMPONENT_Y 1 +#define VSF_IN_COMPONENT_Z 2 +#define VSF_IN_COMPONENT_W 3 +#define VSF_IN_COMPONENT_ZERO 4 +#define VSF_IN_COMPONENT_ONE 5 + +#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + ( ((in_reg_index)<<R300_VPI_IN_REG_INDEX_SHIFT) \ + | ((comp_x)<<R300_VPI_IN_X_SHIFT) \ + | ((comp_y)<<R300_VPI_IN_Y_SHIFT) \ + | ((comp_z)<<R300_VPI_IN_Z_SHIFT) \ + | ((comp_w)<<R300_VPI_IN_W_SHIFT) \ + | ((negate)<<25) | ((class))) + +#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \ + MAKE_VSF_SOURCE(in_reg_index, \ + VSF_IN_COMPONENT_##comp_x, \ + VSF_IN_COMPONENT_##comp_y, \ + VSF_IN_COMPONENT_##comp_z, \ + VSF_IN_COMPONENT_##comp_w, \ + VSF_IN_CLASS_##class, VSF_FLAG_##negate) + +/* special sources: */ + +/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */ +#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE) +#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE) + +/* contents of unmodified register */ +#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE) + +/* contents of unmodified parameter */ +#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE) + +/* contents of unmodified temporary register */ +#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE) + +/* components of ATTR register */ +#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE) +#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE) +#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE) +#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE) + +#endif diff --git a/r300/radeon_context.c b/r300/radeon_context.c new file mode 100644 index 0000000..e9634b4 --- /dev/null +++ b/r300/radeon_context.c @@ -0,0 +1,327 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file radeon_context.c + * Common context initialization. + * + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <dlfcn.h> + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "state.h" +#include "matrix.h" +#include "framebuffer.h" + +#include "drivers/common/driverfuncs.h" +#include "swrast/swrast.h" + +#include "radeon_screen.h" +#include "radeon_ioctl.h" +#include "radeon_macros.h" +#include "radeon_reg.h" + +#include "radeon_state.h" +#include "r300_state.h" + +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +#define DRIVER_DATE "20060815" + + +/* Return various strings for glGetString(). + */ +static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + static char buffer[128]; + + switch (name) { + case GL_VENDOR: + if (IS_R300_CLASS(radeon->radeonScreen)) + return (GLubyte *) "DRI R300 Project"; + else + return (GLubyte *) "Tungsten Graphics, Inc."; + + case GL_RENDERER: + { + unsigned offset; + GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : + radeon->radeonScreen->AGPMode; + const char* chipname; + + if (IS_R300_CLASS(radeon->radeonScreen)) + chipname = "R300"; + else + chipname = "R200"; + + offset = driGetRendererString(buffer, chipname, DRIVER_DATE, + agp_mode); + + if (IS_R300_CLASS(radeon->radeonScreen)) { + sprintf(&buffer[offset], " %sTCL", + (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) + ? "" : "NO-"); + } else { + sprintf(&buffer[offset], " %sTCL", + !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) + ? "" : "NO-"); + } + + return (GLubyte *) buffer; + } + + default: + return NULL; + } +} + +/* Initialize the driver's misc functions. + */ +static void radeonInitDriverFuncs(struct dd_function_table *functions) +{ + functions->GetString = radeonGetString; +} + + +/** + * Create and initialize all common fields of the context, + * including the Mesa context itself. + */ +GLboolean radeonInitContext(radeonContextPtr radeon, + struct dd_function_table* functions, + const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + GLcontext* ctx; + GLcontext* shareCtx; + int fthrottle_mode; + + /* Fill in additional standard functions. */ + radeonInitDriverFuncs(functions); + + /* Allocate and initialize the Mesa context */ + if (sharedContextPrivate) + shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; + else + shareCtx = NULL; + radeon->glCtx = _mesa_create_context(glVisual, shareCtx, + functions, (void *)radeon); + if (!radeon->glCtx) + return GL_FALSE; + + ctx = radeon->glCtx; + driContextPriv->driverPrivate = radeon; + + /* DRI fields */ + radeon->dri.context = driContextPriv; + radeon->dri.screen = sPriv; + radeon->dri.drawable = NULL; + radeon->dri.readable = NULL; + radeon->dri.hwContext = driContextPriv->hHWContext; + radeon->dri.hwLock = &sPriv->pSAREA->lock; + radeon->dri.fd = sPriv->fd; + radeon->dri.drmMinor = sPriv->drmMinor; + + radeon->radeonScreen = screen; + radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + + screen->sarea_priv_offset); + + /* Setup IRQs */ + fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); + radeon->iw.irq_seq = -1; + radeon->irqsEmitted = 0; + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); + + radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + + if (!radeon->do_irqs) + fprintf(stderr, + "IRQ's not enabled, falling back to %s: %d %d\n", + radeon->do_usleeps ? "usleeps" : "busy waits", + fthrottle_mode, radeon->radeonScreen->irq); + + radeon->vblank_flags = (radeon->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&radeon->optionCache) : VBLANK_FLAG_NO_IRQ; + + (*dri_interface->getUST) (&radeon->swap_ust); + + return GL_TRUE; +} + + +/** + * Cleanup common context fields. + * Called by r200DestroyContext/r300DestroyContext + */ +void radeonCleanupContext(radeonContextPtr radeon) +{ + /* _mesa_destroy_context() might result in calls to functions that + * depend on the DriverCtx, so don't set it to NULL before. + * + * radeon->glCtx->DriverCtx = NULL; + */ + + /* free the Mesa context */ + _mesa_destroy_context(radeon->glCtx); + + if (radeon->state.scissor.pClipRects) { + FREE(radeon->state.scissor.pClipRects); + radeon->state.scissor.pClipRects = 0; + } +} + + +/** + * Swap front and back buffer. + */ +void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr radeon; + GLcontext *ctx; + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = radeon->glCtx; + + if (ctx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + if (radeon->doPageFlip) { + radeonPageFlip(dPriv); + } else { + radeonCopyBuffer(dPriv, NULL); + } + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", + __FUNCTION__); + } +} + +void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr radeon; + GLcontext *ctx; + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = radeon->glCtx; + + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + radeonCopyBuffer(dPriv, &rect); + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", + __FUNCTION__); + } +} + +/* Force the context `c' to be the current context and associate with it + * buffer `b'. + */ +GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv) +{ + if (driContextPriv) { + radeonContextPtr radeon = + (radeonContextPtr) driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, + radeon->glCtx); + + if (radeon->dri.drawable != driDrawPriv) { + driDrawableInitVBlank(driDrawPriv, + radeon->vblank_flags, + &radeon->vbl_seq); + } + + radeon->dri.readable = driReadPriv; + + if (radeon->dri.drawable != driDrawPriv || + radeon->lastStamp != driDrawPriv->lastStamp) { + radeon->dri.drawable = driDrawPriv; + + radeonSetCliprects(radeon); + r300UpdateViewportOffset(radeon->glCtx); + } + + _mesa_make_current(radeon->glCtx, + (GLframebuffer *) driDrawPriv-> + driverPrivate, + (GLframebuffer *) driReadPriv-> + driverPrivate); + + _mesa_update_state(radeon->glCtx); + + radeonUpdatePageFlipping(radeon); + } else { + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + _mesa_make_current(0, 0, 0); + } + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "End %s\n", __FUNCTION__); + return GL_TRUE; +} + +/* Force the context `c' to be unbound from its buffer. + */ +GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) +{ + radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, + radeon->glCtx); + + return GL_TRUE; +} + diff --git a/r300/radeon_context.h b/r300/radeon_context.h new file mode 100644 index 0000000..2f23941 --- /dev/null +++ b/r300/radeon_context.h @@ -0,0 +1,246 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __RADEON_CONTEXT_H__ +#define __RADEON_CONTEXT_H__ + +#include "mtypes.h" +#include "radeon_screen.h" +#include "drm.h" +#include "dri_util.h" +#include "colormac.h" + +struct radeon_context; +typedef struct radeon_context radeonContextRec; +typedef struct radeon_context *radeonContextPtr; + +#define TEX_0 0x1 +#define TEX_1 0x2 +#define TEX_2 0x4 +#define TEX_3 0x8 +#define TEX_4 0x10 +#define TEX_5 0x20 +#define TEX_6 0x40 +#define TEX_7 0x80 +#define TEX_ALL 0xff + +/* Rasterizing fallbacks */ +/* See correponding strings in r200_swtcl.c */ +#define RADEON_FALLBACK_TEXTURE 0x0001 +#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 +#define RADEON_FALLBACK_STENCIL 0x0004 +#define RADEON_FALLBACK_RENDER_MODE 0x0008 +#define RADEON_FALLBACK_BLEND_EQ 0x0010 +#define RADEON_FALLBACK_BLEND_FUNC 0x0020 +#define RADEON_FALLBACK_DISABLE 0x0040 +#define RADEON_FALLBACK_BORDER_MODE 0x0080 + +#if R200_MERGED +extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +#define FALLBACK( radeon, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ + radeonFallback( (radeon)->glCtx, bit, mode ); \ +} while (0) +#else +#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); +#endif + +/* TCL fallbacks */ +extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */ +#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */ +#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */ +#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */ +#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */ +#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */ +#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */ +#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */ +#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */ +#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */ +#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */ +#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */ +#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */ + +#if R200_MERGED +#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) +#else +#define TCL_FALLBACK( ctx, bit, mode ) ; +#endif + +struct radeon_dri_mirror { + __DRIcontextPrivate *context; /* DRI context */ + __DRIscreenPrivate *screen; /* DRI screen */ + /** + * DRI drawable bound to this context for drawing. + */ + __DRIdrawablePrivate *drawable; + + /** + * DRI drawable bound to this context for reading. + */ + __DRIdrawablePrivate *readable; + + drm_context_t hwContext; + drm_hw_lock_t *hwLock; + int fd; + int drmMinor; +}; + +/** + * Derived state for internal purposes. + */ +struct radeon_scissor_state { + drm_clip_rect_t rect; + GLboolean enabled; + + GLuint numClipRects; /* Cliprects active */ + GLuint numAllocedClipRects; /* Cliprects available */ + drm_clip_rect_t *pClipRects; +}; + +struct radeon_colorbuffer_state { + GLuint clear; + GLint drawOffset, drawPitch; +}; + +struct radeon_state { + struct radeon_colorbuffer_state color; + struct radeon_scissor_state scissor; +}; + +/** + * Common per-context variables shared by R200 and R300. + * R200- and R300-specific code "derive" their own context from this + * structure. + */ +struct radeon_context { + GLcontext *glCtx; /* Mesa context */ + radeonScreenPtr radeonScreen; /* Screen private DRI data */ + + /* Fallback state */ + GLuint Fallback; + GLuint TclFallback; + + /* Page flipping */ + GLuint doPageFlip; + + /* Drawable, cliprect and scissor information */ + GLuint numClipRects; /* Cliprects for the draw buffer */ + drm_clip_rect_t *pClipRects; + unsigned int lastStamp; + GLboolean lost_context; + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* Mirrors of some DRI state */ + struct radeon_dri_mirror dri; + + /* Busy waiting */ + GLuint do_usleeps; + GLuint do_irqs; + GLuint irqsEmitted; + drm_radeon_irq_wait_t iw; + + /* VBI / buffer swap */ + GLuint vbl_seq; + GLuint vblank_flags; + + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; + + /* Derived state */ + struct radeon_state state; + + /* Configuration cache + */ + driOptionCache optionCache; +}; + +#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) + +extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); +extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h); +extern GLboolean radeonInitContext(radeonContextPtr radeon, + struct dd_function_table *functions, + const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); +extern void radeonCleanupContext(radeonContextPtr radeon); +extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); +extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); + +/* ================================================================ + * Debugging: + */ +#define DO_DEBUG 1 + +#if DO_DEBUG +extern int RADEON_DEBUG; +#else +#define RADEON_DEBUG 0 +#endif + +#define DEBUG_TEXTURE 0x0001 +#define DEBUG_STATE 0x0002 +#define DEBUG_IOCTL 0x0004 +#define DEBUG_PRIMS 0x0008 +#define DEBUG_VERTS 0x0010 +#define DEBUG_FALLBACKS 0x0020 +#define DEBUG_VFMT 0x0040 +#define DEBUG_CODEGEN 0x0080 +#define DEBUG_VERBOSE 0x0100 +#define DEBUG_DRI 0x0200 +#define DEBUG_DMA 0x0400 +#define DEBUG_SANITY 0x0800 +#define DEBUG_SYNC 0x1000 +#define DEBUG_PIXEL 0x2000 +#define DEBUG_MEMORY 0x4000 + +#endif /* __RADEON_CONTEXT_H__ */ diff --git a/r300/radeon_ioctl.c b/r300/radeon_ioctl.c new file mode 100644 index 0000000..0b8656b --- /dev/null +++ b/r300/radeon_ioctl.c @@ -0,0 +1,394 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <sched.h> +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "macros.h" +#include "context.h" +#include "swrast/swrast.h" +#include "r300_context.h" +#include "radeon_ioctl.h" +#include "r300_ioctl.h" +#include "r300_state.h" +#include "radeon_reg.h" + +#include "drirenderbuffer.h" +#include "vblank.h" + +static void radeonWaitForIdle(radeonContextPtr radeon); + +/* ================================================================ + * SwapBuffers with client-side throttling + */ + +static uint32_t radeonGetLastFrame(radeonContextPtr radeon) +{ + drm_radeon_getparam_t gp; + int ret; + uint32_t frame; + + gp.param = RADEON_PARAM_LAST_FRAME; + gp.value = (int *)&frame; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, + ret); + exit(1); + } + + return frame; +} + +uint32_t radeonGetAge(radeonContextPtr radeon) +{ + drm_radeon_getparam_t gp; + int ret; + uint32_t age; + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&age; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, + ret); + exit(1); + } + + return age; +} + +static void radeonEmitIrqLocked(radeonContextPtr radeon) +{ + drm_radeon_irq_emit_t ie; + int ret; + + ie.irq_seq = &radeon->iw.irq_seq; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT, + &ie, sizeof(ie)); + if (ret) { + fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, + ret); + exit(1); + } +} + +static void radeonWaitIrq(radeonContextPtr radeon) +{ + int ret; + + do { + ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT, + &radeon->iw, sizeof(radeon->iw)); + } while (ret && (errno == EINTR || errno == EBUSY)); + + if (ret) { + fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, + ret); + exit(1); + } +} + +static void radeonWaitForFrameCompletion(radeonContextPtr radeon) +{ + drm_radeon_sarea_t *sarea = radeon->sarea; + + if (radeon->do_irqs) { + if (radeonGetLastFrame(radeon) < sarea->last_frame) { + if (!radeon->irqsEmitted) { + while (radeonGetLastFrame(radeon) < + sarea->last_frame) ; + } else { + UNLOCK_HARDWARE(radeon); + radeonWaitIrq(radeon); + LOCK_HARDWARE(radeon); + } + radeon->irqsEmitted = 10; + } + + if (radeon->irqsEmitted) { + radeonEmitIrqLocked(radeon); + radeon->irqsEmitted--; + } + } else { + while (radeonGetLastFrame(radeon) < sarea->last_frame) { + UNLOCK_HARDWARE(radeon); + if (radeon->do_usleeps) + DO_USLEEP(1); + LOCK_HARDWARE(radeon); + } + } +} + +/* Copy the back color buffer to the front color buffer. + */ +void radeonCopyBuffer(const __DRIdrawablePrivate * dPriv, + const drm_clip_rect_t * rect) +{ + radeonContextPtr radeon; + GLint nbox, i, ret; + GLboolean missed_target; + int64_t ust; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, + (void *)radeon->glCtx); + } + + r300Flush(radeon->glCtx); + + LOCK_HARDWARE(radeon); + + /* Throttle the frame rate -- only allow one pending swap buffers + * request at a time. + */ + radeonWaitForFrameCompletion(radeon); + if (!rect) + { + UNLOCK_HARDWARE(radeon); + driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags, + &missed_target); + LOCK_HARDWARE(radeon); + } + + nbox = dPriv->numClipRects; /* must be in locked region */ + + for (i = 0; i < nbox;) { + GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = radeon->sarea->boxes; + GLint n = 0; + + for ( ; i < nr ; i++ ) { + + *b = box[i]; + + if (rect) + { + if (rect->x1 > b->x1) + b->x1 = rect->x1; + if (rect->y1 > b->y1) + b->y1 = rect->y1; + if (rect->x2 < b->x2) + b->x2 = rect->x2; + if (rect->y2 < b->y2) + b->y2 = rect->y2; + + if (b->x1 < b->x2 && b->y1 < b->y2) + b++; + } + else + b++; + + n++; + } + radeon->sarea->nbox = n; + + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP); + + if (ret) { + fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n", + ret); + UNLOCK_HARDWARE(radeon); + exit(1); + } + } + + UNLOCK_HARDWARE(radeon); + if (!rect) + { + ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE; + + radeon->swap_count++; + (*dri_interface->getUST) (&ust); + if (missed_target) { + radeon->swap_missed_count++; + radeon->swap_missed_ust = ust - radeon->swap_ust; + } + + radeon->swap_ust = ust; + + sched_yield(); + } +} + +void radeonPageFlip(const __DRIdrawablePrivate * dPriv) +{ + radeonContextPtr radeon; + GLint ret; + GLboolean missed_target; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, + radeon->sarea->pfCurrentPage); + } + + r300Flush(radeon->glCtx); + LOCK_HARDWARE(radeon); + + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE(radeon); + usleep(10000); /* throttle invisible client 10ms */ + return; + } + + /* Need to do this for the perf box placement: + */ + { + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = radeon->sarea->boxes; + b[0] = box[0]; + radeon->sarea->nbox = 1; + } + + /* Throttle the frame rate -- only allow a few pending swap buffers + * request at a time. + */ + radeonWaitForFrameCompletion(radeon); + UNLOCK_HARDWARE(radeon); + driWaitForVBlank(dPriv, &radeon->vbl_seq, radeon->vblank_flags, + &missed_target); + if (missed_target) { + radeon->swap_missed_count++; + (void)(*dri_interface->getUST) (&radeon->swap_missed_ust); + } + LOCK_HARDWARE(radeon); + + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP); + + UNLOCK_HARDWARE(radeon); + + if (ret) { + fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret); + exit(1); + } + + radeon->swap_count++; + (void)(*dri_interface->getUST) (&radeon->swap_ust); + + driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, + radeon->sarea->pfCurrentPage); + + if (radeon->sarea->pfCurrentPage == 1) { + radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; + radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; + } else { + radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; + radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; + } + + if (IS_R300_CLASS(radeon->radeonScreen)) { + r300ContextPtr r300 = (r300ContextPtr)radeon; + R300_STATECHANGE(r300, cb); + r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + + r300->radeon.radeonScreen->fbLocation; + r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; + + if (r300->radeon.radeonScreen->cpp == 4) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; + else + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; + + if (r300->radeon.sarea->tiling_enabled) + r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; + } +} + +void radeonWaitForIdleLocked(radeonContextPtr radeon) +{ + int ret; + int i = 0; + + do { + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE); + if (ret) + DO_USLEEP(1); + } while (ret && ++i < 100); + + if (ret < 0) { + UNLOCK_HARDWARE(radeon); + fprintf(stderr, "Error: R300 timed out... exiting\n"); + exit(-1); + } +} + +static void radeonWaitForIdle(radeonContextPtr radeon) +{ + LOCK_HARDWARE(radeon); + radeonWaitForIdleLocked(radeon); + UNLOCK_HARDWARE(radeon); +} + +void radeonFlush(GLcontext * ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + if (IS_R300_CLASS(radeon->radeonScreen)) + r300Flush(ctx); +} + + +/* Make sure all commands have been sent to the hardware and have + * completed processing. + */ +void radeonFinish(GLcontext * ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + radeonFlush(ctx); + + if (radeon->do_irqs) { + LOCK_HARDWARE(radeon); + radeonEmitIrqLocked(radeon); + UNLOCK_HARDWARE(radeon); + radeonWaitIrq(radeon); + } else + radeonWaitForIdle(radeon); +} diff --git a/r300/radeon_ioctl.h b/r300/radeon_ioctl.h new file mode 100644 index 0000000..3a80d36 --- /dev/null +++ b/r300/radeon_ioctl.h @@ -0,0 +1,57 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __RADEON_IOCTL_H__ +#define __RADEON_IOCTL_H__ + +#include "simple_list.h" +#include "radeon_dri.h" +#include "radeon_lock.h" + +#include "xf86drm.h" +#include "drm.h" +#if 0 +#include "r200_context.h" +#endif +#include "radeon_drm.h" + +extern void radeonCopyBuffer(const __DRIdrawablePrivate * drawable, + const drm_clip_rect_t * rect); +extern void radeonPageFlip(const __DRIdrawablePrivate * drawable); +extern void radeonFlush(GLcontext * ctx); +extern void radeonFinish(GLcontext * ctx); +extern void radeonWaitForIdleLocked(radeonContextPtr radeon); +extern uint32_t radeonGetAge(radeonContextPtr radeon); + +#endif /* __RADEON_IOCTL_H__ */ diff --git a/r300/radeon_lock.c b/r300/radeon_lock.c new file mode 100644 index 0000000..bc3c2d6 --- /dev/null +++ b/r300/radeon_lock.c @@ -0,0 +1,137 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#include "radeon_lock.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "r300_context.h" +#include "r300_state.h" + +#include "framebuffer.h" + +#include "drirenderbuffer.h" + +#if DEBUG_LOCKING +char *prevLockFile = NULL; +int prevLockLine = 0; +#endif + +/* Turn on/off page flipping according to the flags in the sarea: + */ +void radeonUpdatePageFlipping(radeonContextPtr rmesa) +{ + int use_back; + + rmesa->doPageFlip = rmesa->sarea->pfState; + if (rmesa->glCtx->WinSysDrawBuffer) { + driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, + rmesa->sarea->pfCurrentPage); + r300UpdateDrawBuffer(rmesa->glCtx); + } + + use_back = rmesa->glCtx->DrawBuffer ? + (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == + BUFFER_BIT_BACK_LEFT) : 1; + use_back ^= (rmesa->sarea->pfCurrentPage == 1); + + if (use_back) { + rmesa->state.color.drawOffset = + rmesa->radeonScreen->backOffset; + rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; + } else { + rmesa->state.color.drawOffset = + rmesa->radeonScreen->frontOffset; + rmesa->state.color.drawPitch = + rmesa->radeonScreen->frontPitch; + } +} + +/* Update the hardware state. This is called if another context has + * grabbed the hardware lock, which includes the X server. This + * function also updates the driver's window state after the X server + * moves, resizes or restacks a window -- the change will be reflected + * in the drawable position and clip rects. Since the X server grabs + * the hardware lock when it changes the window state, this routine will + * automatically be called after such a change. + */ +void radeonGetLock(radeonContextPtr rmesa, GLuint flags) +{ + __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; + __DRIdrawablePrivate *const readable = rmesa->dri.readable; + __DRIscreenPrivate *sPriv = rmesa->dri.screen; + drm_radeon_sarea_t *sarea = rmesa->sarea; + r300ContextPtr r300 = (r300ContextPtr) rmesa; + + assert(drawable != NULL); + + drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); + + /* The window might have moved, so we might need to get new clip + * rects. + * + * NOTE: This releases and regrabs the hw lock to allow the X server + * to respond to the DRI protocol request for new drawable info. + * Since the hardware state depends on having the latest drawable + * clip rects, all state checking must be done _after_ this call. + */ + DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); + if (drawable != readable) { + DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); + } + + if (rmesa->lastStamp != drawable->lastStamp) { + radeonUpdatePageFlipping(rmesa); + radeonSetCliprects(rmesa); + r300UpdateViewportOffset(rmesa->glCtx); + driUpdateFramebufferSize(rmesa->glCtx, drawable); + } + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + int i; + + sarea->ctx_owner = rmesa->dri.hwContext; + for (i = 0; i < r300->nr_heaps; i++) { + DRI_AGE_TEXTURES(r300->texture_heaps[i]); + } + } + + rmesa->lost_context = GL_TRUE; +} diff --git a/r300/radeon_lock.h b/r300/radeon_lock.h new file mode 100644 index 0000000..c47adc9 --- /dev/null +++ b/r300/radeon_lock.h @@ -0,0 +1,118 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#ifndef __RADEON_LOCK_H__ +#define __RADEON_LOCK_H__ + +#if 0 +#include "r200_ioctl.h" +#endif +#include "radeon_context.h" + +extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); +extern void radeonUpdatePageFlipping(radeonContextPtr rmesa); + +/* Turn DEBUG_LOCKING on to find locking conflicts. + */ +#define DEBUG_LOCKING 0 + +#if DEBUG_LOCKING +extern char *prevLockFile; +extern int prevLockLine; + +#define DEBUG_LOCK() \ + do { \ + prevLockFile = (__FILE__); \ + prevLockLine = (__LINE__); \ + } while (0) + +#define DEBUG_RESET() \ + do { \ + prevLockFile = 0; \ + prevLockLine = 0; \ + } while (0) + +#define DEBUG_CHECK_LOCK() \ + do { \ + if (prevLockFile) { \ + fprintf(stderr, \ + "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ + prevLockFile, prevLockLine, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) + +#else + +#define DEBUG_LOCK() +#define DEBUG_RESET() +#define DEBUG_CHECK_LOCK() + +#endif + +/* + * !!! We may want to separate locks from locks with validation. This + * could be used to improve performance for those things commands that + * do not do any drawing !!! + */ + +/* Lock the hardware and validate our state. + */ +#define LOCK_HARDWARE( rmesa ) \ + do { \ + char __ret = 0; \ + DEBUG_CHECK_LOCK(); \ + DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ + (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \ + if (__ret) \ + radeonGetLock((rmesa), 0); \ + DEBUG_LOCK(); \ + } while (0) + +#define UNLOCK_HARDWARE( rmesa ) \ + do { \ + DRM_UNLOCK((rmesa)->dri.fd, \ + (rmesa)->dri.hwLock, \ + (rmesa)->dri.hwContext); \ + DEBUG_RESET(); \ + } while (0) + +#endif /* __RADEON_LOCK_H__ */ diff --git a/r300/radeon_span.c b/r300/radeon_span.c new file mode 100644 index 0000000..eae09d6 --- /dev/null +++ b/r300/radeon_span.c @@ -0,0 +1,321 @@ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#include "glheader.h" +#include "swrast/swrast.h" + +#include "r300_state.h" +#include "radeon_ioctl.h" +#include "r300_ioctl.h" +#include "radeon_span.h" + +#include "drirenderbuffer.h" + +#define DBG 0 + +/* + * Note that all information needed to access pixels in a renderbuffer + * should be obtained through the gl_renderbuffer parameter, not per-context + * information. + */ +#define LOCAL_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLubyte *buf = (GLubyte *) drb->flippedData \ + + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ + GLuint p; \ + (void) p; + +#define LOCAL_DEPTH_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLuint xo = dPriv->x; \ + GLuint yo = dPriv->y; \ + GLubyte *buf = (GLubyte *) drb->Base.Data; + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +#define Y_FLIP(Y) (bottom - (Y)) + +#define HW_LOCK() + +#define HW_UNLOCK() + +/* ================================================================ + * Color buffer + */ + +/* 16 bit, RGB565 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) radeon##x##_RGB565 +#define TAG2(x,y) radeon##x##_RGB565##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#include "spantmp2.h" + +/* 32 bit, ARGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) radeon##x##_ARGB8888 +#define TAG2(x,y) radeon##x##_ARGB8888##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#include "spantmp2.h" + +/* ================================================================ + * Depth buffer + */ + +/* The Radeon family has depth tiling on all the time, so we have to convert + * the x,y coordinates into the memory bus address (mba) in the same + * manner as the engine. In each case, the linear block address (ba) + * is calculated, and then wired with x and y to produce the final + * memory address. + * The chip will do address translation on its own if the surface registers + * are set up correctly. It is not quite enough to get it working with hyperz + * too... + */ + +static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 4 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0..1] = 0 */ + +#ifdef COMPILE_R300 + ba = (y / 8) * (pitch / 8) + (x / 8); +#else + ba = (y / 16) * (pitch / 16) + (x / 16); +#endif + + address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ + address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ + address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ + address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ + + address |= (y & 0x8) << 7; /* a[10] = y[3] */ + address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ + address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ + + return address; + } +} + +static INLINE GLuint +radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 2 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0] = 0 */ + + ba = (y / 16) * (pitch / 32) + (x / 32); + + address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ + address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ + address |= (x & 0x8) << 4; /* a[7] = x[3] */ + address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ + address |= (y & 0x8) << 7; /* a[10] = y[3] */ + address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ + address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ + + return address; + } +} + +/* 16-bit depth buffer functions + */ +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); + +#define TAG(x) radeon##x##_z16 +#include "depthtmp.h" + +/* 24 bit depth, 8 bit stencil depthbuffer functions + * + * Careful: It looks like the R300 uses ZZZS byte order while the R200 + * uses SZZZ for 24 bit depth, 8 bit stencil mode. + */ +#ifdef COMPILE_R300 +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x000000ff; \ + tmp |= ((d << 8) & 0xffffff00); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#else +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#endif + +#ifdef COMPILE_R300 +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + _y + yo )) & 0xffffff00) >> 8; \ + }while(0) +#else +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + _y + yo )) & 0x00ffffff; +#endif + +#define TAG(x) radeon##x##_z24_s8 +#include "depthtmp.h" + +/* ================================================================ + * Stencil buffer + */ + +/* 24 bit depth, 8 bit stencil depthbuffer functions + */ +#ifdef COMPILE_R300 +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#else +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#endif + +#ifdef COMPILE_R300 +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = tmp & 0x000000ff; \ +} while (0) +#else +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) +#endif + +#define TAG(x) radeon##x##_z24_s8 +#include "stenciltmp.h" + +/* Move locking out to get reasonable span performance (10x better + * than doing this in HW_LOCK above). WaitForIdle() is the main + * culprit. + */ + +static void radeonSpanRenderStart(GLcontext * ctx) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +#ifdef COMPILE_R300 + r300ContextPtr r300 = (r300ContextPtr) rmesa; + R300_FIREVERTICES(r300); +#else + RADEON_FIREVERTICES(rmesa); +#endif + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); +} + +static void radeonSpanRenderFinish(GLcontext * ctx) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + _swrast_flush(ctx); + UNLOCK_HARDWARE(rmesa); +} + +void radeonInitSpanFuncs(GLcontext * ctx) +{ + struct swrast_device_driver *swdd = + _swrast_GetDeviceDriverReference(ctx); + swdd->SpanRenderStart = radeonSpanRenderStart; + swdd->SpanRenderFinish = radeonSpanRenderFinish; +} + +/** + * Plug in the Get/Put routines for the given driRenderbuffer. + */ +void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +{ + if (drb->Base.InternalFormat == GL_RGBA) { + if (vis->redBits == 5 && vis->greenBits == 6 + && vis->blueBits == 5) { + radeonInitPointers_RGB565(&drb->Base); + } else { + radeonInitPointers_ARGB8888(&drb->Base); + } + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { + radeonInitDepthPointers_z16(&drb->Base); + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { + radeonInitDepthPointers_z24_s8(&drb->Base); + } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + radeonInitStencilPointers_z24_s8(&drb->Base); + } +} diff --git a/r300/radeon_state.c b/r300/radeon_state.c new file mode 100644 index 0000000..82bfd95 --- /dev/null +++ b/r300/radeon_state.c @@ -0,0 +1,243 @@ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "api_arrayelt.h" +#include "enums.h" +#include "colormac.h" +#include "light.h" + +#include "swrast/swrast.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "r300_ioctl.h" +#include "framebuffer.h" + +/* ============================================================= + * Scissoring + */ + +static GLboolean intersect_rect(drm_clip_rect_t * out, + drm_clip_rect_t * a, drm_clip_rect_t * b) +{ + *out = *a; + if (b->x1 > out->x1) + out->x1 = b->x1; + if (b->y1 > out->y1) + out->y1 = b->y1; + if (b->x2 < out->x2) + out->x2 = b->x2; + if (b->y2 < out->y2) + out->y2 = b->y2; + if (out->x1 >= out->x2) + return GL_FALSE; + if (out->y1 >= out->y2) + return GL_FALSE; + return GL_TRUE; +} + +void radeonRecalcScissorRects(radeonContextPtr radeon) +{ + drm_clip_rect_t *out; + int i; + + /* Grow cliprect store? + */ + if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) { + while (radeon->state.scissor.numAllocedClipRects < + radeon->numClipRects) { + radeon->state.scissor.numAllocedClipRects += 1; /* zero case */ + radeon->state.scissor.numAllocedClipRects *= 2; + } + + if (radeon->state.scissor.pClipRects) + FREE(radeon->state.scissor.pClipRects); + + radeon->state.scissor.pClipRects = + MALLOC(radeon->state.scissor.numAllocedClipRects * + sizeof(drm_clip_rect_t)); + + if (radeon->state.scissor.pClipRects == NULL) { + radeon->state.scissor.numAllocedClipRects = 0; + return; + } + } + + out = radeon->state.scissor.pClipRects; + radeon->state.scissor.numClipRects = 0; + + for (i = 0; i < radeon->numClipRects; i++) { + if (intersect_rect(out, + &radeon->pClipRects[i], + &radeon->state.scissor.rect)) { + radeon->state.scissor.numClipRects++; + out++; + } + } +} + +void radeonUpdateScissor(GLcontext* ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + if (radeon->dri.drawable) { + __DRIdrawablePrivate *dPriv = radeon->dri.drawable; + int x1 = dPriv->x + ctx->Scissor.X; + int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height); + + radeon->state.scissor.rect.x1 = x1; + radeon->state.scissor.rect.y1 = y1; + radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width - 1; + radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height - 1; + + radeonRecalcScissorRects(radeon); + } +} + +static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + if (ctx->Scissor.Enabled) { + /* We don't pipeline cliprect changes */ + r300Flush(ctx); + radeonUpdateScissor(ctx); + } +} + + +/** + * Update cliprects and scissors. + */ +void radeonSetCliprects(radeonContextPtr radeon) +{ + __DRIdrawablePrivate *const drawable = radeon->dri.drawable; + __DRIdrawablePrivate *const readable = radeon->dri.readable; + GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate; + GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate; + + if (draw_fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + /* Can't ignore 2d windows if we are page flipping. */ + if (drawable->numBackClipRects == 0 || radeon->doPageFlip || + radeon->sarea->pfCurrentPage == 1) { + radeon->numClipRects = drawable->numClipRects; + radeon->pClipRects = drawable->pClipRects; + } else { + radeon->numClipRects = drawable->numBackClipRects; + radeon->pClipRects = drawable->pBackClipRects; + } + } else { + /* front buffer (or none, or multiple buffers */ + radeon->numClipRects = drawable->numClipRects; + radeon->pClipRects = drawable->pClipRects; + } + + if ((draw_fb->Width != drawable->w) || + (draw_fb->Height != drawable->h)) { + _mesa_resize_framebuffer(radeon->glCtx, draw_fb, + drawable->w, drawable->h); + draw_fb->Initialized = GL_TRUE; + } + + if (drawable != readable) { + if ((read_fb->Width != readable->w) || + (read_fb->Height != readable->h)) { + _mesa_resize_framebuffer(radeon->glCtx, read_fb, + readable->w, readable->h); + read_fb->Initialized = GL_TRUE; + } + } + + if (radeon->state.scissor.enabled) + radeonRecalcScissorRects(radeon); + + radeon->lastStamp = drawable->lastStamp; +} + + +/** + * Handle common enable bits. + * Called as a fallback by r200Enable/r300Enable. + */ +void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + switch(cap) { + case GL_SCISSOR_TEST: + /* We don't pipeline cliprect & scissor changes */ + r300Flush(ctx); + + radeon->state.scissor.enabled = state; + radeonUpdateScissor(ctx); + break; + + default: + return; + } +} + + +/** + * Initialize default state. + * This function is called once at context init time from + * r200InitState/r300InitState + */ +void radeonInitState(radeonContextPtr radeon) +{ + radeon->Fallback = 0; + + if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { + radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; + radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; + } else { + radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; + radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; + } +} + + +/** + * Initialize common state functions. + * Called by r200InitStateFuncs/r300InitStateFuncs + */ +void radeonInitStateFuncs(struct dd_function_table *functions) +{ + functions->Scissor = radeonScissor; +} diff --git a/r300/radeon_state.h b/r300/radeon_state.h new file mode 100644 index 0000000..821cb40 --- /dev/null +++ b/r300/radeon_state.h @@ -0,0 +1,43 @@ +/* +Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __RADEON_STATE_H__ +#define __RADEON_STATE_H__ + +extern void radeonRecalcScissorRects(radeonContextPtr radeon); +extern void radeonSetCliprects(radeonContextPtr radeon); +extern void radeonUpdateScissor(GLcontext* ctx); + +extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state); + +extern void radeonInitState(radeonContextPtr radeon); +extern void radeonInitStateFuncs(struct dd_function_table* functions); + +#endif diff --git a/radeon/Makefile.am b/radeon/Makefile.am new file mode 100644 index 0000000..ee1d008 --- /dev/null +++ b/radeon/Makefile.am @@ -0,0 +1,24 @@ +AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING + +RADEON_CFLAGS = -Iserver -DRADEON_COMMON=0 + +radeon_dri_la_LTLIBRARIES = radeon_dri.la +radeon_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(RADEON_CFLAGS) +radeon_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \ + $(DRM_LIBS) $(DRI_LIBS) +radeon_dri_ladir = @libdir@/dri +radeon_dri_la_SOURCES = \ + radeon_context.c \ + radeon_ioctl.c \ + radeon_lock.c \ + radeon_screen.c \ + radeon_state.c \ + radeon_state_init.c \ + radeon_tex.c \ + radeon_texmem.c \ + radeon_texstate.c \ + radeon_tcl.c \ + radeon_swtcl.c \ + radeon_span.c \ + radeon_maos.c \ + radeon_sanity.c diff --git a/radeon/radeon_chipset.h b/radeon/radeon_chipset.h new file mode 100644 index 0000000..5c17e8f --- /dev/null +++ b/radeon/radeon_chipset.h @@ -0,0 +1,182 @@ +#ifndef _RADEON_CHIPSET_H +#define _RADEON_CHIPSET_H +/* Including xf86PciInfo.h introduces a bunch of errors... + */ + +/* General chip classes: + * r100 includes R100, RV100, RV200, RS100, RS200, RS250. + * r200 includes R200, RV250, RV280, RS300. + * r300 includes R300, RV350, RV370. + * (RS* denotes IGP) + */ +#define PCI_CHIP_RV380_3150 0x3150 +#define PCI_CHIP_RV380_3152 0x3152 +#define PCI_CHIP_RV380_3154 0x3154 +#define PCI_CHIP_RV380_3E50 0x3E50 +#define PCI_CHIP_RV380_3E54 0x3E54 +#define PCI_CHIP_RS100_4136 0x4136 +#define PCI_CHIP_RS200_4137 0x4137 +#define PCI_CHIP_R300_AD 0x4144 +#define PCI_CHIP_R300_AE 0x4145 +#define PCI_CHIP_R300_AF 0x4146 +#define PCI_CHIP_R300_AG 0x4147 +#define PCI_CHIP_R350_AH 0x4148 +#define PCI_CHIP_R350_AI 0x4149 +#define PCI_CHIP_R350_AJ 0x414A +#define PCI_CHIP_R350_AK 0x414B +#define PCI_CHIP_RV350_AP 0x4150 +#define PCI_CHIP_RV350_AQ 0x4151 +#define PCI_CHIP_RV350_AR 0x4152 +#define PCI_CHIP_RV350_AS 0x4153 +#define PCI_CHIP_RV350_AT 0x4154 +#define PCI_CHIP_RV350_AU 0x4155 +#define PCI_CHIP_RV350_AV 0x4156 +#define PCI_CHIP_RS250_4237 0x4237 +#define PCI_CHIP_R200_BB 0x4242 +#define PCI_CHIP_R200_BC 0x4243 +#define PCI_CHIP_RS100_4336 0x4336 +#define PCI_CHIP_RS200_4337 0x4337 +#define PCI_CHIP_RS250_4437 0x4437 +#define PCI_CHIP_RV250_If 0x4966 +#define PCI_CHIP_RV250_Ig 0x4967 +#define PCI_CHIP_R420_JH 0x4A48 +#define PCI_CHIP_R420_JI 0x4A49 +#define PCI_CHIP_R420_JJ 0x4A4A +#define PCI_CHIP_R420_JK 0x4A4B +#define PCI_CHIP_R420_JL 0x4A4C +#define PCI_CHIP_R420_JM 0x4A4D +#define PCI_CHIP_R420_JN 0x4A4E +#define PCI_CHIP_R420_JO 0x4A4F +#define PCI_CHIP_R420_JP 0x4A50 +#define PCI_CHIP_R420_JT 0x4A54 +#define PCI_CHIP_R481_4B49 0x4B49 +#define PCI_CHIP_R481_4B4A 0x4B4A +#define PCI_CHIP_R481_4B4B 0x4B4B +#define PCI_CHIP_R481_4B4C 0x4B4C +#define PCI_CHIP_RADEON_LW 0x4C57 +#define PCI_CHIP_RADEON_LX 0x4C58 +#define PCI_CHIP_RADEON_LY 0x4C59 +#define PCI_CHIP_RADEON_LZ 0x4C5A +#define PCI_CHIP_RV250_Ld 0x4C64 +#define PCI_CHIP_RV250_Lf 0x4C66 +#define PCI_CHIP_RV250_Lg 0x4C67 +#define PCI_CHIP_R300_ND 0x4E44 +#define PCI_CHIP_R300_NE 0x4E45 +#define PCI_CHIP_R300_NF 0x4E46 +#define PCI_CHIP_R300_NG 0x4E47 +#define PCI_CHIP_R350_NH 0x4E48 +#define PCI_CHIP_R350_NI 0x4E49 +#define PCI_CHIP_R360_NJ 0x4E4A +#define PCI_CHIP_R350_NK 0x4E4B +#define PCI_CHIP_RV350_NP 0x4E50 +#define PCI_CHIP_RV350_NQ 0x4E51 +#define PCI_CHIP_RV350_NR 0x4E52 +#define PCI_CHIP_RV350_NS 0x4E53 +#define PCI_CHIP_RV350_NT 0x4E54 +#define PCI_CHIP_RV350_NV 0x4E56 +#define PCI_CHIP_RADEON_QD 0x5144 +#define PCI_CHIP_RADEON_QE 0x5145 +#define PCI_CHIP_RADEON_QF 0x5146 +#define PCI_CHIP_RADEON_QG 0x5147 +#define PCI_CHIP_R200_QH 0x5148 +#define PCI_CHIP_R200_QL 0x514C +#define PCI_CHIP_R200_QM 0x514D +#define PCI_CHIP_RV200_QW 0x5157 +#define PCI_CHIP_RV200_QX 0x5158 +#define PCI_CHIP_RADEON_QY 0x5159 +#define PCI_CHIP_RADEON_QZ 0x515A +#define PCI_CHIP_RN50_515E 0x515E +#define PCI_CHIP_RV370_5460 0x5460 +#define PCI_CHIP_RV370_5462 0x5462 +#define PCI_CHIP_RV370_5464 0x5464 +#define PCI_CHIP_R423_UH 0x5548 +#define PCI_CHIP_R423_UI 0x5549 +#define PCI_CHIP_R423_UJ 0x554A +#define PCI_CHIP_R423_UK 0x554B +#define PCI_CHIP_R430_554C 0x554C +#define PCI_CHIP_R430_554D 0x554D +#define PCI_CHIP_R430_554E 0x554E +#define PCI_CHIP_R430_554F 0x554F +#define PCI_CHIP_R423_5550 0x5550 +#define PCI_CHIP_R423_UQ 0x5551 +#define PCI_CHIP_R423_UR 0x5552 +#define PCI_CHIP_R423_UT 0x5554 +#define PCI_CHIP_RV410_564A 0x564A +#define PCI_CHIP_RV410_564B 0x564B +#define PCI_CHIP_RV410_564F 0x564F +#define PCI_CHIP_RV410_5652 0x5652 +#define PCI_CHIP_RV410_5653 0x5653 +#define PCI_CHIP_RS300_5834 0x5834 +#define PCI_CHIP_RS300_5835 0x5835 +#define PCI_CHIP_RS480_5954 0x5954 +#define PCI_CHIP_RS480_5955 0x5955 +#define PCI_CHIP_RV280_5960 0x5960 +#define PCI_CHIP_RV280_5961 0x5961 +#define PCI_CHIP_RV280_5962 0x5962 +#define PCI_CHIP_RV280_5964 0x5964 +#define PCI_CHIP_RV280_5965 0x5965 +#define PCI_CHIP_RN50_5969 0x5969 +#define PCI_CHIP_RS482_5974 0x5974 +#define PCI_CHIP_RS482_5975 0x5975 +#define PCI_CHIP_RS400_5A41 0x5A41 +#define PCI_CHIP_RS400_5A42 0x5A42 +#define PCI_CHIP_RC410_5A61 0x5A61 +#define PCI_CHIP_RC410_5A62 0x5A62 +#define PCI_CHIP_RV370_5B60 0x5B60 +#define PCI_CHIP_RV370_5B62 0x5B62 +#define PCI_CHIP_RV370_5B63 0x5B63 +#define PCI_CHIP_RV370_5B64 0x5B64 +#define PCI_CHIP_RV370_5B65 0x5B65 +#define PCI_CHIP_RV370_5657 0x5657 +#define PCI_CHIP_RV280_5C61 0x5C61 +#define PCI_CHIP_RV280_5C63 0x5C63 +#define PCI_CHIP_R430_5D48 0x5D48 +#define PCI_CHIP_R430_5D49 0x5D49 +#define PCI_CHIP_R430_5D4A 0x5D4A +#define PCI_CHIP_R480_5D4C 0x5D4C +#define PCI_CHIP_R480_5D4D 0x5D4D +#define PCI_CHIP_R480_5D4E 0x5D4E +#define PCI_CHIP_R480_5D4F 0x5D4F +#define PCI_CHIP_R480_5D50 0x5D50 +#define PCI_CHIP_R480_5D52 0x5D52 +#define PCI_CHIP_R423_5D57 0x5D57 +#define PCI_CHIP_RV410_5E48 0x5E48 +#define PCI_CHIP_RV410_5E4A 0x5E4A +#define PCI_CHIP_RV410_5E4B 0x5E4B +#define PCI_CHIP_RV410_5E4C 0x5E4C +#define PCI_CHIP_RV410_5E4D 0x5E4D +#define PCI_CHIP_RV410_5E4F 0x5E4F +#define PCI_CHIP_RS350_7834 0x7834 +#define PCI_CHIP_RS350_7835 0x7835 + +enum { + CHIP_FAMILY_R100, + CHIP_FAMILY_RV100, + CHIP_FAMILY_RS100, + CHIP_FAMILY_RV200, + CHIP_FAMILY_RS200, + CHIP_FAMILY_R200, + CHIP_FAMILY_RV250, + CHIP_FAMILY_RS300, + CHIP_FAMILY_RV280, + CHIP_FAMILY_R300, + CHIP_FAMILY_R350, + CHIP_FAMILY_RV350, + CHIP_FAMILY_RV380, + CHIP_FAMILY_R420, + CHIP_FAMILY_RV410, + CHIP_FAMILY_RS400, + CHIP_FAMILY_LAST +}; + +/* General classes of Radeons, as described above the device ID section */ +#define RADEON_CLASS_R100 (0 << 0) +#define RADEON_CLASS_R200 (1 << 0) +#define RADEON_CLASS_R300 (2 << 0) +#define RADEON_CLASS_MASK (3 << 0) + +#define RADEON_CHIPSET_TCL (1 << 2) /* tcl support - any radeon */ +#define RADEON_CHIPSET_BROKEN_STENCIL (1 << 3) /* r100 stencil bug */ +#define R200_CHIPSET_YCBCR_BROKEN (1 << 4) /* r200 ycbcr bug */ + +#endif /* _RADEON_CHIPSET_H */ diff --git a/radeon/radeon_compat.c b/radeon/radeon_compat.c new file mode 100644 index 0000000..1cbe340 --- /dev/null +++ b/radeon/radeon_compat.c @@ -0,0 +1,302 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2002 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc., Austin, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#include "glheader.h" +#include "imports.h" + +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" + + +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, + { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, + { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, + { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, + { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, + { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, + { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, + { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, + { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, + { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, + { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, + { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, + { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, + { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, + { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, + { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, + { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, + { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, + { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, + { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, +}; + + +static void radeonCompatEmitPacket( radeonContextPtr rmesa, + struct radeon_state_atom *state ) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + drm_radeon_context_regs_t *ctx = &sarea->context_state; + drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0]; + drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1]; + int i; + int *buf = state->cmd; + + for ( i = 0 ; i < state->cmd_size ; ) { + drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++]; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id, + packet[(int)header->packet.packet_id].name); + + switch (header->packet.packet_id) { + case RADEON_EMIT_PP_MISC: + ctx->pp_misc = buf[i++]; + ctx->pp_fog_color = buf[i++]; + ctx->re_solid_color = buf[i++]; + ctx->rb3d_blendcntl = buf[i++]; + ctx->rb3d_depthoffset = buf[i++]; + ctx->rb3d_depthpitch = buf[i++]; + ctx->rb3d_zstencilcntl = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_CONTEXT; + break; + case RADEON_EMIT_PP_CNTL: + ctx->pp_cntl = buf[i++]; + ctx->rb3d_cntl = buf[i++]; + ctx->rb3d_coloroffset = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_CONTEXT; + break; + case RADEON_EMIT_RB3D_COLORPITCH: + ctx->rb3d_colorpitch = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_CONTEXT; + break; + case RADEON_EMIT_RE_LINE_PATTERN: + ctx->re_line_pattern = buf[i++]; + ctx->re_line_state = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_LINE; + break; + case RADEON_EMIT_SE_LINE_WIDTH: + ctx->se_line_width = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_LINE; + break; + case RADEON_EMIT_PP_LUM_MATRIX: + ctx->pp_lum_matrix = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_BUMPMAP; + break; + case RADEON_EMIT_PP_ROT_MATRIX_0: + ctx->pp_rot_matrix_0 = buf[i++]; + ctx->pp_rot_matrix_1 = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_BUMPMAP; + break; + case RADEON_EMIT_RB3D_STENCILREFMASK: + ctx->rb3d_stencilrefmask = buf[i++]; + ctx->rb3d_ropcntl = buf[i++]; + ctx->rb3d_planemask = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_MASKS; + break; + case RADEON_EMIT_SE_VPORT_XSCALE: + ctx->se_vport_xscale = buf[i++]; + ctx->se_vport_xoffset = buf[i++]; + ctx->se_vport_yscale = buf[i++]; + ctx->se_vport_yoffset = buf[i++]; + ctx->se_vport_zscale = buf[i++]; + ctx->se_vport_zoffset = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_VIEWPORT; + break; + case RADEON_EMIT_SE_CNTL: + ctx->se_cntl = buf[i++]; + ctx->se_coord_fmt = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT; + break; + case RADEON_EMIT_SE_CNTL_STATUS: + ctx->se_cntl_status = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_SETUP; + break; + case RADEON_EMIT_RE_MISC: + ctx->re_misc = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_MISC; + break; + case RADEON_EMIT_PP_TXFILTER_0: + tex0->pp_txfilter = buf[i++]; + tex0->pp_txformat = buf[i++]; + tex0->pp_txoffset = buf[i++]; + tex0->pp_txcblend = buf[i++]; + tex0->pp_txablend = buf[i++]; + tex0->pp_tfactor = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_TEX0; + break; + case RADEON_EMIT_PP_BORDER_COLOR_0: + tex0->pp_border_color = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_TEX0; + break; + case RADEON_EMIT_PP_TXFILTER_1: + tex1->pp_txfilter = buf[i++]; + tex1->pp_txformat = buf[i++]; + tex1->pp_txoffset = buf[i++]; + tex1->pp_txcblend = buf[i++]; + tex1->pp_txablend = buf[i++]; + tex1->pp_tfactor = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_TEX1; + break; + case RADEON_EMIT_PP_BORDER_COLOR_1: + tex1->pp_border_color = buf[i++]; + sarea->dirty |= RADEON_UPLOAD_TEX1; + break; + + case RADEON_EMIT_SE_ZBIAS_FACTOR: + i++; + i++; + break; + + case RADEON_EMIT_PP_TXFILTER_2: + case RADEON_EMIT_PP_BORDER_COLOR_2: + case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT: + case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED: + default: + /* These states aren't understood by radeon drm 1.1 */ + fprintf(stderr, "Tried to emit unsupported state\n"); + return; + } + } +} + + + +static void radeonCompatEmitStateLocked( radeonContextPtr rmesa ) +{ + struct radeon_state_atom *atom; + + if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) + return; + + foreach(atom, &rmesa->hw.atomlist) { + if (rmesa->hw.all_dirty) + atom->dirty = GL_TRUE; + if (atom->is_tcl) + atom->dirty = GL_FALSE; + if (atom->dirty) + radeonCompatEmitPacket(rmesa, atom); + } + + rmesa->hw.is_dirty = GL_FALSE; + rmesa->hw.all_dirty = GL_FALSE; +} + + +static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa, + GLuint hw_primitive, + GLuint nverts, + drm_clip_rect_t *pbox, + GLuint nbox ) +{ + int i; + + for ( i = 0 ; i < nbox ; ) { + int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox ); + drm_clip_rect_t *b = rmesa->sarea->boxes; + drm_radeon_vertex_t vtx; + + rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS; + rmesa->sarea->nbox = nr - i; + + for ( ; i < nr ; i++) + *b++ = pbox[i]; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "RadeonFlushVertexBuffer: prim %x buf %d verts %d " + "disc %d nbox %d\n", + hw_primitive, + rmesa->dma.current.buf->buf->idx, + nverts, + nr == nbox, + rmesa->sarea->nbox ); + + vtx.prim = hw_primitive; + vtx.idx = rmesa->dma.current.buf->buf->idx; + vtx.count = nverts; + vtx.discard = (nr == nbox); + + drmCommandWrite( rmesa->dri.fd, + DRM_RADEON_VERTEX, + &vtx, sizeof(vtx)); + } +} + + + +/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer! + */ +void radeonCompatEmitPrimitive( radeonContextPtr rmesa, + GLuint vertex_format, + GLuint hw_primitive, + GLuint nrverts ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + LOCK_HARDWARE( rmesa ); + + radeonCompatEmitStateLocked( rmesa ); + rmesa->sarea->vc_format = vertex_format; + + if (rmesa->state.scissor.enabled) { + radeonCompatEmitPrimitiveLocked( rmesa, + hw_primitive, + nrverts, + rmesa->state.scissor.pClipRects, + rmesa->state.scissor.numClipRects ); + } + else { + radeonCompatEmitPrimitiveLocked( rmesa, + hw_primitive, + nrverts, + rmesa->pClipRects, + rmesa->numClipRects ); + } + + + UNLOCK_HARDWARE( rmesa ); +} + diff --git a/radeon/radeon_context.c b/radeon/radeon_context.c new file mode 100644 index 0000000..9451ec4 --- /dev/null +++ b/radeon/radeon_context.c @@ -0,0 +1,637 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_context.c,v 1.9 2003/09/24 02:43:12 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "api_arrayelt.h" +#include "context.h" +#include "simple_list.h" +#include "imports.h" +#include "matrix.h" +#include "extensions.h" +#include "framebuffer.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "drivers/common/driverfuncs.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_span.h" +#include "radeon_tex.h" +#include "radeon_swtcl.h" +#include "radeon_tcl.h" +#include "radeon_maos.h" + +#define need_GL_ARB_multisample +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#include "extension_helper.h" + +#define DRIVER_DATE "20061018" + +#include "vblank.h" +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ +#ifndef RADEON_DEBUG +int RADEON_DEBUG = (0); +#endif + + +/* Return various strings for glGetString(). + */ +static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + static char buffer[128]; + unsigned offset; + GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : + rmesa->radeonScreen->AGPMode; + + switch ( name ) { + case GL_VENDOR: + return (GLubyte *)"Tungsten Graphics, Inc."; + + case GL_RENDERER: + offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE, + agp_mode ); + + sprintf( & buffer[ offset ], " %sTCL", + !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) + ? "" : "NO-" ); + + return (GLubyte *)buffer; + + default: + return NULL; + } +} + + +/* Extension strings exported by the R100 driver. + */ +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_multitexture", NULL }, + { "GL_ARB_texture_border_clamp", NULL }, + { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_texture_env_add", NULL }, + { "GL_ARB_texture_env_combine", NULL }, + { "GL_ARB_texture_env_crossbar", NULL }, + { "GL_ARB_texture_env_dot3", NULL }, + { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_blend_logic_op", NULL }, + { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_stencil_wrap", NULL }, + { "GL_EXT_texture_edge_clamp", NULL }, + { "GL_EXT_texture_env_combine", NULL }, + { "GL_EXT_texture_env_dot3", NULL }, + { "GL_EXT_texture_filter_anisotropic", NULL }, + { "GL_EXT_texture_lod_bias", NULL }, + { "GL_EXT_texture_mirror_clamp", NULL }, + { "GL_ATI_texture_env_combine3", NULL }, + { "GL_ATI_texture_mirror_once", NULL }, + { "GL_MESA_ycbcr_texture", NULL }, + { "GL_NV_blend_square", NULL }, + { "GL_SGIS_generate_mipmap", NULL }, + { NULL, NULL } +}; + +extern const struct tnl_pipeline_stage _radeon_render_stage; +extern const struct tnl_pipeline_stage _radeon_tcl_stage; + +static const struct tnl_pipeline_stage *radeon_pipeline[] = { + + /* Try and go straight to t&l + */ + &_radeon_tcl_stage, + + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + + &_radeon_render_stage, + &_tnl_render_stage, /* FALLBACK: */ + NULL, +}; + + + +/* Initialize the driver's misc functions. + */ +static void radeonInitDriverFuncs( struct dd_function_table *functions ) +{ + functions->GetString = radeonGetString; +} + +static const struct dri_debug_control debug_control[] = +{ + { "fall", DEBUG_FALLBACKS }, + { "tex", DEBUG_TEXTURE }, + { "ioctl", DEBUG_IOCTL }, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "state", DEBUG_STATE }, + { "code", DEBUG_CODEGEN }, + { "vfmt", DEBUG_VFMT }, + { "vtxf", DEBUG_VFMT }, + { "verb", DEBUG_VERBOSE }, + { "dri", DEBUG_DRI }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sync", DEBUG_SYNC }, + { NULL, 0 } +}; + + +/* Create the device specific context. + */ +GLboolean +radeonCreateContext( const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); + struct dd_function_table functions; + radeonContextPtr rmesa; + GLcontext *ctx, *shareCtx; + int i; + int tcl_mode, fthrottle_mode; + + assert(glVisual); + assert(driContextPriv); + assert(screen); + + /* Allocate the Radeon context */ + rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) ); + if ( !rmesa ) + return GL_FALSE; + + /* init exp fog table data */ + radeonInitStaticFogData(); + + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, + screen->driScreen->myNum, "radeon"); + rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, + "def_max_anisotropy"); + + if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { + if ( sPriv->drmMinor < 13 ) + fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " + "disabling.\n",sPriv->drmMinor ); + else + rmesa->using_hyperz = GL_TRUE; + } + + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; + + /* Init default driver functions then plug in our Radeon-specific functions + * (the texture functions are especially important) + */ + _mesa_init_driver_functions( &functions ); + radeonInitDriverFuncs( &functions ); + radeonInitTextureFuncs( &functions ); + + /* Allocate the Mesa context */ + if (sharedContextPrivate) + shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx; + else + shareCtx = NULL; + rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, + &functions, (void *) rmesa); + if (!rmesa->glCtx) { + FREE(rmesa); + return GL_FALSE; + } + driContextPriv->driverPrivate = rmesa; + + /* Init radeon context data */ + rmesa->dri.context = driContextPriv; + rmesa->dri.screen = sPriv; + rmesa->dri.drawable = NULL; + rmesa->dri.readable = NULL; + rmesa->dri.hwContext = driContextPriv->hHWContext; + rmesa->dri.hwLock = &sPriv->pSAREA->lock; + rmesa->dri.fd = sPriv->fd; + rmesa->dri.drmMinor = sPriv->drmMinor; + + rmesa->radeonScreen = screen; + rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + + screen->sarea_priv_offset); + + + rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address; + + (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); + make_empty_list( & rmesa->swapped ); + + rmesa->nr_heaps = screen->numTexHeaps; + for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { + rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, + screen->texSize[i], + 12, + RADEON_NR_TEX_REGIONS, + (drmTextureRegionPtr)rmesa->sarea->tex_list[i], + & rmesa->sarea->tex_age[i], + & rmesa->swapped, + sizeof( radeonTexObj ), + (destroy_texture_object_t *) radeonDestroyTexObj ); + + driSetTextureSwapCounterLocation( rmesa->texture_heaps[i], + & rmesa->c_textureSwaps ); + } + rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, + "texture_depth"); + if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) + rmesa->texture_depth = ( screen->cpp == 4 ) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + + rmesa->swtcl.RenderIndex = ~0; + rmesa->hw.all_dirty = GL_TRUE; + + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have all of them in + * texturable memory at once. Depending on the allow_large_textures driconf + * setting allow larger textures. + */ + + ctx = rmesa->glCtx; + ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, + "texture_units"); + ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; + ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; + + i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); + + driCalculateMaxTextureLevels( rmesa->texture_heaps, + rmesa->nr_heaps, + & ctx->Const, + 4, + 11, /* max 2D texture size is 2048x2048 */ + 8, /* 256^3 */ + 9, /* \todo: max cube texture size seems to be 512x512(x6) */ + 11, /* max rect texture size is 2048x2048. */ + 12, + GL_FALSE, + i ); + + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + + /* No wide points. + */ + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = 1.0; + ctx->Const.MaxPointSizeAA = 1.0; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 10.0; + ctx->Const.MaxLineWidthAA = 10.0; + ctx->Const.LineWidthGranularity = 0.0625; + + /* Set maxlocksize (and hence vb size) small enough to avoid + * fallbacks in radeon_tcl.c. ie. guarentee that all vertices can + * fit in a single dma buffer for indexed rendering of quad strips, + * etc. + */ + ctx->Const.MaxArrayLockSize = + MIN2( ctx->Const.MaxArrayLockSize, + RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE ); + + rmesa->boxes = 0; + + /* Initialize the software rasterizer and helper modules. + */ + _swrast_CreateContext( ctx ); + _vbo_CreateContext( ctx ); + _tnl_CreateContext( ctx ); + _swsetup_CreateContext( ctx ); + _ae_create_context( ctx ); + + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline( ctx ); + _tnl_install_pipeline( ctx, radeon_pipeline ); + + /* Try and keep materials and vertices separate: + */ +/* _tnl_isolate_materials( ctx, GL_TRUE ); */ + + /* Configure swrast and T&L to match hardware characteristics: + */ + _swrast_allow_pixel_fog( ctx, GL_FALSE ); + _swrast_allow_vertex_fog( ctx, GL_TRUE ); + _tnl_allow_pixel_fog( ctx, GL_FALSE ); + _tnl_allow_vertex_fog( ctx, GL_TRUE ); + + + for ( i = 0 ; i < RADEON_MAX_TEXTURE_UNITS ; i++ ) { + _math_matrix_ctr( &rmesa->TexGenMatrix[i] ); + _math_matrix_ctr( &rmesa->tmpmat[i] ); + _math_matrix_set_identity( &rmesa->TexGenMatrix[i] ); + _math_matrix_set_identity( &rmesa->tmpmat[i] ); + } + + driInitExtensions( ctx, card_extensions, GL_TRUE ); + if (rmesa->radeonScreen->drmSupportsCubeMapsR100) + _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); + if (rmesa->glCtx->Mesa_DXTn) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + _mesa_enable_extension( ctx, "GL_S3_s3tc" ); + } + else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { + _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); + } + + if (rmesa->dri.drmMinor >= 9) + _mesa_enable_extension( ctx, "GL_NV_texture_rectangle"); + + /* XXX these should really go right after _mesa_init_driver_functions() */ + radeonInitIoctlFuncs( ctx ); + radeonInitStateFuncs( ctx ); + radeonInitSpanFuncs( ctx ); + radeonInitState( rmesa ); + radeonInitSwtcl( ctx ); + + _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, + ctx->Const.MaxArrayLockSize, 32 ); + + fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); + rmesa->iw.irq_seq = -1; + rmesa->irqsEmitted = 0; + rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 && + fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); + + rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + + rmesa->vblank_flags = (rmesa->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&rmesa->optionCache) : VBLANK_FLAG_NO_IRQ; + + (*dri_interface->getUST)( & rmesa->swap_ust ); + + +#if DO_DEBUG + RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ), + debug_control ); +#endif + + tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); + if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); + FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); + } else if (tcl_mode == DRI_CONF_TCL_SW || + !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } + TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + } + + if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { +/* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */ + } + return GL_TRUE; +} + + +/* Destroy the device specific context. + */ +/* Destroy the Mesa and driver specific context data. + */ +void radeonDestroyContext( __DRIcontextPrivate *driContextPriv ) +{ + GET_CURRENT_CONTEXT(ctx); + radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; + radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; + + /* check if we're deleting the currently bound context */ + if (rmesa == current) { + RADEON_FIREVERTICES( rmesa ); + _mesa_make_current(NULL, NULL, NULL); + } + + /* Free radeon context resources */ + assert(rmesa); /* should never be null */ + if ( rmesa ) { + GLboolean release_texture_heaps; + + + release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); + _swsetup_DestroyContext( rmesa->glCtx ); + _tnl_DestroyContext( rmesa->glCtx ); + _vbo_DestroyContext( rmesa->glCtx ); + _swrast_DestroyContext( rmesa->glCtx ); + + radeonDestroySwtcl( rmesa->glCtx ); + radeonReleaseArrays( rmesa->glCtx, ~0 ); + if (rmesa->dma.current.buf) { + radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); + radeonFlushCmdBuf( rmesa, __FUNCTION__ ); + } + + _mesa_vector4f_free( &rmesa->tcl.ObjClean ); + + if (rmesa->state.scissor.pClipRects) { + FREE(rmesa->state.scissor.pClipRects); + rmesa->state.scissor.pClipRects = NULL; + } + + if ( release_texture_heaps ) { + /* This share group is about to go away, free our private + * texture object data. + */ + int i; + + for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { + driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); + rmesa->texture_heaps[ i ] = NULL; + } + + assert( is_empty_list( & rmesa->swapped ) ); + } + + /* free the Mesa context */ + rmesa->glCtx->DriverCtx = NULL; + _mesa_destroy_context( rmesa->glCtx ); + + /* free the option cache */ + driDestroyOptionCache (&rmesa->optionCache); + + FREE( rmesa ); + } +} + + + + +void +radeonSwapBuffers( __DRIdrawablePrivate *dPriv ) +{ + + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr rmesa; + GLcontext *ctx; + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = rmesa->glCtx; + if (ctx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ + + if ( rmesa->doPageFlip ) { + radeonPageFlip( dPriv ); + } + else { + radeonCopyBuffer( dPriv, NULL ); + } + } + } + else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); + } +} + +void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr radeon; + GLcontext *ctx; + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = radeon->glCtx; + + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + radeonCopyBuffer(dPriv, &rect); + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", + __FUNCTION__); + } +} + +/* Make context `c' the current context and bind it to the given + * drawing and reading surfaces. + */ +GLboolean +radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv ) +{ + if ( driContextPriv ) { + radeonContextPtr newCtx = + (radeonContextPtr) driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx); + + if ( newCtx->dri.drawable != driDrawPriv ) { + /* XXX we may need to validate the drawable here!!! */ + driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, + &newCtx->vbl_seq ); + } + + newCtx->dri.readable = driReadPriv; + + if ( (newCtx->dri.drawable != driDrawPriv) || + newCtx->lastStamp != driDrawPriv->lastStamp ) { + newCtx->dri.drawable = driDrawPriv; + + radeonSetCliprects(newCtx); + radeonUpdateViewportOffset( newCtx->glCtx ); + } + + _mesa_make_current( newCtx->glCtx, + (GLframebuffer *) driDrawPriv->driverPrivate, + (GLframebuffer *) driReadPriv->driverPrivate ); + + _mesa_update_state( newCtx->glCtx ); + } else { + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + _mesa_make_current( NULL, NULL, NULL ); + } + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "End %s\n", __FUNCTION__); + return GL_TRUE; +} + +/* Force the context `c' to be unbound from its buffer. + */ +GLboolean +radeonUnbindContext( __DRIcontextPrivate *driContextPriv ) +{ + radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx); + + return GL_TRUE; +} diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h new file mode 100644 index 0000000..8dedd66 --- /dev/null +++ b/radeon/radeon_context.h @@ -0,0 +1,764 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __RADEON_CONTEXT_H__ +#define __RADEON_CONTEXT_H__ + +#include "tnl/t_vertex.h" +#include "dri_util.h" +#include "drm.h" +#include "radeon_drm.h" +#include "texmem.h" + +#include "macros.h" +#include "mtypes.h" +#include "colormac.h" + +struct radeon_context; +typedef struct radeon_context radeonContextRec; +typedef struct radeon_context *radeonContextPtr; + +/* This union is used to avoid warnings/miscompilation + with float to uint32_t casts due to strict-aliasing */ +typedef union { + GLfloat f; + uint32_t ui32; +} float_ui32_type; + +#include "radeon_lock.h" +#include "radeon_screen.h" +#include "mm.h" + +#include "math/m_vector.h" + +#define TEX_0 0x1 +#define TEX_1 0x2 +#define TEX_2 0x4 +#define TEX_ALL 0x7 + +/* Rasterizing fallbacks */ +/* See correponding strings in r200_swtcl.c */ +#define RADEON_FALLBACK_TEXTURE 0x0001 +#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 +#define RADEON_FALLBACK_STENCIL 0x0004 +#define RADEON_FALLBACK_RENDER_MODE 0x0008 +#define RADEON_FALLBACK_BLEND_EQ 0x0010 +#define RADEON_FALLBACK_BLEND_FUNC 0x0020 +#define RADEON_FALLBACK_DISABLE 0x0040 +#define RADEON_FALLBACK_BORDER_MODE 0x0080 + +/* The blit width for texture uploads + */ +#define BLIT_WIDTH_BYTES 1024 + +/* Use the templated vertex format: + */ +#define COLOR_IS_RGBA +#define TAG(x) radeon##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +typedef void (*radeon_tri_func) (radeonContextPtr, + radeonVertex *, + radeonVertex *, radeonVertex *); + +typedef void (*radeon_line_func) (radeonContextPtr, + radeonVertex *, radeonVertex *); + +typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *); + +struct radeon_colorbuffer_state { + GLuint clear; + int roundEnable; +}; + +struct radeon_depthbuffer_state { + GLuint clear; + GLfloat scale; +}; + +struct radeon_scissor_state { + drm_clip_rect_t rect; + GLboolean enabled; + + GLuint numClipRects; /* Cliprects active */ + GLuint numAllocedClipRects; /* Cliprects available */ + drm_clip_rect_t *pClipRects; +}; + +struct radeon_stencilbuffer_state { + GLboolean hwBuffer; + GLuint clear; /* rb3d_stencilrefmask value */ +}; + +struct radeon_stipple_state { + GLuint mask[32]; +}; + +/* used for both tcl_vtx and vc_frmt tex bits (they are identical) */ +#define RADEON_ST_BIT(unit) \ +(unit == 0 ? RADEON_CP_VC_FRMT_ST0 : (RADEON_CP_VC_FRMT_ST1 >> 2) << (2 * unit)) + +#define RADEON_Q_BIT(unit) \ +(unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit)) + +typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr; + +/* Texture object in locally shared texture space. + */ +struct radeon_tex_obj { + driTextureObject base; + + GLuint bufAddr; /* Offset to start of locally + shared texture block */ + + GLuint dirty_state; /* Flags (1 per texunit) for + whether or not this texobj + has dirty hardware state + (pp_*) that needs to be + brought into the + texunit. */ + + drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + /* Six, for the cube faces */ + + GLuint pp_txfilter; /* hardware register values */ + GLuint pp_txformat; + GLuint pp_txoffset; /* Image location in texmem. + All cube faces follow. */ + GLuint pp_txsize; /* npot only */ + GLuint pp_txpitch; /* npot only */ + GLuint pp_border_color; + GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ + + GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ +}; + +struct radeon_texture_env_state { + radeonTexObjPtr texobj; + GLenum format; + GLenum envMode; +}; + +struct radeon_texture_state { + struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS]; +}; + +struct radeon_state_atom { + struct radeon_state_atom *next, *prev; + const char *name; /* for debug */ + int cmd_size; /* size in bytes */ + GLuint is_tcl; + int *cmd; /* one or more cmd's */ + int *lastcmd; /* one or more cmd's */ + GLboolean dirty; /* dirty-mark in emit_state_list */ + GLboolean(*check) (GLcontext *); /* is this state active? */ +}; + +/* Trying to keep these relatively short as the variables are becoming + * extravagently long. Drop the driver name prefix off the front of + * everything - I think we know which driver we're in by now, and keep the + * prefix to 3 letters unless absolutely impossible. + */ + +#define CTX_CMD_0 0 +#define CTX_PP_MISC 1 +#define CTX_PP_FOG_COLOR 2 +#define CTX_RE_SOLID_COLOR 3 +#define CTX_RB3D_BLENDCNTL 4 +#define CTX_RB3D_DEPTHOFFSET 5 +#define CTX_RB3D_DEPTHPITCH 6 +#define CTX_RB3D_ZSTENCILCNTL 7 +#define CTX_CMD_1 8 +#define CTX_PP_CNTL 9 +#define CTX_RB3D_CNTL 10 +#define CTX_RB3D_COLOROFFSET 11 +#define CTX_CMD_2 12 +#define CTX_RB3D_COLORPITCH 13 +#define CTX_STATE_SIZE 14 + +#define SET_CMD_0 0 +#define SET_SE_CNTL 1 +#define SET_SE_COORDFMT 2 +#define SET_CMD_1 3 +#define SET_SE_CNTL_STATUS 4 +#define SET_STATE_SIZE 5 + +#define LIN_CMD_0 0 +#define LIN_RE_LINE_PATTERN 1 +#define LIN_RE_LINE_STATE 2 +#define LIN_CMD_1 3 +#define LIN_SE_LINE_WIDTH 4 +#define LIN_STATE_SIZE 5 + +#define MSK_CMD_0 0 +#define MSK_RB3D_STENCILREFMASK 1 +#define MSK_RB3D_ROPCNTL 2 +#define MSK_RB3D_PLANEMASK 3 +#define MSK_STATE_SIZE 4 + +#define VPT_CMD_0 0 +#define VPT_SE_VPORT_XSCALE 1 +#define VPT_SE_VPORT_XOFFSET 2 +#define VPT_SE_VPORT_YSCALE 3 +#define VPT_SE_VPORT_YOFFSET 4 +#define VPT_SE_VPORT_ZSCALE 5 +#define VPT_SE_VPORT_ZOFFSET 6 +#define VPT_STATE_SIZE 7 + +#define MSC_CMD_0 0 +#define MSC_RE_MISC 1 +#define MSC_STATE_SIZE 2 + +#define TEX_CMD_0 0 +#define TEX_PP_TXFILTER 1 +#define TEX_PP_TXFORMAT 2 +#define TEX_PP_TXOFFSET 3 +#define TEX_PP_TXCBLEND 4 +#define TEX_PP_TXABLEND 5 +#define TEX_PP_TFACTOR 6 +#define TEX_CMD_1 7 +#define TEX_PP_BORDER_COLOR 8 +#define TEX_STATE_SIZE 9 + +#define TXR_CMD_0 0 /* rectangle textures */ +#define TXR_PP_TEX_SIZE 1 /* 0x1d04, 0x1d0c for NPOT! */ +#define TXR_PP_TEX_PITCH 2 /* 0x1d08, 0x1d10 for NPOT! */ +#define TXR_STATE_SIZE 3 + +#define CUBE_CMD_0 0 +#define CUBE_PP_CUBIC_FACES 1 +#define CUBE_CMD_1 2 +#define CUBE_PP_CUBIC_OFFSET_0 3 +#define CUBE_PP_CUBIC_OFFSET_1 4 +#define CUBE_PP_CUBIC_OFFSET_2 5 +#define CUBE_PP_CUBIC_OFFSET_3 6 +#define CUBE_PP_CUBIC_OFFSET_4 7 +#define CUBE_STATE_SIZE 8 + +#define ZBS_CMD_0 0 +#define ZBS_SE_ZBIAS_FACTOR 1 +#define ZBS_SE_ZBIAS_CONSTANT 2 +#define ZBS_STATE_SIZE 3 + +#define TCL_CMD_0 0 +#define TCL_OUTPUT_VTXFMT 1 +#define TCL_OUTPUT_VTXSEL 2 +#define TCL_MATRIX_SELECT_0 3 +#define TCL_MATRIX_SELECT_1 4 +#define TCL_UCP_VERT_BLEND_CTL 5 +#define TCL_TEXTURE_PROC_CTL 6 +#define TCL_LIGHT_MODEL_CTL 7 +#define TCL_PER_LIGHT_CTL_0 8 +#define TCL_PER_LIGHT_CTL_1 9 +#define TCL_PER_LIGHT_CTL_2 10 +#define TCL_PER_LIGHT_CTL_3 11 +#define TCL_STATE_SIZE 12 + +#define MTL_CMD_0 0 +#define MTL_EMMISSIVE_RED 1 +#define MTL_EMMISSIVE_GREEN 2 +#define MTL_EMMISSIVE_BLUE 3 +#define MTL_EMMISSIVE_ALPHA 4 +#define MTL_AMBIENT_RED 5 +#define MTL_AMBIENT_GREEN 6 +#define MTL_AMBIENT_BLUE 7 +#define MTL_AMBIENT_ALPHA 8 +#define MTL_DIFFUSE_RED 9 +#define MTL_DIFFUSE_GREEN 10 +#define MTL_DIFFUSE_BLUE 11 +#define MTL_DIFFUSE_ALPHA 12 +#define MTL_SPECULAR_RED 13 +#define MTL_SPECULAR_GREEN 14 +#define MTL_SPECULAR_BLUE 15 +#define MTL_SPECULAR_ALPHA 16 +#define MTL_SHININESS 17 +#define MTL_STATE_SIZE 18 + +#define VTX_CMD_0 0 +#define VTX_SE_COORD_FMT 1 +#define VTX_STATE_SIZE 2 + +#define MAT_CMD_0 0 +#define MAT_ELT_0 1 +#define MAT_STATE_SIZE 17 + +#define GRD_CMD_0 0 +#define GRD_VERT_GUARD_CLIP_ADJ 1 +#define GRD_VERT_GUARD_DISCARD_ADJ 2 +#define GRD_HORZ_GUARD_CLIP_ADJ 3 +#define GRD_HORZ_GUARD_DISCARD_ADJ 4 +#define GRD_STATE_SIZE 5 + +/* position changes frequently when lighting in modelpos - separate + * out to new state item? + */ +#define LIT_CMD_0 0 +#define LIT_AMBIENT_RED 1 +#define LIT_AMBIENT_GREEN 2 +#define LIT_AMBIENT_BLUE 3 +#define LIT_AMBIENT_ALPHA 4 +#define LIT_DIFFUSE_RED 5 +#define LIT_DIFFUSE_GREEN 6 +#define LIT_DIFFUSE_BLUE 7 +#define LIT_DIFFUSE_ALPHA 8 +#define LIT_SPECULAR_RED 9 +#define LIT_SPECULAR_GREEN 10 +#define LIT_SPECULAR_BLUE 11 +#define LIT_SPECULAR_ALPHA 12 +#define LIT_POSITION_X 13 +#define LIT_POSITION_Y 14 +#define LIT_POSITION_Z 15 +#define LIT_POSITION_W 16 +#define LIT_DIRECTION_X 17 +#define LIT_DIRECTION_Y 18 +#define LIT_DIRECTION_Z 19 +#define LIT_DIRECTION_W 20 +#define LIT_ATTEN_QUADRATIC 21 +#define LIT_ATTEN_LINEAR 22 +#define LIT_ATTEN_CONST 23 +#define LIT_ATTEN_XXX 24 +#define LIT_CMD_1 25 +#define LIT_SPOT_DCD 26 +#define LIT_SPOT_EXPONENT 27 +#define LIT_SPOT_CUTOFF 28 +#define LIT_SPECULAR_THRESH 29 +#define LIT_RANGE_CUTOFF 30 /* ? */ +#define LIT_ATTEN_CONST_INV 31 +#define LIT_STATE_SIZE 32 + +/* Fog + */ +#define FOG_CMD_0 0 +#define FOG_R 1 +#define FOG_C 2 +#define FOG_D 3 +#define FOG_PAD 4 +#define FOG_STATE_SIZE 5 + +/* UCP + */ +#define UCP_CMD_0 0 +#define UCP_X 1 +#define UCP_Y 2 +#define UCP_Z 3 +#define UCP_W 4 +#define UCP_STATE_SIZE 5 + +/* GLT - Global ambient + */ +#define GLT_CMD_0 0 +#define GLT_RED 1 +#define GLT_GREEN 2 +#define GLT_BLUE 3 +#define GLT_ALPHA 4 +#define GLT_STATE_SIZE 5 + +/* EYE + */ +#define EYE_CMD_0 0 +#define EYE_X 1 +#define EYE_Y 2 +#define EYE_Z 3 +#define EYE_RESCALE_FACTOR 4 +#define EYE_STATE_SIZE 5 + +#define SHN_CMD_0 0 +#define SHN_SHININESS 1 +#define SHN_STATE_SIZE 2 + +struct radeon_hw_state { + /* Head of the linked list of state atoms. */ + struct radeon_state_atom atomlist; + + /* Hardware state, stored as cmdbuf commands: + * -- Need to doublebuffer for + * - eliding noop statechange loops? (except line stipple count) + */ + struct radeon_state_atom ctx; + struct radeon_state_atom set; + struct radeon_state_atom lin; + struct radeon_state_atom msk; + struct radeon_state_atom vpt; + struct radeon_state_atom tcl; + struct radeon_state_atom msc; + struct radeon_state_atom tex[3]; + struct radeon_state_atom cube[3]; + struct radeon_state_atom zbs; + struct radeon_state_atom mtl; + struct radeon_state_atom mat[6]; + struct radeon_state_atom lit[8]; /* includes vec, scl commands */ + struct radeon_state_atom ucp[6]; + struct radeon_state_atom eye; /* eye pos */ + struct radeon_state_atom grd; /* guard band clipping */ + struct radeon_state_atom fog; + struct radeon_state_atom glt; + struct radeon_state_atom txr[3]; /* for NPOT */ + + int max_state_size; /* Number of bytes necessary for a full state emit. */ + GLboolean is_dirty, all_dirty; +}; + +struct radeon_state { + /* Derived state for internal purposes: + */ + struct radeon_colorbuffer_state color; + struct radeon_depthbuffer_state depth; + struct radeon_scissor_state scissor; + struct radeon_stencilbuffer_state stencil; + struct radeon_stipple_state stipple; + struct radeon_texture_state texture; +}; + +/* Need refcounting on dma buffers: + */ +struct radeon_dma_buffer { + int refcount; /* the number of retained regions in buf */ + drmBufPtr buf; +}; + +#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset + \ + (rvb)->address - rmesa->dma.buf0_address + \ + (rvb)->start) + +/* A retained region, eg vertices for indexed vertices. + */ +struct radeon_dma_region { + struct radeon_dma_buffer *buf; + char *address; /* == buf->address */ + int start, end, ptr; /* offsets from start of buf */ + int aos_start; + int aos_stride; + int aos_size; +}; + +struct radeon_dma { + /* Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ + struct radeon_dma_region current; + + void (*flush) (radeonContextPtr); + + char *buf0_address; /* start of buf[0], for index calcs */ + GLuint nr_released_bufs; /* flush after so many buffers released */ +}; + +struct radeon_dri_mirror { + __DRIcontextPrivate *context; /* DRI context */ + __DRIscreenPrivate *screen; /* DRI screen */ + + /** + * DRI drawable bound to this context for drawing. + */ + __DRIdrawablePrivate *drawable; + + /** + * DRI drawable bound to this context for reading. + */ + __DRIdrawablePrivate *readable; + + drm_context_t hwContext; + drm_hw_lock_t *hwLock; + int fd; + int drmMinor; +}; + +#define RADEON_CMD_BUF_SZ (8*1024) + +struct radeon_store { + GLuint statenr; + GLuint primnr; + char cmd_buf[RADEON_CMD_BUF_SZ]; + int cmd_used; + int elts_start; +}; + +/* radeon_tcl.c + */ +struct radeon_tcl_info { + GLuint vertex_format; + GLuint hw_primitive; + + /* Temporary for cases where incoming vertex data is incompatible + * with maos code. + */ + GLvector4f ObjClean; + + struct radeon_dma_region *aos_components[8]; + GLuint nr_aos_components; + + GLuint *Elts; + + struct radeon_dma_region indexed_verts; + struct radeon_dma_region obj; + struct radeon_dma_region rgba; + struct radeon_dma_region spec; + struct radeon_dma_region fog; + struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS]; + struct radeon_dma_region norm; +}; + +/* radeon_swtcl.c + */ +struct radeon_swtcl_info { + GLuint RenderIndex; + GLuint vertex_size; + GLuint vertex_format; + + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + + GLubyte *verts; + + /* Fallback rasterization functions + */ + radeon_point_func draw_point; + radeon_line_func draw_line; + radeon_tri_func draw_tri; + + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + GLboolean needproj; + + struct radeon_dma_region indexed_verts; +}; + +struct radeon_ioctl { + GLuint vertex_offset; + GLuint vertex_size; +}; + +#define RADEON_MAX_PRIMS 64 + +struct radeon_prim { + GLuint start; + GLuint end; + GLuint prim; +}; + +/* A maximum total of 20 elements per vertex: 3 floats for position, 3 + * floats for normal, 4 floats for color, 4 bytes for secondary color, + * 3 floats for each texture unit (9 floats total). + * + * The position data is never actually stored here, so 3 elements could be + * trimmed out of the buffer. This number is only valid for vtxfmt! + */ +#define RADEON_MAX_VERTEX_SIZE 20 + +struct radeon_context { + GLcontext *glCtx; /* Mesa context */ + + /* Driver and hardware state management + */ + struct radeon_hw_state hw; + struct radeon_state state; + + /* Texture object bookkeeping + */ + unsigned nr_heaps; + driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; + driTextureObject swapped; + int texture_depth; + float initialMaxAnisotropy; + + /* Rasterization and vertex state: + */ + GLuint TclFallback; + GLuint Fallback; + GLuint NewGLState; + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + + /* Vertex buffers + */ + struct radeon_ioctl ioctl; + struct radeon_dma dma; + struct radeon_store store; + /* A full state emit as of the first state emit in the main store, in case + * the context is lost. + */ + struct radeon_store backup_store; + + /* Page flipping + */ + GLuint doPageFlip; + + /* Busy waiting + */ + GLuint do_usleeps; + GLuint do_irqs; + GLuint irqsEmitted; + drm_radeon_irq_wait_t iw; + + /* Drawable, cliprect and scissor information + */ + GLuint numClipRects; /* Cliprects for the draw buffer */ + drm_clip_rect_t *pClipRects; + unsigned int lastStamp; + GLboolean lost_context; + GLboolean save_on_next_emit; + radeonScreenPtr radeonScreen; /* Screen private DRI data */ + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* TCL stuff + */ + GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS]; + GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS]; + GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS]; + GLuint TexGenEnabled; + GLuint NeedTexMatrix; + GLuint TexMatColSwap; + GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS]; + GLuint last_ReallyEnabled; + + /* VBI + */ + GLuint vbl_seq; + GLuint vblank_flags; + + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; + + /* radeon_tcl.c + */ + struct radeon_tcl_info tcl; + + /* radeon_swtcl.c + */ + struct radeon_swtcl_info swtcl; + + /* Mirrors of some DRI state + */ + struct radeon_dri_mirror dri; + + /* Configuration cache + */ + driOptionCache optionCache; + + GLboolean using_hyperz; + GLboolean texmicrotile; + + /* Performance counters + */ + GLuint boxes; /* Draw performance boxes */ + GLuint hardwareWentIdle; + GLuint c_clears; + GLuint c_drawWaits; + GLuint c_textureSwaps; + GLuint c_textureBytes; + GLuint c_vertexBuffers; +}; + +#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx)) + +static __inline GLuint radeonPackColor(GLuint cpp, + GLubyte r, GLubyte g, + GLubyte b, GLubyte a) +{ + switch (cpp) { + case 2: + return PACK_COLOR_565(r, g, b); + case 4: + return PACK_COLOR_8888(a, r, g, b); + default: + return 0; + } +} + +#define RADEON_OLD_PACKETS 1 + +extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); +extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); +extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); +extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h); +extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); +extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); + +/* ================================================================ + * Debugging: + */ +#define DO_DEBUG 1 + +#if DO_DEBUG +extern int RADEON_DEBUG; +#else +#define RADEON_DEBUG 0 +#endif + +#define DEBUG_TEXTURE 0x0001 +#define DEBUG_STATE 0x0002 +#define DEBUG_IOCTL 0x0004 +#define DEBUG_PRIMS 0x0008 +#define DEBUG_VERTS 0x0010 +#define DEBUG_FALLBACKS 0x0020 +#define DEBUG_VFMT 0x0040 +#define DEBUG_CODEGEN 0x0080 +#define DEBUG_VERBOSE 0x0100 +#define DEBUG_DRI 0x0200 +#define DEBUG_DMA 0x0400 +#define DEBUG_SANITY 0x0800 +#define DEBUG_SYNC 0x1000 + +#endif /* __RADEON_CONTEXT_H__ */ diff --git a/radeon/radeon_ioctl.c b/radeon/radeon_ioctl.c new file mode 100644 index 0000000..4c64bc2 --- /dev/null +++ b/radeon/radeon_ioctl.c @@ -0,0 +1,1276 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <sched.h> +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "simple_list.h" +#include "swrast/swrast.h" + +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" +#include "radeon_tcl.h" +#include "radeon_sanity.h" + +#define STANDALONE_MMIO +#include "radeon_macros.h" /* for INREG() */ + +#include "drirenderbuffer.h" +#include "vblank.h" + +#define RADEON_TIMEOUT 512 +#define RADEON_IDLE_RETRY 16 + + +static void radeonWaitForIdle( radeonContextPtr rmesa ); +static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, + const char * caller ); + +static void print_state_atom( struct radeon_state_atom *state ) +{ + int i; + + fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) + for (i = 0 ; i < state->cmd_size ; i++) + fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); + +} + +static void radeonSaveHwState( radeonContextPtr rmesa ) +{ + struct radeon_state_atom *atom; + char * dest = rmesa->backup_store.cmd_buf; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->backup_store.cmd_used = 0; + + foreach( atom, &rmesa->hw.atomlist ) { + if ( atom->check( rmesa->glCtx ) ) { + int size = atom->cmd_size * 4; + memcpy( dest, atom->cmd, size); + dest += size; + rmesa->backup_store.cmd_used += size; + if (RADEON_DEBUG & DEBUG_STATE) + print_state_atom( atom ); + } + } + + assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ ); + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Returning to radeonEmitState\n"); +} + +/* At this point we were in FlushCmdBufLocked but we had lost our context, so + * we need to unwire our current cmdbuf, hook the one with the saved state in + * it, flush it, and then put the current one back. This is so commands at the + * start of a cmdbuf can rely on the state being kept from the previous one. + */ +static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa ) +{ + GLuint nr_released_bufs; + struct radeon_store saved_store; + + if (rmesa->backup_store.cmd_used == 0) + return; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Emitting backup state on lost context\n"); + + rmesa->lost_context = GL_FALSE; + + nr_released_bufs = rmesa->dma.nr_released_bufs; + saved_store = rmesa->store; + rmesa->dma.nr_released_bufs = 0; + rmesa->store = rmesa->backup_store; + radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); + rmesa->dma.nr_released_bufs = nr_released_bufs; + rmesa->store = saved_store; +} + +/* ============================================================= + * Kernel command buffer handling + */ + +/* The state atoms will be emitted in the order they appear in the atom list, + * so this step is important. + */ +void radeonSetUpAtomList( radeonContextPtr rmesa ) +{ + int i, mtu = rmesa->glCtx->Const.MaxTextureUnits; + + make_empty_list(&rmesa->hw.atomlist); + rmesa->hw.atomlist.name = "atom-list"; + + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc); + for (i = 0; i < mtu; ++i) { + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]); + } + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl); + for (i = 0; i < 3 + mtu; ++i) + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]); + for (i = 0; i < 8; ++i) + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]); + for (i = 0; i < 6; ++i) + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog); + insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt); +} + +void radeonEmitState( radeonContextPtr rmesa ) +{ + struct radeon_state_atom *atom; + char *dest; + + if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->save_on_next_emit) { + radeonSaveHwState(rmesa); + rmesa->save_on_next_emit = GL_FALSE; + } + + /* this code used to return here but now it emits zbs */ + + /* To avoid going across the entire set of states multiple times, just check + * for enough space for the case of emitting all state, and inline the + * radeonAllocCmdBuf code here without all the checks. + */ + radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size); + dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; + + /* We always always emit zbs, this is due to a bug found by keithw in + the hardware and rediscovered after Erics changes by me. + if you ever touch this code make sure you emit zbs otherwise + you get tcl lockups on at least M7/7500 class of chips - airlied */ + rmesa->hw.zbs.dirty=1; + + if (RADEON_DEBUG & DEBUG_STATE) { + foreach(atom, &rmesa->hw.atomlist) { + if (atom->dirty || rmesa->hw.all_dirty) { + if (atom->check(rmesa->glCtx)) + print_state_atom(atom); + else + fprintf(stderr, "skip state %s\n", atom->name); + } + } + } + + foreach(atom, &rmesa->hw.atomlist) { + if (rmesa->hw.all_dirty) + atom->dirty = GL_TRUE; + if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) && + atom->is_tcl) + atom->dirty = GL_FALSE; + if (atom->dirty) { + if (atom->check(rmesa->glCtx)) { + int size = atom->cmd_size * 4; + memcpy(dest, atom->cmd, size); + dest += size; + rmesa->store.cmd_used += size; + atom->dirty = GL_FALSE; + } + } + } + + assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ); + + rmesa->hw.is_dirty = GL_FALSE; + rmesa->hw.all_dirty = GL_FALSE; +} + +/* Fire a section of the retained (indexed_verts) buffer as a regular + * primtive. + */ +extern void radeonEmitVbufPrim( radeonContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint vertex_nr ) +{ + drm_radeon_cmd_header_t *cmd; + + + assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); + + radeonEmitState( rmesa ); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__, + rmesa->store.cmd_used/4); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ, + __FUNCTION__ ); +#if RADEON_OLD_PACKETS + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16); + cmd[2].i = rmesa->ioctl.vertex_offset; + cmd[3].i = vertex_nr; + cmd[4].i = vertex_format; + cmd[5].i = (primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_LIST | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n", + __FUNCTION__, + cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i); +#else + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16); + cmd[2].i = vertex_format; + cmd[3].i = (primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_LIST | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n", + __FUNCTION__, + cmd[1].i, cmd[2].i, cmd[3].i); +#endif +} + + +void radeonFlushElts( radeonContextPtr rmesa ) +{ + int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); + int dwords; +#if RADEON_OLD_PACKETS + int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2; +#else + int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2; +#endif + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + assert( rmesa->dma.flush == radeonFlushElts ); + rmesa->dma.flush = NULL; + + /* Cope with odd number of elts: + */ + rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; + dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + +#if RADEON_OLD_PACKETS + cmd[1] |= (dwords - 3) << 16; + cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; +#else + cmd[1] |= (dwords - 3) << 16; + cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; +#endif + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + radeonFinish( rmesa->glCtx ); + } +} + + +GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint min_nr ) +{ + drm_radeon_cmd_header_t *cmd; + GLushort *retval; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr); + + assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); + + radeonEmitState( rmesa ); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, + ELTS_BUFSZ(min_nr), + __FUNCTION__ ); +#if RADEON_OLD_PACKETS + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM; + cmd[2].i = rmesa->ioctl.vertex_offset; + cmd[3].i = 0xffff; + cmd[4].i = vertex_format; + cmd[5].i = (primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); + + retval = (GLushort *)(cmd+6); +#else + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; + cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX; + cmd[2].i = vertex_format; + cmd[3].i = (primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); + + retval = (GLushort *)(cmd+4); +#endif + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n", + __FUNCTION__, + cmd[1].i, vertex_format, primitive); + + assert(!rmesa->dma.flush); + rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = radeonFlushElts; + + rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; + + return retval; +} + + + +void radeonEmitVertexAOS( radeonContextPtr rmesa, + GLuint vertex_size, + GLuint offset ) +{ +#if RADEON_OLD_PACKETS + rmesa->ioctl.vertex_size = vertex_size; + rmesa->ioctl.vertex_offset = offset; +#else + drm_radeon_cmd_header_t *cmd; + + if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) + fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ, + __FUNCTION__ ); + + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16); + cmd[2].i = 1; + cmd[3].i = vertex_size | (vertex_size << 8); + cmd[4].i = offset; +#endif +} + + +void radeonEmitAOS( radeonContextPtr rmesa, + struct radeon_dma_region **component, + GLuint nr, + GLuint offset ) +{ +#if RADEON_OLD_PACKETS + assert( nr == 1 ); + assert( component[0]->aos_size == component[0]->aos_stride ); + rmesa->ioctl.vertex_size = component[0]->aos_size; + rmesa->ioctl.vertex_offset = + (component[0]->aos_start + offset * component[0]->aos_stride * 4); +#else + drm_radeon_cmd_header_t *cmd; + int sz = AOS_BUFSZ(nr); + int i; + int *tmp; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz, + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16); + cmd[2].i = nr; + tmp = &cmd[0].i; + cmd += 3; + + for (i = 0 ; i < nr ; i++) { + if (i & 1) { + cmd[0].i |= ((component[i]->aos_stride << 24) | + (component[i]->aos_size << 16)); + cmd[2].i = (component[i]->aos_start + + offset * component[i]->aos_stride * 4); + cmd += 3; + } + else { + cmd[0].i = ((component[i]->aos_stride << 8) | + (component[i]->aos_size << 0)); + cmd[1].i = (component[i]->aos_start + + offset * component[i]->aos_stride * 4); + } + } + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "%s:\n", __FUNCTION__); + for (i = 0 ; i < sz ; i++) + fprintf(stderr, " %d: %x\n", i, tmp[i]); + } +#endif +} + +/* using already shifted color_fmt! */ +void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */ + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, + GLuint w, GLuint h ) +{ + drm_radeon_cmd_header_t *cmd; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", + __FUNCTION__, + src_pitch, src_offset, srcx, srcy, + dst_pitch, dst_offset, dstx, dsty, + w, h); + + assert( (src_pitch & 63) == 0 ); + assert( (dst_pitch & 63) == 0 ); + assert( (src_offset & 1023) == 0 ); + assert( (dst_offset & 1023) == 0 ); + assert( w < (1<<16) ); + assert( h < (1<<16) ); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int), + __FUNCTION__ ); + + + cmd[0].i = 0; + cmd[0].header.cmd_type = RADEON_CMD_PACKET3; + cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16); + cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + color_fmt | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | + RADEON_GMC_WR_MSK_DIS ); + + cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); + cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); + cmd[5].i = (srcx << 16) | srcy; + cmd[6].i = (dstx << 16) | dsty; /* dst */ + cmd[7].i = (w << 16) | h; +} + + +void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ) +{ + drm_radeon_cmd_header_t *cmd; + + assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int), + __FUNCTION__ ); + cmd[0].i = 0; + cmd[0].wait.cmd_type = RADEON_CMD_WAIT; + cmd[0].wait.flags = flags; +} + + +static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, + const char * caller ) +{ + int ret, i; + drm_radeon_cmd_buffer_t cmd; + + if (rmesa->lost_context) + radeonBackUpAndEmitLostStateLocked(rmesa); + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (RADEON_DEBUG & DEBUG_VERBOSE) + for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) + fprintf(stderr, "%d: %x\n", i/4, + *(int *)(&rmesa->store.cmd_buf[i])); + } + + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, + rmesa->dma.nr_released_bufs); + + + if (RADEON_DEBUG & DEBUG_SANITY) { + if (rmesa->state.scissor.enabled) + ret = radeonSanityCmdBuffer( rmesa, + rmesa->state.scissor.numClipRects, + rmesa->state.scissor.pClipRects); + else + ret = radeonSanityCmdBuffer( rmesa, + rmesa->numClipRects, + rmesa->pClipRects); + if (ret) { + fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); + goto out; + } + } + + + cmd.bufsz = rmesa->store.cmd_used; + cmd.buf = rmesa->store.cmd_buf; + + if (rmesa->state.scissor.enabled) { + cmd.nbox = rmesa->state.scissor.numClipRects; + cmd.boxes = rmesa->state.scissor.pClipRects; + } else { + cmd.nbox = rmesa->numClipRects; + cmd.boxes = rmesa->pClipRects; + } + + ret = drmCommandWrite( rmesa->dri.fd, + DRM_RADEON_CMDBUF, + &cmd, sizeof(cmd) ); + + if (ret) + fprintf(stderr, "drmCommandWrite: %d\n", ret); + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); + radeonWaitForIdleLocked( rmesa ); + } + + out: + rmesa->store.primnr = 0; + rmesa->store.statenr = 0; + rmesa->store.cmd_used = 0; + rmesa->dma.nr_released_bufs = 0; + rmesa->save_on_next_emit = 1; + + return ret; +} + + +/* Note: does not emit any commands to avoid recursion on + * radeonAllocCmdBuf. + */ +void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller ) +{ + int ret; + + + LOCK_HARDWARE( rmesa ); + + ret = radeonFlushCmdBufLocked( rmesa, caller ); + + UNLOCK_HARDWARE( rmesa ); + + if (ret) { + fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret); + exit(ret); + } +} + +/* ============================================================= + * Hardware vertex buffer handling + */ + + +void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ) +{ + struct radeon_dma_buffer *dmabuf; + int fd = rmesa->dri.fd; + int index = 0; + int size = 0; + drmDMAReq dma; + int ret; + + if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush( rmesa ); + } + + if (rmesa->dma.current.buf) + radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); + + if (rmesa->dma.nr_released_bufs > 4) + radeonFlushCmdBuf( rmesa, __FUNCTION__ ); + + dma.context = rmesa->dri.hwContext; + dma.send_count = 0; + dma.send_list = NULL; + dma.send_sizes = NULL; + dma.flags = 0; + dma.request_count = 1; + dma.request_size = RADEON_BUFFER_SIZE; + dma.request_list = &index; + dma.request_sizes = &size; + dma.granted_count = 0; + + LOCK_HARDWARE(rmesa); /* no need to validate */ + + ret = drmDMA( fd, &dma ); + + if (ret != 0) { + /* Free some up this way? + */ + if (rmesa->dma.nr_released_bufs) { + radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); + } + + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "Waiting for buffers\n"); + + radeonWaitForIdleLocked( rmesa ); + ret = drmDMA( fd, &dma ); + + if ( ret != 0 ) { + UNLOCK_HARDWARE( rmesa ); + fprintf( stderr, "Error: Could not get dma buffer... exiting\n" ); + exit( -1 ); + } + } + + UNLOCK_HARDWARE(rmesa); + + if (RADEON_DEBUG & DEBUG_DMA) + fprintf(stderr, "Allocated buffer %d\n", index); + + dmabuf = CALLOC_STRUCT( radeon_dma_buffer ); + dmabuf->buf = &rmesa->radeonScreen->buffers->list[index]; + dmabuf->refcount = 1; + + rmesa->dma.current.buf = dmabuf; + rmesa->dma.current.address = dmabuf->buf->address; + rmesa->dma.current.end = dmabuf->buf->total; + rmesa->dma.current.start = 0; + rmesa->dma.current.ptr = 0; + + rmesa->c_vertexBuffers++; +} + +void radeonReleaseDmaRegion( radeonContextPtr rmesa, + struct radeon_dma_region *region, + const char *caller ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + + if (!region->buf) + return; + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (--region->buf->refcount == 0) { + drm_radeon_cmd_header_t *cmd; + + if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) + fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, + region->buf->buf->idx); + + cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), + __FUNCTION__ ); + cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; + cmd->dma.buf_idx = region->buf->buf->idx; + FREE(region->buf); + rmesa->dma.nr_released_bufs++; + } + + region->buf = NULL; + region->start = 0; +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void radeonAllocDmaRegion( radeonContextPtr rmesa, + struct radeon_dma_region *region, + int bytes, + int alignment ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (region->buf) + radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ ); + + alignment--; + rmesa->dma.current.start = rmesa->dma.current.ptr = + (rmesa->dma.current.ptr + alignment) & ~alignment; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + radeonRefillCurrentDmaRegion( rmesa ); + + region->start = rmesa->dma.current.start; + region->ptr = rmesa->dma.current.start; + region->end = rmesa->dma.current.start + bytes; + region->address = rmesa->dma.current.address; + region->buf = rmesa->dma.current.buf; + region->buf->refcount++; + + rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ + rmesa->dma.current.start = + rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; +} + +/* ================================================================ + * SwapBuffers with client-side throttling + */ + +static u_int32_t radeonGetLastFrame (radeonContextPtr rmesa) +{ + drm_radeon_getparam_t gp; + int ret; + u_int32_t frame; + + gp.param = RADEON_PARAM_LAST_FRAME; + gp.value = (int *)&frame; + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp) ); + + if ( ret ) { + fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); + exit(1); + } + + return frame; +} + +static void radeonEmitIrqLocked( radeonContextPtr rmesa ) +{ + drm_radeon_irq_emit_t ie; + int ret; + + ie.irq_seq = &rmesa->iw.irq_seq; + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, + &ie, sizeof(ie) ); + if ( ret ) { + fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret ); + exit(1); + } +} + + +static void radeonWaitIrq( radeonContextPtr rmesa ) +{ + int ret; + + do { + ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, + &rmesa->iw, sizeof(rmesa->iw) ); + } while (ret && (errno == EINTR || errno == EBUSY)); + + if ( ret ) { + fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); + exit(1); + } +} + + +static void radeonWaitForFrameCompletion( radeonContextPtr rmesa ) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (rmesa->do_irqs) { + if (radeonGetLastFrame(rmesa) < sarea->last_frame) { + if (!rmesa->irqsEmitted) { + while (radeonGetLastFrame (rmesa) < sarea->last_frame) + ; + } + else { + UNLOCK_HARDWARE( rmesa ); + radeonWaitIrq( rmesa ); + LOCK_HARDWARE( rmesa ); + } + rmesa->irqsEmitted = 10; + } + + if (rmesa->irqsEmitted) { + radeonEmitIrqLocked( rmesa ); + rmesa->irqsEmitted--; + } + } + else { + while (radeonGetLastFrame (rmesa) < sarea->last_frame) { + UNLOCK_HARDWARE( rmesa ); + if (rmesa->do_usleeps) + DO_USLEEP( 1 ); + LOCK_HARDWARE( rmesa ); + } + } +} + +/* Copy the back color buffer to the front color buffer. + */ +void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv, + const drm_clip_rect_t *rect) +{ + radeonContextPtr rmesa; + GLint nbox, i, ret; + GLboolean missed_target; + int64_t ust; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); + } + + RADEON_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + + /* Throttle the frame rate -- only allow one pending swap buffers + * request at a time. + */ + radeonWaitForFrameCompletion( rmesa ); + if (!rect) + { + UNLOCK_HARDWARE( rmesa ); + driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + LOCK_HARDWARE( rmesa ); + } + + nbox = dPriv->numClipRects; /* must be in locked region */ + + for ( i = 0 ; i < nbox ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + GLint n = 0; + + for ( ; i < nr ; i++ ) { + + *b = box[i]; + + if (rect) + { + if (rect->x1 > b->x1) + b->x1 = rect->x1; + if (rect->y1 > b->y1) + b->y1 = rect->y1; + if (rect->x2 < b->x2) + b->x2 = rect->x2; + if (rect->y2 < b->y2) + b->y2 = rect->y2; + + if (b->x1 < b->x2 && b->y1 < b->y2) + b++; + } + else + b++; + + n++; + } + rmesa->sarea->nbox = n; + + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret ); + UNLOCK_HARDWARE( rmesa ); + exit( 1 ); + } + } + + UNLOCK_HARDWARE( rmesa ); + if (!rect) + { + rmesa->swap_count++; + (*dri_interface->getUST)( & ust ); + if ( missed_target ) { + rmesa->swap_missed_count++; + rmesa->swap_missed_ust = ust - rmesa->swap_ust; + } + + rmesa->swap_ust = ust; + rmesa->hw.all_dirty = GL_TRUE; + } +} + +void radeonPageFlip( const __DRIdrawablePrivate *dPriv ) +{ + radeonContextPtr rmesa; + GLint ret; + GLboolean missed_target; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, + rmesa->sarea->pfCurrentPage); + } + + RADEON_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + + /* Need to do this for the perf box placement: + */ + if (dPriv->numClipRects) + { + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + b[0] = box[0]; + rmesa->sarea->nbox = 1; + } + + /* Throttle the frame rate -- only allow a few pending swap buffers + * request at a time. + */ + radeonWaitForFrameCompletion( rmesa ); + UNLOCK_HARDWARE( rmesa ); + driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target ); + if ( missed_target ) { + rmesa->swap_missed_count++; + (void) (*dri_interface->getUST)( & rmesa->swap_missed_ust ); + } + LOCK_HARDWARE( rmesa ); + + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); + + UNLOCK_HARDWARE( rmesa ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); + exit( 1 ); + } + + rmesa->swap_count++; + (void) (*dri_interface->getUST)( & rmesa->swap_ust ); + + /* Get ready for drawing next frame. Update the renderbuffers' + * flippedOffset/Pitch fields so we draw into the right place. + */ + driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, + rmesa->sarea->pfCurrentPage); + + radeonUpdateDrawBuffer(rmesa->glCtx); +} + + +/* ================================================================ + * Buffer clear + */ +#define RADEON_MAX_CLEARS 256 + +static void radeonClear( GLcontext *ctx, GLbitfield mask ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + drm_radeon_sarea_t *sarea = rmesa->sarea; + u_int32_t clear; + GLuint flags = 0; + GLuint color_mask = 0; + GLint ret, i; + GLint cx, cy, cw, ch; + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "radeonClear\n"); + } + + { + LOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( rmesa ); + if ( dPriv->numClipRects == 0 ) + return; + } + + radeonFlush( ctx ); + + if ( mask & BUFFER_BIT_FRONT_LEFT ) { + flags |= RADEON_FRONT; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_FRONT_LEFT; + } + + if ( mask & BUFFER_BIT_BACK_LEFT ) { + flags |= RADEON_BACK; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_BACK_LEFT; + } + + if ( mask & BUFFER_BIT_DEPTH ) { + flags |= RADEON_DEPTH; + mask &= ~BUFFER_BIT_DEPTH; + } + + if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { + flags |= RADEON_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } + + if ( mask ) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); + _swrast_Clear( ctx, mask ); + } + + if ( !flags ) + return; + + if (rmesa->using_hyperz) { + flags |= RADEON_USE_COMP_ZBUF; +/* if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) + flags |= RADEON_USE_HIERZ; */ + if (!(rmesa->state.stencil.hwBuffer) || + ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && + ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { + flags |= RADEON_CLEAR_FASTZ; + } + } + + LOCK_HARDWARE( rmesa ); + + /* compute region after locking: */ + cx = ctx->DrawBuffer->_Xmin; + cy = ctx->DrawBuffer->_Ymin; + cw = ctx->DrawBuffer->_Xmax - cx; + ch = ctx->DrawBuffer->_Ymax - cy; + + /* Flip top to bottom */ + cx += dPriv->x; + cy = dPriv->y + dPriv->h - cy - ch; + + /* Throttle the number of clear ioctls we do. + */ + while ( 1 ) { + int ret; + drm_radeon_getparam_t gp; + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&clear; + ret = drmCommandWriteRead( rmesa->dri.fd, + DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); + + if ( ret ) { + fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); + exit(1); + } + + if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) { + break; + } + + if ( rmesa->do_usleeps ) { + UNLOCK_HARDWARE( rmesa ); + DO_USLEEP( 1 ); + LOCK_HARDWARE( rmesa ); + } + } + + /* Send current state to the hardware */ + radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); + + for ( i = 0 ; i < dPriv->numClipRects ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + drm_radeon_clear_t clear; + drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; + GLint n = 0; + + if (cw != dPriv->w || ch != dPriv->h) { + /* clear subregion */ + for ( ; i < nr ; i++ ) { + GLint x = box[i].x1; + GLint y = box[i].y1; + GLint w = box[i].x2 - x; + GLint h = box[i].y2 - y; + + if ( x < cx ) w -= cx - x, x = cx; + if ( y < cy ) h -= cy - y, y = cy; + if ( x + w > cx + cw ) w = cx + cw - x; + if ( y + h > cy + ch ) h = cy + ch - y; + if ( w <= 0 ) continue; + if ( h <= 0 ) continue; + + b->x1 = x; + b->y1 = y; + b->x2 = x + w; + b->y2 = y + h; + b++; + n++; + } + } else { + /* clear whole buffer */ + for ( ; i < nr ; i++ ) { + *b++ = box[i]; + n++; + } + } + + rmesa->sarea->nbox = n; + + clear.flags = flags; + clear.clear_color = rmesa->state.color.clear; + clear.clear_depth = rmesa->state.depth.clear; + clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + clear.depth_mask = rmesa->state.stencil.clear; + clear.depth_boxes = depth_boxes; + + n--; + b = rmesa->sarea->boxes; + for ( ; n >= 0 ; n-- ) { + depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; + depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; + depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2; + depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2; + depth_boxes[n].f[CLEAR_DEPTH] = + (float)rmesa->state.depth.clear; + } + + ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, + &clear, sizeof(drm_radeon_clear_t)); + + if ( ret ) { + UNLOCK_HARDWARE( rmesa ); + fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); + exit( 1 ); + } + } + + UNLOCK_HARDWARE( rmesa ); + rmesa->hw.all_dirty = GL_TRUE; +} + + +void radeonWaitForIdleLocked( radeonContextPtr rmesa ) +{ + int fd = rmesa->dri.fd; + int to = 0; + int ret, i = 0; + + rmesa->c_drawWaits++; + + do { + do { + ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE); + } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY ); + } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) ); + + if ( ret < 0 ) { + UNLOCK_HARDWARE( rmesa ); + fprintf( stderr, "Error: Radeon timed out... exiting\n" ); + exit( -1 ); + } +} + + +static void radeonWaitForIdle( radeonContextPtr rmesa ) +{ + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked( rmesa ); + UNLOCK_HARDWARE(rmesa); +} + + +void radeonFlush( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + radeonEmitState( rmesa ); + + if (rmesa->store.cmd_used) + radeonFlushCmdBuf( rmesa, __FUNCTION__ ); +} + +/* Make sure all commands have been sent to the hardware and have + * completed processing. + */ +void radeonFinish( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + radeonFlush( ctx ); + + if (rmesa->do_irqs) { + LOCK_HARDWARE( rmesa ); + radeonEmitIrqLocked( rmesa ); + UNLOCK_HARDWARE( rmesa ); + radeonWaitIrq( rmesa ); + } + else + radeonWaitForIdle( rmesa ); +} + + +void radeonInitIoctlFuncs( GLcontext *ctx ) +{ + ctx->Driver.Clear = radeonClear; + ctx->Driver.Finish = radeonFinish; + ctx->Driver.Flush = radeonFlush; +} + diff --git a/radeon/radeon_ioctl.h b/radeon/radeon_ioctl.h new file mode 100644 index 0000000..11a7d02 --- /dev/null +++ b/radeon/radeon_ioctl.h @@ -0,0 +1,203 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.h,v 1.6 2002/12/16 16:18:58 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __RADEON_IOCTL_H__ +#define __RADEON_IOCTL_H__ + +#include "simple_list.h" +#include "radeon_lock.h" + + +extern void radeonEmitState( radeonContextPtr rmesa ); +extern void radeonEmitVertexAOS( radeonContextPtr rmesa, + GLuint vertex_size, + GLuint offset ); + +extern void radeonEmitVbufPrim( radeonContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint vertex_nr ); + +extern void radeonFlushElts( radeonContextPtr rmesa ); + +extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, + GLuint vertex_format, + GLuint primitive, + GLuint min_nr ); + +extern void radeonEmitAOS( radeonContextPtr rmesa, + struct radeon_dma_region **regions, + GLuint n, + GLuint offset ); + +extern void radeonEmitBlit( radeonContextPtr rmesa, + GLuint color_fmt, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint srcx, GLint srcy, + GLint dstx, GLint dsty, + GLuint w, GLuint h ); + +extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ); + +extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * ); +extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ); + +extern void radeonAllocDmaRegion( radeonContextPtr rmesa, + struct radeon_dma_region *region, + int bytes, + int alignment ); + +extern void radeonReleaseDmaRegion( radeonContextPtr rmesa, + struct radeon_dma_region *region, + const char *caller ); + +extern void radeonCopyBuffer( const __DRIdrawablePrivate *drawable, + const drm_clip_rect_t *rect); +extern void radeonPageFlip( const __DRIdrawablePrivate *drawable ); +extern void radeonFlush( GLcontext *ctx ); +extern void radeonFinish( GLcontext *ctx ); +extern void radeonWaitForIdleLocked( radeonContextPtr rmesa ); +extern void radeonWaitForVBlank( radeonContextPtr rmesa ); +extern void radeonInitIoctlFuncs( GLcontext *ctx ); +extern void radeonGetAllParams( radeonContextPtr rmesa ); +extern void radeonSetUpAtomList( radeonContextPtr rmesa ); + +/* ================================================================ + * Helper macros: + */ + +/* Close off the last primitive, if it exists. + */ +#define RADEON_NEWPRIM( rmesa ) \ +do { \ + if ( rmesa->dma.flush ) \ + rmesa->dma.flush( rmesa ); \ +} while (0) + +/* Can accomodate several state changes and primitive changes without + * actually firing the buffer. + */ +#define RADEON_STATECHANGE( rmesa, ATOM ) \ +do { \ + RADEON_NEWPRIM( rmesa ); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ + rmesa->hw.is_dirty = GL_TRUE; \ +} while (0) + +#define RADEON_DB_STATE( ATOM ) \ + memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ + rmesa->hw.ATOM.cmd_size * 4) + +static __inline int RADEON_DB_STATECHANGE( + radeonContextPtr rmesa, + struct radeon_state_atom *atom ) +{ + if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { + int *tmp; + RADEON_NEWPRIM( rmesa ); + atom->dirty = GL_TRUE; + rmesa->hw.is_dirty = GL_TRUE; + tmp = atom->cmd; + atom->cmd = atom->lastcmd; + atom->lastcmd = tmp; + return 1; + } + else + return 0; +} + + +/* Fire the buffered vertices no matter what. + */ +#define RADEON_FIREVERTICES( rmesa ) \ +do { \ + if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ + radeonFlush( rmesa->glCtx ); \ + } \ +} while (0) + +/* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ + * are available, you will also be adding an rmesa->state.max_state_size because + * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. + */ +#if RADEON_OLD_PACKETS +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) +#define VERT_AOS_BUFSZ (0) +#define ELTS_BUFSZ(nr) (24 + nr * 2) +#define VBUF_BUFSZ (6 * sizeof(int)) +#else +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) +#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define ELTS_BUFSZ(nr) (16 + nr * 2) +#define VBUF_BUFSZ (4 * sizeof(int)) +#endif + +/* Ensure that a minimum amount of space is available in the command buffer. + * This is used to ensure atomicity of state updates with the rendering requests + * that rely on them. + * + * An alternative would be to implement a "soft lock" such that when the buffer + * wraps at an inopportune time, we grab the lock, flush the current buffer, + * and hang on to the lock until the critical section is finished and we flush + * the buffer again and unlock. + */ +static __inline void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, + int bytes ) +{ + if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) + radeonFlushCmdBuf( rmesa, __FUNCTION__ ); + assert( bytes <= RADEON_CMD_BUF_SZ ); +} + +/* Alloc space in the command buffer + */ +static __inline char *radeonAllocCmdBuf( radeonContextPtr rmesa, + int bytes, const char *where ) +{ + if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) + radeonFlushCmdBuf( rmesa, __FUNCTION__ ); + + { + char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used; + rmesa->store.cmd_used += bytes; + return head; + } +} + +#endif /* __RADEON_IOCTL_H__ */ diff --git a/radeon/radeon_lighting.c b/radeon/radeon_lighting.c new file mode 100644 index 0000000..44e00af --- /dev/null +++ b/radeon/radeon_lighting.c @@ -0,0 +1,682 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.5 2002/09/16 18:05:20 eich Exp $ */ +/* + * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "api_arrayelt.h" +/* #include "mmath.h" */ +#include "enums.h" +#include "colormac.h" + + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_tcl.h" +#include "radeon_tex.h" +#include "radeon_vtxfmt.h" + + + +/* ============================================================= + * Materials + */ + + +/* Update on colormaterial, material emmissive/ambient, + * lightmodel.globalambient + */ +void update_global_ambient( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( glt ); + + /* Need to do more if both emmissive & ambient are PREMULT: + */ + if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & + ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | + (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) + { + COPY_3V( &fcmd[GLT_RED], + ctx->Light.Material[0].Emission); + ACC_SCALE_3V( &fcmd[GLT_RED], + ctx->Light.Model.Ambient, + ctx->Light.Material[0].Ambient); + } + else + { + COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); + } + + RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt); +} + +/* Update on change to + * - light[p].colors + * - light[p].enabled + * - material, + * - colormaterial enabled + * - colormaterial bitmask + */ +void update_light_colors( GLcontext *ctx, GLuint p ) +{ + struct gl_light *l = &ctx->Light.Light[p]; + +/* fprintf(stderr, "%s\n", __FUNCTION__); */ + + if (l->Enabled) { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); + GLuint bitmask = ctx->Light.ColorMaterialBitmask; + struct gl_material *mat = &ctx->Light.Material[0]; + + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); + COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); + + if (!ctx->Light.ColorMaterialEnabled) + bitmask = 0; + + if ((bitmask & FRONT_AMBIENT_BIT) == 0) + SELF_SCALE_3V( &fcmd[LIT_AMBIENT_RED], mat->Ambient ); + + if ((bitmask & FRONT_DIFFUSE_BIT) == 0) + SELF_SCALE_3V( &fcmd[LIT_DIFFUSE_RED], mat->Diffuse ); + + if ((bitmask & FRONT_SPECULAR_BIT) == 0) + SELF_SCALE_3V( &fcmd[LIT_SPECULAR_RED], mat->Specular ); + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } +} + +/* Also fallback for asym colormaterial mode in twoside lighting... + */ +void check_twoside_fallback( GLcontext *ctx ) +{ + GLboolean fallback = GL_FALSE; + + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + if (memcmp( &ctx->Light.Material[0], + &ctx->Light.Material[1], + sizeof(struct gl_material)) != 0) + fallback = GL_TRUE; + else if (ctx->Light.ColorMaterialEnabled && + (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != + ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1)) + fallback = GL_TRUE; + } + + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback ); +} + +void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) +{ + if (ctx->Light.ColorMaterialEnabled) { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint light_model_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; + GLuint mask = ctx->Light.ColorMaterialBitmask; + + /* Default to PREMULT: + */ + light_model_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | + (3 << RADEON_AMBIENT_SOURCE_SHIFT) | + (3 << RADEON_DIFFUSE_SOURCE_SHIFT) | + (3 << RADEON_SPECULAR_SOURCE_SHIFT)); + + if (mask & FRONT_EMISSION_BIT) { + light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_EMISSIVE_SOURCE_SHIFT); + } + + if (mask & FRONT_AMBIENT_BIT) { + light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_AMBIENT_SOURCE_SHIFT); + } + + if (mask & FRONT_DIFFUSE_BIT) { + light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_DIFFUSE_SOURCE_SHIFT); + } + + if (mask & FRONT_SPECULAR_BIT) { + light_model_ctl |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_SPECULAR_SOURCE_SHIFT); + } + + if (light_model_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { + GLuint p; + + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl; + + for (p = 0 ; p < MAX_LIGHTS; p++) + update_light_colors( ctx, p ); + update_global_ambient( ctx ); + } + } + + check_twoside_fallback( ctx ); +} + +void radeonUpdateMaterial( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); + GLuint p; + GLuint mask = ~0; + + if (ctx->Light.ColorMaterialEnabled) + mask &= ~ctx->Light.ColorMaterialBitmask; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + + if (mask & FRONT_EMISSION_BIT) { + fcmd[MTL_EMMISSIVE_RED] = ctx->Light.Material[0].Emission[0]; + fcmd[MTL_EMMISSIVE_GREEN] = ctx->Light.Material[0].Emission[1]; + fcmd[MTL_EMMISSIVE_BLUE] = ctx->Light.Material[0].Emission[2]; + fcmd[MTL_EMMISSIVE_ALPHA] = ctx->Light.Material[0].Emission[3]; + } + if (mask & FRONT_AMBIENT_BIT) { + fcmd[MTL_AMBIENT_RED] = ctx->Light.Material[0].Ambient[0]; + fcmd[MTL_AMBIENT_GREEN] = ctx->Light.Material[0].Ambient[1]; + fcmd[MTL_AMBIENT_BLUE] = ctx->Light.Material[0].Ambient[2]; + fcmd[MTL_AMBIENT_ALPHA] = ctx->Light.Material[0].Ambient[3]; + } + if (mask & FRONT_DIFFUSE_BIT) { + fcmd[MTL_DIFFUSE_RED] = ctx->Light.Material[0].Diffuse[0]; + fcmd[MTL_DIFFUSE_GREEN] = ctx->Light.Material[0].Diffuse[1]; + fcmd[MTL_DIFFUSE_BLUE] = ctx->Light.Material[0].Diffuse[2]; + fcmd[MTL_DIFFUSE_ALPHA] = ctx->Light.Material[0].Diffuse[3]; + } + if (mask & FRONT_SPECULAR_BIT) { + fcmd[MTL_SPECULAR_RED] = ctx->Light.Material[0].Specular[0]; + fcmd[MTL_SPECULAR_GREEN] = ctx->Light.Material[0].Specular[1]; + fcmd[MTL_SPECULAR_BLUE] = ctx->Light.Material[0].Specular[2]; + fcmd[MTL_SPECULAR_ALPHA] = ctx->Light.Material[0].Specular[3]; + } + if (mask & FRONT_SHININESS_BIT) { + fcmd[MTL_SHININESS] = ctx->Light.Material[0].Shininess; + } + + if (RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl )) { + for (p = 0 ; p < MAX_LIGHTS; p++) + update_light_colors( ctx, p ); + + check_twoside_fallback( ctx ); + update_global_ambient( ctx ); + } + else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE)) + fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__); +} + +/* _NEW_LIGHT + * _NEW_MODELVIEW + * _MESA_NEW_NEED_EYE_COORDS + * + * Uses derived state from mesa: + * _VP_inf_norm + * _h_inf_norm + * _Position + * _NormDirection + * _ModelViewInvScale + * _NeedEyeCoords + * _EyeZDir + * + * which are calculated in light.c and are correct for the current + * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW + * and _MESA_NEW_NEED_EYE_COORDS. + */ +void radeonUpdateLighting( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + /* Have to check these, or have an automatic shortcircuit mechanism + * to remove noop statechanges. (Or just do a better job on the + * front end). + */ + { + GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; + + if (ctx->_NeedEyeCoords) + tmp &= ~RADEON_LIGHT_IN_MODELSPACE; + else + tmp |= RADEON_LIGHT_IN_MODELSPACE; + + + /* Leave this test disabled: (unexplained q3 lockup) (even with + new packets) + */ + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) + { + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp; + } + } + + { + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye ); + fcmd[EYE_X] = ctx->_EyeZDir[0]; + fcmd[EYE_Y] = ctx->_EyeZDir[1]; + fcmd[EYE_Z] = - ctx->_EyeZDir[2]; + fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale; + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye ); + } + + +/* RADEON_STATECHANGE( rmesa, glt ); */ + + if (ctx->Light.Enabled) { + GLint p; + for (p = 0 ; p < MAX_LIGHTS; p++) { + if (ctx->Light.Light[p].Enabled) { + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] ); + + if (l->EyePosition[3] == 0.0) { + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + fcmd[LIT_POSITION_W] = 0; + fcmd[LIT_DIRECTION_W] = 0; + } else { + COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); + fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_W] = 0; + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } + } + } +} + + +void radeonLightfv( GLcontext *ctx, GLenum light, + GLenum pname, const GLfloat *params ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLint p = light - GL_LIGHT0; + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; + + + switch (pname) { + case GL_AMBIENT: + case GL_DIFFUSE: + case GL_SPECULAR: + update_light_colors( ctx, p ); + break; + + case GL_SPOT_DIRECTION: + /* picked up in update_light */ + break; + + case GL_POSITION: { + /* positions picked up in update_light, but can do flag here */ + GLuint flag = (p&1)? RADEON_LIGHT_1_IS_LOCAL : RADEON_LIGHT_0_IS_LOCAL; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + RADEON_STATECHANGE(rmesa, tcl); + if (l->EyePosition[3] != 0.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + break; + } + + case GL_SPOT_EXPONENT: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_EXPONENT] = params[0]; + break; + + case GL_SPOT_CUTOFF: { + GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff; + + RADEON_STATECHANGE(rmesa, tcl); + if (l->SpotCutoff != 180.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + break; + } + + case GL_CONSTANT_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_CONST] = params[0]; + break; + case GL_LINEAR_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_LINEAR] = params[0]; + break; + case GL_QUADRATIC_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_QUADRATIC] = params[0]; + break; + default: + return; + } + +} + + + + +void radeonLightModelfv( GLcontext *ctx, GLenum pname, + const GLfloat *param ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + switch (pname) { + case GL_LIGHT_MODEL_AMBIENT: + update_global_ambient( ctx ); + break; + + case GL_LIGHT_MODEL_LOCAL_VIEWER: + RADEON_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.LocalViewer) + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER; + break; + + case GL_LIGHT_MODEL_TWO_SIDE: + RADEON_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.TwoSide) + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE; + else + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE; + + check_twoside_fallback( ctx ); + +#if _HAVE_SWTNL + if (rmesa->TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +#endif + break; + + case GL_LIGHT_MODEL_COLOR_CONTROL: + radeonUpdateSpecular(ctx); + + RADEON_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= + ~RADEON_DIFFUSE_SPECULAR_COMBINE; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= + RADEON_DIFFUSE_SPECULAR_COMBINE; + break; + + default: + break; + } +} + + +/* ============================================================= + * Fog + */ + + +static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + union { int i; float f; } c, d; + GLchan col[4]; + + c.i = rmesa->hw.fog.cmd[FOG_C]; + d.i = rmesa->hw.fog.cmd[FOG_D]; + + switch (pname) { + case GL_FOG_MODE: + if (!ctx->Fog.Enabled) + return; + RADEON_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK; + switch (ctx->Fog.Mode) { + case GL_LINEAR: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR; + if (ctx->Fog.Start == ctx->Fog.End) { + c.f = 1.0F; + d.f = 1.0F; + } + else { + c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); + d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start); + } + break; + case GL_EXP: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP; + c.f = 0.0; + d.f = ctx->Fog.Density; + break; + case GL_EXP2: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2; + c.f = 0.0; + d.f = -(ctx->Fog.Density * ctx->Fog.Density); + break; + default: + return; + } + break; + case GL_FOG_DENSITY: + switch (ctx->Fog.Mode) { + case GL_EXP: + c.f = 0.0; + d.f = ctx->Fog.Density; + break; + case GL_EXP2: + c.f = 0.0; + d.f = -(ctx->Fog.Density * ctx->Fog.Density); + break; + default: + break; + } + break; + case GL_FOG_START: + case GL_FOG_END: + if (ctx->Fog.Mode == GL_LINEAR) { + if (ctx->Fog.Start == ctx->Fog.End) { + c.f = 1.0F; + d.f = 1.0F; + } else { + c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); + d.f = 1.0/(ctx->Fog.End-ctx->Fog.Start); + } + } + break; + case GL_FOG_COLOR: + RADEON_STATECHANGE( rmesa, ctx ); + UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = + radeonPackColor( 4, col[0], col[1], col[2], 0 ); + break; + case GL_FOG_COORDINATE_SOURCE_EXT: + /* What to do? + */ + break; + default: + return; + } + + if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) { + RADEON_STATECHANGE( rmesa, fog ); + rmesa->hw.fog.cmd[FOG_C] = c.i; + rmesa->hw.fog.cmd[FOG_D] = d.i; + } +} + +/* Examine lighting and texture state to determine if separate specular + * should be enabled. + */ +void radeonUpdateSpecular( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; + + if (NEED_SECONDARY_COLOR(ctx)) { + p |= RADEON_SPECULAR_ENABLE; + } else { + p &= ~RADEON_SPECULAR_ENABLE; + } + + if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) { + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p; + } + + /* Bizzare: have to leave lighting enabled to get fog. + */ + RADEON_STATECHANGE( rmesa, tcl ); + if ((ctx->Light.Enabled && + ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } + else if (ctx->Fog.Enabled) { + if (ctx->Light.Enabled) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } else { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } + } + else if (ctx->Light.Enabled) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } else if (ctx->Fog.ColorSumEnabled ) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; + } else { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; + } + +#if _HAVE_SWTNL + /* Update vertex/render formats + */ + if (rmesa->TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +#endif +} + + + +static void radeonLightingSpaceChange( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLboolean tmp; + RADEON_STATECHANGE( rmesa, tcl ); + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords); + + if (ctx->_NeedEyeCoords) + tmp = ctx->Transform.RescaleNormals; + else + tmp = !ctx->Transform.RescaleNormals; + + if ( tmp ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; + } +} + +void radeonInitLightStateFuncs( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int i; + + ctx->Driver.LightModelfv = radeonLightModelfv; + ctx->Driver.Lightfv = radeonLightfv; + ctx->Driver.Fogfv = radeonFogfv; + ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange; + + for (i = 0 ; i < 8; i++) { + struct gl_light *l = &ctx->Light.Light[i]; + GLenum p = GL_LIGHT0 + i; + *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX; + + ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient ); + ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse ); + ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular ); + ctx->Driver.Lightfv( ctx, p, GL_POSITION, 0 ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, 0 ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff ); + ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION, + &l->ConstantAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, + &l->LinearAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, + &l->QuadraticAttenuation ); + } + + ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, + ctx->Light.Model.Ambient ); + + ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 ); + ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); + ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); + ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); + ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); + ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, 0 ); +} diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c new file mode 100644 index 0000000..30a0c38 --- /dev/null +++ b/radeon/radeon_lock.c @@ -0,0 +1,124 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#include "glheader.h" +#include "mtypes.h" +#include "radeon_context.h" +#include "radeon_lock.h" +#include "radeon_tex.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" + +#include "drirenderbuffer.h" + +#if DEBUG_LOCKING +char *prevLockFile = NULL; +int prevLockLine = 0; +#endif + +/* Turn on/off page flipping according to the flags in the sarea: + */ +static void radeonUpdatePageFlipping(radeonContextPtr rmesa) +{ + rmesa->doPageFlip = rmesa->sarea->pfState; + if (rmesa->glCtx->WinSysDrawBuffer) { + driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, + rmesa->sarea->pfCurrentPage); + } +} + +/* Update the hardware state. This is called if another context has + * grabbed the hardware lock, which includes the X server. This + * function also updates the driver's window state after the X server + * moves, resizes or restacks a window -- the change will be reflected + * in the drawable position and clip rects. Since the X server grabs + * the hardware lock when it changes the window state, this routine will + * automatically be called after such a change. + */ +void radeonGetLock(radeonContextPtr rmesa, GLuint flags) +{ + __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; + __DRIdrawablePrivate *const readable = rmesa->dri.readable; + __DRIscreenPrivate *sPriv = rmesa->dri.screen; + drm_radeon_sarea_t *sarea = rmesa->sarea; + + drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); + + /* The window might have moved, so we might need to get new clip + * rects. + * + * NOTE: This releases and regrabs the hw lock to allow the X server + * to respond to the DRI protocol request for new drawable info. + * Since the hardware state depends on having the latest drawable + * clip rects, all state checking must be done _after_ this call. + */ + DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); + if (drawable != readable) { + DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); + } + + if (rmesa->lastStamp != drawable->lastStamp) { + radeonUpdatePageFlipping(rmesa); + radeonSetCliprects(rmesa); + radeonUpdateViewportOffset(rmesa->glCtx); + driUpdateFramebufferSize(rmesa->glCtx, drawable); + } + + RADEON_STATECHANGE(rmesa, ctx); + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= + RADEON_COLOR_TILE_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= + ~RADEON_COLOR_TILE_ENABLE; + } + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + int i; + sarea->ctx_owner = rmesa->dri.hwContext; + + for (i = 0; i < rmesa->nr_heaps; i++) { + DRI_AGE_TEXTURES(rmesa->texture_heaps[i]); + } + } + + rmesa->lost_context = GL_TRUE; +} diff --git a/radeon/radeon_lock.h b/radeon/radeon_lock.h new file mode 100644 index 0000000..86e96aa --- /dev/null +++ b/radeon/radeon_lock.h @@ -0,0 +1,112 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#ifndef __RADEON_LOCK_H__ +#define __RADEON_LOCK_H__ + +extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); + +/* Turn DEBUG_LOCKING on to find locking conflicts. + */ +#define DEBUG_LOCKING 0 + +#if DEBUG_LOCKING +extern char *prevLockFile; +extern int prevLockLine; + +#define DEBUG_LOCK() \ + do { \ + prevLockFile = (__FILE__); \ + prevLockLine = (__LINE__); \ + } while (0) + +#define DEBUG_RESET() \ + do { \ + prevLockFile = 0; \ + prevLockLine = 0; \ + } while (0) + +#define DEBUG_CHECK_LOCK() \ + do { \ + if ( prevLockFile ) { \ + fprintf( stderr, \ + "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ + prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ + exit( 1 ); \ + } \ + } while (0) + +#else + +#define DEBUG_LOCK() +#define DEBUG_RESET() +#define DEBUG_CHECK_LOCK() + +#endif + +/* + * !!! We may want to separate locks from locks with validation. This + * could be used to improve performance for those things commands that + * do not do any drawing !!! + */ + +/* Lock the hardware and validate our state. + */ +#define LOCK_HARDWARE( rmesa ) \ + do { \ + char __ret = 0; \ + DEBUG_CHECK_LOCK(); \ + DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ + (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \ + if ( __ret ) \ + radeonGetLock( (rmesa), 0 ); \ + DEBUG_LOCK(); \ + } while (0) + +#define UNLOCK_HARDWARE( rmesa ) \ + do { \ + DRM_UNLOCK( (rmesa)->dri.fd, \ + (rmesa)->dri.hwLock, \ + (rmesa)->dri.hwContext ); \ + DEBUG_RESET(); \ + } while (0) + +#endif /* __RADEON_LOCK_H__ */ diff --git a/radeon/radeon_maos.c b/radeon/radeon_maos.c new file mode 100644 index 0000000..ea1e893 --- /dev/null +++ b/radeon/radeon_maos.c @@ -0,0 +1,12 @@ + + +/* If using new packets, can choose either verts or arrays. + * Otherwise, must use verts. + */ +#include "radeon_context.h" +#define RADEON_MAOS_VERTS 0 +#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS) +#include "radeon_maos_verts.c" +#else +#include "radeon_maos_arrays.c" +#endif diff --git a/radeon/radeon_maos.h b/radeon/radeon_maos.h new file mode 100644 index 0000000..09039d6 --- /dev/null +++ b/radeon/radeon_maos.h @@ -0,0 +1,44 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos.h,v 1.1 2002/10/30 12:51:55 alanh Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + Tungsten Grahpics Inc., Austin, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __RADEON_MAOS_H__ +#define __RADEON_MAOS_H__ + +#include "radeon_context.h" + +extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs ); +extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ); + +#endif diff --git a/radeon/radeon_maos_arrays.c b/radeon/radeon_maos_arrays.c new file mode 100644 index 0000000..49118b5 --- /dev/null +++ b/radeon/radeon_maos_arrays.c @@ -0,0 +1,657 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "mtypes.h" +#include "macros.h" + +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_swtcl.h" +#include "radeon_maos.h" +#include "radeon_tcl.h" + +#if 0 +/* Usage: + * - from radeon_tcl_render + * - call radeonEmitArrays to ensure uptodate arrays in dma + * - emit primitives (new type?) which reference the data + * -- need to use elts for lineloop, quads, quadstrip/flat + * -- other primitives are all well-formed (need tristrip-1,fake-poly) + * + */ +static void emit_ubyte_rgba3( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p\n", + __FUNCTION__, count, stride, (void *)out); + + for (i = 0; i < count; i++) { + out->red = *data; + out->green = *(data+1); + out->blue = *(data+2); + out->alpha = 0xFF; + out++; + data += stride; + } +} + +static void emit_ubyte_rgba4( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 4) + COPY_DWORDS( out, data, count ); + else + for (i = 0; i < count; i++) { + *out++ = LE32_TO_CPU(*(int *)data); + data += stride; + } +} + + +static void emit_ubyte_rgba( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int size, + int stride, + int count ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); + + assert (!rvb->buf); + + if (stride == 0) { + radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = 1; + } + else { + radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 1; + rvb->aos_size = 1; + } + + /* Emit the data + */ + switch (size) { + case 3: + emit_ubyte_rgba3( ctx, rvb, data, stride, count ); + break; + case 4: + emit_ubyte_rgba4( ctx, rvb, data, stride, count ); + break; + default: + assert(0); + exit(1); + break; + } +} +#endif + +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ +} while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ +} while (0) +#endif + +static void emit_vecfog( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + GLfloat *out; + + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + assert (!rvb->buf); + + if (stride == 0) { + radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = 1; + } + else { + radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 1; + rvb->aos_size = 1; + } + + /* Emit the data + */ + out = (GLfloat *)(rvb->address + rvb->start); + for (i = 0; i < count; i++) { + out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data ); + out++; + data += stride; + } +} + +static void emit_vec4( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 4) + COPY_DWORDS( out, data, count ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out++; + data += stride; + } +} + + +static void emit_vec8( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 8) + COPY_DWORDS( out, data, count*2 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out += 2; + data += stride; + } +} + +static void emit_vec12( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 12) + COPY_DWORDS( out, data, count*3 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out[2] = *(int *)(data+8); + out += 3; + data += stride; + } +} + +static void emit_vec16( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + if (stride == 16) + COPY_DWORDS( out, data, count*4 ); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out[2] = *(int *)(data+8); + out[3] = *(int *)(data+12); + out += 4; + data += stride; + } +} + + +static void emit_vector( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int size, + int stride, + int count ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d size %d stride %d\n", + __FUNCTION__, count, size, stride); + + assert (!rvb->buf); + + if (stride == 0) { + radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = size; + } + else { + radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */ + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = size; + rvb->aos_size = size; + } + + /* Emit the data + */ + switch (size) { + case 1: + emit_vec4( ctx, rvb, data, stride, count ); + break; + case 2: + emit_vec8( ctx, rvb, data, stride, count ); + break; + case 3: + emit_vec12( ctx, rvb, data, stride, count ); + break; + case 4: + emit_vec16( ctx, rvb, data, stride, count ); + break; + default: + assert(0); + exit(1); + break; + } + +} + + + +static void emit_s0_vec( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = 0; + out += 2; + data += stride; + } +} + +static void emit_stq_vec( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int stride, + int count ) +{ + int i; + int *out = (int *)(rvb->address + rvb->start); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d\n", + __FUNCTION__, count, stride); + + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data+4); + out[2] = *(int *)(data+12); + out += 3; + data += stride; + } +} + + + + +static void emit_tex_vector( GLcontext *ctx, + struct radeon_dma_region *rvb, + char *data, + int size, + int stride, + int count ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int emitsize; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); + + assert (!rvb->buf); + + switch (size) { + case 4: emitsize = 3; break; + case 3: emitsize = 3; break; + default: emitsize = 2; break; + } + + + if (stride == 0) { + radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 ); + count = 1; + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = 0; + rvb->aos_size = emitsize; + } + else { + radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 ); + rvb->aos_start = GET_START(rvb); + rvb->aos_stride = emitsize; + rvb->aos_size = emitsize; + } + + + /* Emit the data + */ + switch (size) { + case 1: + emit_s0_vec( ctx, rvb, data, stride, count ); + break; + case 2: + emit_vec8( ctx, rvb, data, stride, count ); + break; + case 3: + emit_vec12( ctx, rvb, data, stride, count ); + break; + case 4: + emit_stq_vec( ctx, rvb, data, stride, count ); + break; + default: + assert(0); + exit(1); + break; + } +} + + + + +/* Emit any changed arrays to new GART memory, re-emit a packet to + * update the arrays. + */ +void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; + struct radeon_dma_region **component = rmesa->tcl.aos_components; + GLuint nr = 0; + GLuint vfmt = 0; + GLuint count = VB->Count; + GLuint vtx, unit; + +#if 0 + if (RADEON_DEBUG & DEBUG_VERTS) + _tnl_print_vert_flags( __FUNCTION__, inputs ); +#endif + + if (1) { + if (!rmesa->tcl.obj.buf) + emit_vector( ctx, + &rmesa->tcl.obj, + (char *)VB->ObjPtr->data, + VB->ObjPtr->size, + VB->ObjPtr->stride, + count); + + switch( VB->ObjPtr->size ) { + case 4: vfmt |= RADEON_CP_VC_FRMT_W0; + case 3: vfmt |= RADEON_CP_VC_FRMT_Z; + case 2: vfmt |= RADEON_CP_VC_FRMT_XY; + default: + break; + } + component[nr++] = &rmesa->tcl.obj; + } + + + if (inputs & VERT_BIT_NORMAL) { + if (!rmesa->tcl.norm.buf) + emit_vector( ctx, + &(rmesa->tcl.norm), + (char *)VB->NormalPtr->data, + 3, + VB->NormalPtr->stride, + count); + + vfmt |= RADEON_CP_VC_FRMT_N0; + component[nr++] = &rmesa->tcl.norm; + } + + if (inputs & VERT_BIT_COLOR0) { + int emitsize; + if (VB->ColorPtr[0]->size == 4 && + (VB->ColorPtr[0]->stride != 0 || + VB->ColorPtr[0]->data[0][3] != 1.0)) { + vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA; + emitsize = 4; + } + + else { + vfmt |= RADEON_CP_VC_FRMT_FPCOLOR; + emitsize = 3; + } + + if (!rmesa->tcl.rgba.buf) + emit_vector( ctx, + &(rmesa->tcl.rgba), + (char *)VB->ColorPtr[0]->data, + emitsize, + VB->ColorPtr[0]->stride, + count); + + + component[nr++] = &rmesa->tcl.rgba; + } + + + if (inputs & VERT_BIT_COLOR1) { + if (!rmesa->tcl.spec.buf) { + + emit_vector( ctx, + &rmesa->tcl.spec, + (char *)VB->SecondaryColorPtr[0]->data, + 3, + VB->SecondaryColorPtr[0]->stride, + count); + } + + vfmt |= RADEON_CP_VC_FRMT_FPSPEC; + component[nr++] = &rmesa->tcl.spec; + } + +/* FIXME: not sure if this is correct. May need to stitch this together with + secondary color. It seems odd that for primary color color and alpha values + are emitted together but for secondary color not. */ + if (inputs & VERT_BIT_FOG) { + if (!rmesa->tcl.fog.buf) + emit_vecfog( ctx, + &(rmesa->tcl.fog), + (char *)VB->FogCoordPtr->data, + VB->FogCoordPtr->stride, + count); + + vfmt |= RADEON_CP_VC_FRMT_FPFOG; + component[nr++] = &rmesa->tcl.fog; + } + + + vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] & + ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2)); + + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + if (inputs & VERT_BIT_TEX(unit)) { + if (!rmesa->tcl.tex[unit].buf) + emit_tex_vector( ctx, + &(rmesa->tcl.tex[unit]), + (char *)VB->TexCoordPtr[unit]->data, + VB->TexCoordPtr[unit]->size, + VB->TexCoordPtr[unit]->stride, + count ); + + vfmt |= RADEON_ST_BIT(unit); + /* assume we need the 3rd coord if texgen is active for r/q OR at least + 3 coords are submitted. This may not be 100% correct */ + if (VB->TexCoordPtr[unit]->size >= 3) { + vtx |= RADEON_Q_BIT(unit); + vfmt |= RADEON_Q_BIT(unit); + } + if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) ) + vtx |= RADEON_Q_BIT(unit); + else if ((VB->TexCoordPtr[unit]->size >= 3) && + ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) { + GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3); + if (((rmesa->NeedTexMatrix >> unit) & 1) && + (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) + radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; + } + component[nr++] = &rmesa->tcl.tex[unit]; + } + } + + if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) { + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx; + } + + rmesa->tcl.nr_aos_components = nr; + rmesa->tcl.vertex_format = vfmt; +} + + +void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + GLuint unit; + +#if 0 + if (RADEON_DEBUG & DEBUG_VERTS) + _tnl_print_vert_flags( __FUNCTION__, newinputs ); +#endif + + if (newinputs & VERT_BIT_POS) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ ); + + if (newinputs & VERT_BIT_NORMAL) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ ); + + if (newinputs & VERT_BIT_COLOR0) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ ); + + if (newinputs & VERT_BIT_COLOR1) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ ); + + if (newinputs & VERT_BIT_FOG) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ ); + + for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { + if (newinputs & VERT_BIT_TEX(unit)) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ ); + } +} diff --git a/radeon/radeon_maos_vbtmp.h b/radeon/radeon_maos_vbtmp.h new file mode 100644 index 0000000..034cda8 --- /dev/null +++ b/radeon/radeon_maos_vbtmp.h @@ -0,0 +1,301 @@ +/* + * Mesa 3-D graphics library + * Version: 4.1 + * + * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef LOCALVARS +#define LOCALVARS +#endif + +#undef TCL_DEBUG +#ifndef TCL_DEBUG +#define TCL_DEBUG 0 +#endif + +static void TAG(emit)( GLcontext *ctx, + GLuint start, GLuint end, + void *dest ) +{ + LOCALVARS + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + GLuint (*tc0)[4], (*tc1)[4], (*tc2)[4]; + GLfloat (*col)[4], (*spec)[4]; + GLfloat (*fog)[4]; + GLuint (*norm)[4]; + GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride; + GLuint tc2_stride, norm_stride; + GLuint fill_tex = 0; + GLuint rqcoordsnoswap = 0; + GLuint (*coord)[4]; + GLuint coord_stride; /* object coordinates */ + int i; + + union emit_union *v = (union emit_union *)dest; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s\n", __FUNCTION__); + + coord = (GLuint (*)[4])VB->ObjPtr->data; + coord_stride = VB->ObjPtr->stride; + + if (DO_TEX2) { + if (VB->TexCoordPtr[2]) { + const GLuint t2 = GET_TEXSOURCE(2); + tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data; + tc2_stride = VB->TexCoordPtr[t2]->stride; + if (DO_PTEX && VB->TexCoordPtr[t2]->size < 3) { + fill_tex |= (1<<2); + } + else if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) { + rqcoordsnoswap |= (1<<2); + } + } else { + tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2]; + tc2_stride = 0; + } + } + + if (DO_TEX1) { + if (VB->TexCoordPtr[1]) { + const GLuint t1 = GET_TEXSOURCE(1); + tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data; + tc1_stride = VB->TexCoordPtr[t1]->stride; + if (DO_PTEX && VB->TexCoordPtr[t1]->size < 3) { + fill_tex |= (1<<1); + } + else if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) { + rqcoordsnoswap |= (1<<1); + } + } else { + tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1]; + tc1_stride = 0; + } + } + + if (DO_TEX0) { + if (VB->TexCoordPtr[0]) { + const GLuint t0 = GET_TEXSOURCE(0); + tc0_stride = VB->TexCoordPtr[t0]->stride; + tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data; + if (DO_PTEX && VB->TexCoordPtr[t0]->size < 3) { + fill_tex |= (1<<0); + } + else if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) { + rqcoordsnoswap |= (1<<0); + } + } else { + tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0]; + tc0_stride = 0; + } + + } + + if (DO_NORM) { + if (VB->NormalPtr) { + norm_stride = VB->NormalPtr->stride; + norm = (GLuint (*)[4])VB->NormalPtr->data; + } else { + norm_stride = 0; + norm = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; + } + } + + if (DO_RGBA) { + if (VB->ColorPtr[0]) { + col = VB->ColorPtr[0]->data; + col_stride = VB->ColorPtr[0]->stride; + } else { + col = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; + col_stride = 0; + } + } + + if (DO_SPEC_OR_FOG) { + if (VB->SecondaryColorPtr[0]) { + spec = VB->SecondaryColorPtr[0]->data; + spec_stride = VB->SecondaryColorPtr[0]->stride; + } else { + spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; + spec_stride = 0; + } + } + + if (DO_SPEC_OR_FOG) { + if (VB->FogCoordPtr) { + fog = VB->FogCoordPtr->data; + fog_stride = VB->FogCoordPtr->stride; + } else { + fog = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_FOG]; + fog_stride = 0; + } + } + + + if (start) { + coord = (GLuint (*)[4])((GLubyte *)coord + start * coord_stride); + if (DO_TEX0) + tc0 = (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride); + if (DO_TEX1) + tc1 = (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride); + if (DO_TEX2) + tc2 = (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride); + if (DO_NORM) + norm = (GLuint (*)[4])((GLubyte *)norm + start * norm_stride); + if (DO_RGBA) + STRIDE_4F(col, start * col_stride); + if (DO_SPEC) + STRIDE_4F(spec, start * spec_stride); + if (DO_FOG) + STRIDE_4F(fog, start * fog_stride); + } + + + { + for (i=start; i < end; i++) { + + v[0].ui = coord[0][0]; + v[1].ui = coord[0][1]; + v[2].ui = coord[0][2]; + if (DO_W) { + v[3].ui = coord[0][3]; + v += 4; + } + else + v += 3; + coord = (GLuint (*)[4])((GLubyte *)coord + coord_stride); + + if (DO_NORM) { + v[0].ui = norm[0][0]; + v[1].ui = norm[0][1]; + v[2].ui = norm[0][2]; + v += 3; + norm = (GLuint (*)[4])((GLubyte *)norm + norm_stride); + } + if (DO_RGBA) { + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, col[0][2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, col[0][3]); + STRIDE_4F(col, col_stride); + v++; + } + if (DO_SPEC_OR_FOG) { + if (DO_SPEC) { + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, spec[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, spec[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, spec[0][2]); + STRIDE_4F(spec, spec_stride); + } + if (DO_FOG) { + UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, radeonComputeFogBlendFactor(ctx, fog[0][0])); + STRIDE_4F(fog, fog_stride); + } + if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui); + v++; + } + if (DO_TEX0) { + v[0].ui = tc0[0][0]; + v[1].ui = tc0[0][1]; + if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f); + if (DO_PTEX) { + if (fill_tex & (1<<0)) + v[2].f = 1.0; + else if (rqcoordsnoswap & (1<<0)) + v[2].ui = tc0[0][2]; + else + v[2].ui = tc0[0][3]; + if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f); + v += 3; + } + else + v += 2; + tc0 = (GLuint (*)[4])((GLubyte *)tc0 + tc0_stride); + } + if (DO_TEX1) { + v[0].ui = tc1[0][0]; + v[1].ui = tc1[0][1]; + if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f); + if (DO_PTEX) { + if (fill_tex & (1<<1)) + v[2].f = 1.0; + else if (rqcoordsnoswap & (1<<1)) + v[2].ui = tc1[0][2]; + else + v[2].ui = tc1[0][3]; + if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f); + v += 3; + } + else + v += 2; + tc1 = (GLuint (*)[4])((GLubyte *)tc1 + tc1_stride); + } + if (DO_TEX2) { + v[0].ui = tc2[0][0]; + v[1].ui = tc2[0][1]; + if (TCL_DEBUG) fprintf(stderr, "t2: %.2f %.2f ", v[0].f, v[1].f); + if (DO_PTEX) { + if (fill_tex & (1<<2)) + v[2].f = 1.0; + else if (rqcoordsnoswap & (1<<2)) + v[2].ui = tc2[0][2]; + else + v[2].ui = tc2[0][3]; + if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f); + v += 3; + } + else + v += 2; + tc2 = (GLuint (*)[4])((GLubyte *)tc2 + tc2_stride); + } + if (TCL_DEBUG) fprintf(stderr, "\n"); + } + } +} + + + +static void TAG(init)( void ) +{ + int sz = 3; + if (DO_W) sz++; + if (DO_NORM) sz += 3; + if (DO_RGBA) sz++; + if (DO_SPEC_OR_FOG) sz++; + if (DO_TEX0) sz += 2; + if (DO_TEX0 && DO_PTEX) sz++; + if (DO_TEX1) sz += 2; + if (DO_TEX1 && DO_PTEX) sz++; + if (DO_TEX2) sz += 2; + if (DO_TEX2 && DO_PTEX) sz++; + + setup_tab[IDX].emit = TAG(emit); + setup_tab[IDX].vertex_format = IND; + setup_tab[IDX].vertex_size = sz; +} + + +#undef IND +#undef TAG +#undef IDX diff --git a/radeon/radeon_maos_verts.c b/radeon/radeon_maos_verts.c new file mode 100644 index 0000000..65dbecf --- /dev/null +++ b/radeon/radeon_maos_verts.c @@ -0,0 +1,450 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc., Austin, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "mtypes.h" + +#include "vbo/vbo.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "math/m_translate.h" +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" +#include "radeon_tex.h" +#include "radeon_tcl.h" +#include "radeon_swtcl.h" +#include "radeon_maos.h" + + +#define RADEON_TCL_MAX_SETUP 19 + +union emit_union { float f; GLuint ui; radeon_color_t rgba; }; + +static struct { + void (*emit)( GLcontext *, GLuint, GLuint, void * ); + GLuint vertex_size; + GLuint vertex_format; +} setup_tab[RADEON_TCL_MAX_SETUP]; + +#define DO_W (IND & RADEON_CP_VC_FRMT_W0) +#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR) +#define DO_SPEC_OR_FOG (IND & RADEON_CP_VC_FRMT_PKSPEC) +#define DO_SPEC ((IND & RADEON_CP_VC_FRMT_PKSPEC) && \ + (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)) +#define DO_FOG ((IND & RADEON_CP_VC_FRMT_PKSPEC) && ctx->Fog.Enabled && \ + (ctx->Fog.FogCoordinateSource == GL_FOG_COORD)) +#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0) +#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1) +#define DO_TEX2 (IND & RADEON_CP_VC_FRMT_ST2) +#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0) +#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0) + +#define DO_TEX3 0 + +#define GET_TEXSOURCE(n) n + +/*********************************************************************** + * Generate vertex emit functions * + ***********************************************************************/ + + +/* Defined in order of increasing vertex size: + */ +#define IDX 0 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR) +#define TAG(x) x##_rgba +#include "radeon_maos_vbtmp.h" + +#define IDX 1 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_n +#include "radeon_maos_vbtmp.h" + +#define IDX 2 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0) +#define TAG(x) x##_rgba_st +#include "radeon_maos_vbtmp.h" + +#define IDX 3 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_rgba_n +#include "radeon_maos_vbtmp.h" + +#define IDX 4 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 5 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1) +#define TAG(x) x##_rgba_st_st +#include "radeon_maos_vbtmp.h" + +#define IDX 6 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_rgba_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 7 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1) +#define TAG(x) x##_rgba_spec_st_st +#include "radeon_maos_vbtmp.h" + +#define IDX 8 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_st_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 9 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_rgba_spec_st_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 10 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_Q0) +#define TAG(x) x##_rgba_stq +#include "radeon_maos_vbtmp.h" + +#define IDX 11 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_Q1| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_Q0) +#define TAG(x) x##_rgba_stq_stq +#include "radeon_maos_vbtmp.h" + +#define IDX 12 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_W0| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_Q0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_Q1| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_w_rgba_spec_stq_stq_n +#include "radeon_maos_vbtmp.h" + +#define IDX 13 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_ST2) +#define TAG(x) x##_rgba_st_st_st +#include "radeon_maos_vbtmp.h" + +#define IDX 14 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_ST2) +#define TAG(x) x##_rgba_spec_st_st_st +#include "radeon_maos_vbtmp.h" + +#define IDX 15 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_ST2| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_st_st_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 16 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_ST2| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_rgba_spec_st_st_st_n +#include "radeon_maos_vbtmp.h" + +#define IDX 17 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_Q0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_Q1| \ + RADEON_CP_VC_FRMT_ST2| \ + RADEON_CP_VC_FRMT_Q2) +#define TAG(x) x##_rgba_stq_stq_stq +#include "radeon_maos_vbtmp.h" + +#define IDX 18 +#define IND (RADEON_CP_VC_FRMT_XY| \ + RADEON_CP_VC_FRMT_Z| \ + RADEON_CP_VC_FRMT_W0| \ + RADEON_CP_VC_FRMT_PKCOLOR| \ + RADEON_CP_VC_FRMT_PKSPEC| \ + RADEON_CP_VC_FRMT_ST0| \ + RADEON_CP_VC_FRMT_Q0| \ + RADEON_CP_VC_FRMT_ST1| \ + RADEON_CP_VC_FRMT_Q1| \ + RADEON_CP_VC_FRMT_ST2| \ + RADEON_CP_VC_FRMT_Q2| \ + RADEON_CP_VC_FRMT_N0) +#define TAG(x) x##_w_rgba_spec_stq_stq_stq_n +#include "radeon_maos_vbtmp.h" + + + + +/*********************************************************************** + * Initialization + ***********************************************************************/ + + +static void init_tcl_verts( void ) +{ + init_rgba(); + init_n(); + init_rgba_n(); + init_rgba_st(); + init_st_n(); + init_rgba_st_st(); + init_rgba_st_n(); + init_rgba_spec_st_st(); + init_st_st_n(); + init_rgba_spec_st_st_n(); + init_rgba_stq(); + init_rgba_stq_stq(); + init_w_rgba_spec_stq_stq_n(); + init_rgba_st_st_st(); + init_rgba_spec_st_st_st(); + init_st_st_st_n(); + init_rgba_spec_st_st_st_n(); + init_rgba_stq_stq_stq(); + init_w_rgba_spec_stq_stq_stq_n(); +} + + +void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + GLuint req = 0; + GLuint unit; + GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] & + ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2)); + int i; + static int firsttime = 1; + + if (firsttime) { + init_tcl_verts(); + firsttime = 0; + } + + if (1) { + req |= RADEON_CP_VC_FRMT_Z; + if (VB->ObjPtr->size == 4) { + req |= RADEON_CP_VC_FRMT_W0; + } + } + + if (inputs & VERT_BIT_NORMAL) { + req |= RADEON_CP_VC_FRMT_N0; + } + + if (inputs & VERT_BIT_COLOR0) { + req |= RADEON_CP_VC_FRMT_PKCOLOR; + } + + if (inputs & (VERT_BIT_COLOR1|VERT_BIT_FOG)) { + req |= RADEON_CP_VC_FRMT_PKSPEC; + } + + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + if (inputs & VERT_BIT_TEX(unit)) { + req |= RADEON_ST_BIT(unit); + /* assume we need the 3rd coord if texgen is active for r/q OR at least + 3 coords are submitted. This may not be 100% correct */ + if (VB->TexCoordPtr[unit]->size >= 3) { + req |= RADEON_Q_BIT(unit); + vtx |= RADEON_Q_BIT(unit); + } + if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) ) + vtx |= RADEON_Q_BIT(unit); + else if ((VB->TexCoordPtr[unit]->size >= 3) && + ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) { + GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3); + if (((rmesa->NeedTexMatrix >> unit) & 1) && + (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) + radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; + } + } + } + + if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) { + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx; + } + + for (i = 0 ; i < RADEON_TCL_MAX_SETUP ; i++) + if ((setup_tab[i].vertex_format & req) == req) + break; + + if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format && + rmesa->tcl.indexed_verts.buf) + return; + + if (rmesa->tcl.indexed_verts.buf) + radeonReleaseArrays( ctx, ~0 ); + + radeonAllocDmaRegion( rmesa, + &rmesa->tcl.indexed_verts, + VB->Count * setup_tab[i].vertex_size * 4, + 4); + + /* The vertex code expects Obj to be clean to element 3. To fix + * this, add more vertex code (for obj-2, obj-3) or preferably move + * to maos. + */ + if (VB->ObjPtr->size < 3 || + (VB->ObjPtr->size == 3 && + (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0))) { + + _math_trans_4f( rmesa->tcl.ObjClean.data, + VB->ObjPtr->data, + VB->ObjPtr->stride, + GL_FLOAT, + VB->ObjPtr->size, + 0, + VB->Count ); + + switch (VB->ObjPtr->size) { + case 1: + _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 1); + case 2: + _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 2); + case 3: + if (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0) { + _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 3); + } + case 4: + default: + break; + } + + VB->ObjPtr = &rmesa->tcl.ObjClean; + } + + + + setup_tab[i].emit( ctx, 0, VB->Count, + rmesa->tcl.indexed_verts.address + + rmesa->tcl.indexed_verts.start ); + + rmesa->tcl.vertex_format = setup_tab[i].vertex_format; + rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts ); + rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size; + rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size; + + rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts; + rmesa->tcl.nr_aos_components = 1; +} + + + +void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + +#if 0 + if (RADEON_DEBUG & DEBUG_VERTS) + _tnl_print_vert_flags( __FUNCTION__, newinputs ); +#endif + + if (newinputs) + radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ ); +} diff --git a/radeon/radeon_sanity.c b/radeon/radeon_sanity.c new file mode 100644 index 0000000..5570577 --- /dev/null +++ b/radeon/radeon_sanity.c @@ -0,0 +1,1082 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */ +/************************************************************************** + +Copyright 2002 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc, Cedar Park, TX. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ +#include <errno.h> + +#include "glheader.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_sanity.h" + +/* Set this '1' to get more verbiage. + */ +#define MORE_VERBOSE 1 + +#if MORE_VERBOSE +#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE) +#define NORMAL (1) +#else +#define VERBOSE 0 +#define NORMAL (RADEON_DEBUG & DEBUG_VERBOSE) +#endif + + +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, + { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, + { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, + { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, + { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, + { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, + { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, + { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, + { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, + { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, + { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, + { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, + { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, + { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, + { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, + { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, + { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, + { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, + { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, + { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, + { 0, 4, "R200_PP_TXCBLEND_0" }, + { 0, 4, "R200_PP_TXCBLEND_1" }, + { 0, 4, "R200_PP_TXCBLEND_2" }, + { 0, 4, "R200_PP_TXCBLEND_3" }, + { 0, 4, "R200_PP_TXCBLEND_4" }, + { 0, 4, "R200_PP_TXCBLEND_5" }, + { 0, 4, "R200_PP_TXCBLEND_6" }, + { 0, 4, "R200_PP_TXCBLEND_7" }, + { 0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, + { 0, 6, "R200_PP_TFACTOR_0" }, + { 0, 4, "R200_SE_VTX_FMT_0" }, + { 0, 1, "R200_SE_VAP_CNTL" }, + { 0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, + { 0, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, + { 0, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, + { 0, 6, "R200_PP_TXFILTER_0" }, + { 0, 6, "R200_PP_TXFILTER_1" }, + { 0, 6, "R200_PP_TXFILTER_2" }, + { 0, 6, "R200_PP_TXFILTER_3" }, + { 0, 6, "R200_PP_TXFILTER_4" }, + { 0, 6, "R200_PP_TXFILTER_5" }, + { 0, 1, "R200_PP_TXOFFSET_0" }, + { 0, 1, "R200_PP_TXOFFSET_1" }, + { 0, 1, "R200_PP_TXOFFSET_2" }, + { 0, 1, "R200_PP_TXOFFSET_3" }, + { 0, 1, "R200_PP_TXOFFSET_4" }, + { 0, 1, "R200_PP_TXOFFSET_5" }, + { 0, 1, "R200_SE_VTE_CNTL" }, + { 0, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, + { 0, 1, "R200_PP_TAM_DEBUG3" }, + { 0, 1, "R200_PP_CNTL_X" }, + { 0, 1, "R200_RB3D_DEPTHXY_OFFSET" }, + { 0, 1, "R200_RE_AUX_SCISSOR_CNTL" }, + { 0, 2, "R200_RE_SCISSOR_TL_0" }, + { 0, 2, "R200_RE_SCISSOR_TL_1" }, + { 0, 2, "R200_RE_SCISSOR_TL_2" }, + { 0, 1, "R200_SE_VAP_CNTL_STATUS" }, + { 0, 1, "R200_SE_VTX_STATE_CNTL" }, + { 0, 1, "R200_RE_POINTSIZE" }, + { 0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" }, + { 0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */ + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */ + { 0, 1, "R200_PP_CUBIC_FACES_1" }, + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_1" }, + { 0, 1, "R200_PP_CUBIC_FACES_2" }, + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_2" }, + { 0, 1, "R200_PP_CUBIC_FACES_3" }, + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_3" }, + { 0, 1, "R200_PP_CUBIC_FACES_4" }, + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_4" }, + { 0, 1, "R200_PP_CUBIC_FACES_5" }, + { 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" }, + { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" }, + { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" }, + { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" }, + { 0, 3, "R200_RB3D_BLENDCOLOR" }, + { 0, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" }, + { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" }, + { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" }, + { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" }, + { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" }, + { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" }, + { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" }, + { 0, 2, "R200_PP_TRI_PERF" }, + { 0, 32, "R200_PP_AFS_0"}, /* 85 */ + { 0, 32, "R200_PP_AFS_1"}, + { 0, 8, "R200_ATF_TFACTOR"}, + { 0, 8, "R200_PP_TXCTLALL_0"}, + { 0, 8, "R200_PP_TXCTLALL_1"}, + { 0, 8, "R200_PP_TXCTLALL_2"}, + { 0, 8, "R200_PP_TXCTLALL_3"}, + { 0, 8, "R200_PP_TXCTLALL_4"}, + { 0, 8, "R200_PP_TXCTLALL_5"}, + { 0, 2, "R200_VAP_PVS_CNTL"}, +}; + +struct reg_names { + int idx; + const char *name; +}; + +static struct reg_names reg_names[] = { + { RADEON_PP_MISC, "RADEON_PP_MISC" }, + { RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" }, + { RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" }, + { RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" }, + { RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" }, + { RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" }, + { RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" }, + { RADEON_PP_CNTL, "RADEON_PP_CNTL" }, + { RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" }, + { RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" }, + { RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" }, + { RADEON_SE_CNTL, "RADEON_SE_CNTL" }, + { RADEON_SE_COORD_FMT, "RADEON_SE_COORDFMT" }, + { RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" }, + { RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" }, + { RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" }, + { RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" }, + { RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" }, + { RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" }, + { RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" }, + { RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" }, + { RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" }, + { RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" }, + { RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" }, + { RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" }, + { RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" }, + { RADEON_RE_MISC, "RADEON_RE_MISC" }, + { RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" }, + { RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" }, + { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" }, + { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" }, + { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" }, + { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" }, + { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" }, + { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" }, + { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" }, + { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" }, + { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" }, + { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" }, + { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" }, + { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" }, + { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" }, + { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" }, + { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" }, + { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" }, + { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" }, + { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" }, + { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" }, + { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" }, + { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" }, + { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" }, + { RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" }, + { RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" }, + { RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" }, + { RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" }, + { RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" }, + { RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" }, + { RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" }, + { RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" }, + { RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" }, + { RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" }, + { RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" }, + { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" }, + { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" }, + { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" }, + { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" }, + { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" }, + { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" }, + { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" }, + { RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" }, + { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" }, + { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" }, + { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" }, + { RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" }, + { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" }, + { RADEON_PP_TEX_SIZE_0, "RADEON_PP_TEX_SIZE_0" }, + { RADEON_PP_TEX_SIZE_1, "RADEON_PP_TEX_SIZE_1" }, + { RADEON_PP_TEX_SIZE_2, "RADEON_PP_TEX_SIZE_2" }, + { RADEON_PP_TEX_SIZE_0+4, "RADEON_PP_TEX_PITCH_0" }, + { RADEON_PP_TEX_SIZE_1+4, "RADEON_PP_TEX_PITCH_1" }, + { RADEON_PP_TEX_SIZE_2+4, "RADEON_PP_TEX_PITCH_2" }, + { RADEON_PP_CUBIC_FACES_0, "RADEON_PP_CUBIC_FACES_0" }, + { RADEON_PP_CUBIC_FACES_1, "RADEON_PP_CUBIC_FACES_1" }, + { RADEON_PP_CUBIC_FACES_2, "RADEON_PP_CUBIC_FACES_2" }, + { RADEON_PP_CUBIC_OFFSET_T0_0, "RADEON_PP_CUBIC_OFFSET_T0_0" }, + { RADEON_PP_CUBIC_OFFSET_T0_1, "RADEON_PP_CUBIC_OFFSET_T0_1" }, + { RADEON_PP_CUBIC_OFFSET_T0_2, "RADEON_PP_CUBIC_OFFSET_T0_2" }, + { RADEON_PP_CUBIC_OFFSET_T0_3, "RADEON_PP_CUBIC_OFFSET_T0_3" }, + { RADEON_PP_CUBIC_OFFSET_T0_4, "RADEON_PP_CUBIC_OFFSET_T0_4" }, + { RADEON_PP_CUBIC_OFFSET_T1_0, "RADEON_PP_CUBIC_OFFSET_T1_0" }, + { RADEON_PP_CUBIC_OFFSET_T1_1, "RADEON_PP_CUBIC_OFFSET_T1_1" }, + { RADEON_PP_CUBIC_OFFSET_T1_2, "RADEON_PP_CUBIC_OFFSET_T1_2" }, + { RADEON_PP_CUBIC_OFFSET_T1_3, "RADEON_PP_CUBIC_OFFSET_T1_3" }, + { RADEON_PP_CUBIC_OFFSET_T1_4, "RADEON_PP_CUBIC_OFFSET_T1_4" }, + { RADEON_PP_CUBIC_OFFSET_T2_0, "RADEON_PP_CUBIC_OFFSET_T2_0" }, + { RADEON_PP_CUBIC_OFFSET_T2_1, "RADEON_PP_CUBIC_OFFSET_T2_1" }, + { RADEON_PP_CUBIC_OFFSET_T2_2, "RADEON_PP_CUBIC_OFFSET_T2_2" }, + { RADEON_PP_CUBIC_OFFSET_T2_3, "RADEON_PP_CUBIC_OFFSET_T2_3" }, + { RADEON_PP_CUBIC_OFFSET_T2_4, "RADEON_PP_CUBIC_OFFSET_T2_4" }, +}; + +static struct reg_names scalar_names[] = { + { RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" }, + { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" }, + { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" }, + { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" }, + { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" }, + { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" }, + { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" }, + { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" }, + { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" }, + { RADEON_SS_SHININESS, "SHININESS" }, + { 1000, "" }, +}; + +/* Puff these out to make them look like normal (dword) registers. + */ +static struct reg_names vector_names[] = { + { RADEON_VS_MATRIX_0_ADDR * 4, "MATRIX_0" }, + { RADEON_VS_MATRIX_1_ADDR * 4, "MATRIX_1" }, + { RADEON_VS_MATRIX_2_ADDR * 4, "MATRIX_2" }, + { RADEON_VS_MATRIX_3_ADDR * 4, "MATRIX_3" }, + { RADEON_VS_MATRIX_4_ADDR * 4, "MATRIX_4" }, + { RADEON_VS_MATRIX_5_ADDR * 4, "MATRIX_5" }, + { RADEON_VS_MATRIX_6_ADDR * 4, "MATRIX_6" }, + { RADEON_VS_MATRIX_7_ADDR * 4, "MATRIX_7" }, + { RADEON_VS_MATRIX_8_ADDR * 4, "MATRIX_8" }, + { RADEON_VS_MATRIX_9_ADDR * 4, "MATRIX_9" }, + { RADEON_VS_MATRIX_10_ADDR * 4, "MATRIX_10" }, + { RADEON_VS_MATRIX_11_ADDR * 4, "MATRIX_11" }, + { RADEON_VS_MATRIX_12_ADDR * 4, "MATRIX_12" }, + { RADEON_VS_MATRIX_13_ADDR * 4, "MATRIX_13" }, + { RADEON_VS_MATRIX_14_ADDR * 4, "MATRIX_14" }, + { RADEON_VS_MATRIX_15_ADDR * 4, "MATRIX_15" }, + { RADEON_VS_LIGHT_AMBIENT_ADDR * 4, "LIGHT_AMBIENT" }, + { RADEON_VS_LIGHT_DIFFUSE_ADDR * 4, "LIGHT_DIFFUSE" }, + { RADEON_VS_LIGHT_SPECULAR_ADDR * 4, "LIGHT_SPECULAR" }, + { RADEON_VS_LIGHT_DIRPOS_ADDR * 4, "LIGHT_DIRPOS" }, + { RADEON_VS_LIGHT_HWVSPOT_ADDR * 4, "LIGHT_HWVSPOT" }, + { RADEON_VS_LIGHT_ATTENUATION_ADDR * 4, "LIGHT_ATTENUATION" }, + { RADEON_VS_MATRIX_EYE2CLIP_ADDR * 4, "MATRIX_EYE2CLIP" }, + { RADEON_VS_UCP_ADDR * 4, "UCP" }, + { RADEON_VS_GLOBAL_AMBIENT_ADDR * 4, "GLOBAL_AMBIENT" }, + { RADEON_VS_FOG_PARAM_ADDR * 4, "FOG_PARAM" }, + { RADEON_VS_EYE_VECTOR_ADDR * 4, "EYE_VECTOR" }, + { 1000, "" }, +}; + +union fi { float f; int i; }; + +#define ISVEC 1 +#define ISFLOAT 2 +#define TOUCHED 4 + +struct reg { + int idx; + struct reg_names *closest; + int flags; + union fi current; + union fi *values; + int nvalues; + int nalloc; + float vmin, vmax; +}; + + +static struct reg regs[Elements(reg_names)+1]; +static struct reg scalars[512+1]; +static struct reg vectors[512*4+1]; + +static int total, total_changed, bufs; + +static void init_regs( void ) +{ + struct reg_names *tmp; + int i; + + for (i = 0 ; i < Elements(regs)-1 ; i++) { + regs[i].idx = reg_names[i].idx; + regs[i].closest = ®_names[i]; + regs[i].flags = 0; + } + + for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) { + if (tmp[1].idx == i) tmp++; + scalars[i].idx = i; + scalars[i].closest = tmp; + scalars[i].flags = ISFLOAT; + } + + for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) { + if (tmp[1].idx*4 == i) tmp++; + vectors[i].idx = i; + vectors[i].closest = tmp; + vectors[i].flags = ISFLOAT|ISVEC; + } + + regs[Elements(regs)-1].idx = -1; + scalars[Elements(scalars)-1].idx = -1; + vectors[Elements(vectors)-1].idx = -1; +} + +static int find_or_add_value( struct reg *reg, int val ) +{ + int j; + + for ( j = 0 ; j < reg->nvalues ; j++) + if ( val == reg->values[j].i ) + return 1; + + if (j == reg->nalloc) { + reg->nalloc += 5; + reg->nalloc *= 2; + reg->values = (union fi *) realloc( reg->values, + reg->nalloc * sizeof(union fi) ); + } + + reg->values[reg->nvalues++].i = val; + return 0; +} + +static struct reg *lookup_reg( struct reg *tab, int reg ) +{ + int i; + + for (i = 0 ; tab[i].idx != -1 ; i++) { + if (tab[i].idx == reg) + return &tab[i]; + } + + fprintf(stderr, "*** unknown reg 0x%x\n", reg); + return NULL; +} + + +static const char *get_reg_name( struct reg *reg ) +{ + static char tmp[80]; + + if (reg->idx == reg->closest->idx) + return reg->closest->name; + + + if (reg->flags & ISVEC) { + if (reg->idx/4 != reg->closest->idx) + sprintf(tmp, "%s+%d[%d]", + reg->closest->name, + (reg->idx/4) - reg->closest->idx, + reg->idx%4); + else + sprintf(tmp, "%s[%d]", reg->closest->name, reg->idx%4); + } + else { + if (reg->idx != reg->closest->idx) + sprintf(tmp, "%s+%d", reg->closest->name, reg->idx - reg->closest->idx); + else + sprintf(tmp, "%s", reg->closest->name); + } + + return tmp; +} + +static int print_int_reg_assignment( struct reg *reg, int data ) +{ + int changed = (reg->current.i != data); + int ever_seen = find_or_add_value( reg, data ); + + if (VERBOSE || (NORMAL && (changed || !ever_seen))) + fprintf(stderr, " %s <-- 0x%x", get_reg_name(reg), data); + + if (NORMAL) { + if (!ever_seen) + fprintf(stderr, " *** BRAND NEW VALUE"); + else if (changed) + fprintf(stderr, " *** CHANGED"); + } + + reg->current.i = data; + + if (VERBOSE || (NORMAL && (changed || !ever_seen))) + fprintf(stderr, "\n"); + + return changed; +} + + +static int print_float_reg_assignment( struct reg *reg, float data ) +{ + int changed = (reg->current.f != data); + int newmin = (data < reg->vmin); + int newmax = (data > reg->vmax); + + if (VERBOSE || (NORMAL && (newmin || newmax || changed))) + fprintf(stderr, " %s <-- %.3f", get_reg_name(reg), data); + + if (NORMAL) { + if (newmin) { + fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin); + reg->vmin = data; + } + else if (newmax) { + fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax); + reg->vmax = data; + } + else if (changed) { + fprintf(stderr, " *** CHANGED"); + } + } + + reg->current.f = data; + + if (VERBOSE || (NORMAL && (newmin || newmax || changed))) + fprintf(stderr, "\n"); + + return changed; +} + +static int print_reg_assignment( struct reg *reg, int data ) +{ + float_ui32_type datau; + datau.ui32 = data; + reg->flags |= TOUCHED; + if (reg->flags & ISFLOAT) + return print_float_reg_assignment( reg, datau.f ); + else + return print_int_reg_assignment( reg, data ); +} + +static void print_reg( struct reg *reg ) +{ + if (reg->flags & TOUCHED) { + if (reg->flags & ISFLOAT) { + fprintf(stderr, " %s == %f\n", get_reg_name(reg), reg->current.f); + } else { + fprintf(stderr, " %s == 0x%x\n", get_reg_name(reg), reg->current.i); + } + } +} + + +static void dump_state( void ) +{ + int i; + + for (i = 0 ; i < Elements(regs) ; i++) + print_reg( ®s[i] ); + + for (i = 0 ; i < Elements(scalars) ; i++) + print_reg( &scalars[i] ); + + for (i = 0 ; i < Elements(vectors) ; i++) + print_reg( &vectors[i] ); +} + + + +static int radeon_emit_packets( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int id = (int)header.packet.packet_id; + int sz = packet[id].len; + int *data = (int *)cmdbuf->buf; + int i; + + if (sz * sizeof(int) > cmdbuf->bufsz) { + fprintf(stderr, "Packet overflows cmdbuf\n"); + return -EINVAL; + } + + if (!packet[id].name) { + fprintf(stderr, "*** Unknown packet 0 nr %d\n", id ); + return -EINVAL; + } + + + if (VERBOSE) + fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz ); + + for ( i = 0 ; i < sz ; i++) { + struct reg *reg = lookup_reg( regs, packet[id].start + i*4 ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int radeon_emit_scalars( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = header.scalars.offset; + int stride = header.scalars.stride; + int i; + + if (VERBOSE) + fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n", + start, stride, sz, start + stride * sz); + + + for (i = 0 ; i < sz ; i++, start += stride) { + struct reg *reg = lookup_reg( scalars, start ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int radeon_emit_scalars2( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = header.scalars.offset + 0x100; + int stride = header.scalars.stride; + int i; + + if (VERBOSE) + fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n", + start, stride, sz, start + stride * sz); + + if (start + stride * sz > 257) { + fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz); + return -1; + } + + for (i = 0 ; i < sz ; i++, start += stride) { + struct reg *reg = lookup_reg( scalars, start ); + if (print_reg_assignment( reg, data[i] )) + total_changed++; + total++; + } + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +/* Check: inf/nan/extreme-size? + * Check: table start, end, nr, etc. + */ +static int radeon_emit_vectors( + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.vectors.count; + int *data = (int *)cmdbuf->buf; + int start = header.vectors.offset; + int stride = header.vectors.stride; + int i,j; + + if (VERBOSE) + fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n", + start, stride, sz, start + stride * sz, header.i); + +/* if (start + stride * (sz/4) > 128) { */ +/* fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */ +/* return -1; */ +/* } */ + + for (i = 0 ; i < sz ; start += stride) { + int changed = 0; + for (j = 0 ; j < 4 ; i++,j++) { + struct reg *reg = lookup_reg( vectors, start*4+j ); + if (print_reg_assignment( reg, data[i] )) + changed = 1; + } + if (changed) + total_changed += 4; + total += 4; + } + + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int print_vertex_format( int vfmt ) +{ + if (NORMAL) { + fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + "vertex format", + vfmt, + "xy,", + (vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "", + (vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "", + (vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "", + (vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "", + (vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "", + (vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "", + (vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "", + (vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "", + (vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "", + (vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "", + (vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "", + (vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "", + (vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "", + (vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "", + (vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "", + (vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "", + (vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "", + (vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "", + (vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "", + (vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "", + (vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : ""); + + +/* if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */ +/* fprintf(stderr, " *** NEW VALUE"); */ + + fprintf(stderr, "\n"); + } + + return 0; +} + +static char *primname[0xf] = { + "NONE", + "POINTS", + "LINES", + "LINE_STRIP", + "TRIANGLES", + "TRIANGLE_FAN", + "TRIANGLE_STRIP", + "TRI_TYPE_2", + "RECT_LIST", + "3VRT_POINTS", + "3VRT_LINES", +}; + +static int print_prim_and_flags( int prim ) +{ + int numverts; + + if (NORMAL) + fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s\n", + "prim flags", + prim, + ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "", + ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "", + ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "", + (prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ", + (prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "", + (prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "", + (prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : ""); + + if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) { + fprintf(stderr, " *** Bad primitive: %x\n", prim & 0xf); + return -1; + } + + numverts = prim>>16; + + if (NORMAL) + fprintf(stderr, " prim: %s numverts %d\n", primname[prim&0xf], numverts); + + switch (prim & 0xf) { + case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE: + case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT: + if (numverts < 1) { + fprintf(stderr, "Bad nr verts for line %d\n", numverts); + return -1; + } + break; + case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE: + if ((numverts & 1) || numverts == 0) { + fprintf(stderr, "Bad nr verts for line %d\n", numverts); + return -1; + } + break; + case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP: + if (numverts < 2) { + fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts); + return -1; + } + break; + case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST: + case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST: + case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST: + case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST: + if (numverts % 3 || numverts == 0) { + fprintf(stderr, "Bad nr verts for tri %d\n", numverts); + return -1; + } + break; + case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN: + case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP: + if (numverts < 3) { + fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts); + return -1; + } + break; + default: + fprintf(stderr, "Bad primitive\n"); + return -1; + } + return 0; +} + +/* build in knowledge about each packet type + */ +static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int cmdsz; + int *cmd = (int *)cmdbuf->buf; + int *tmp; + int i, stride, size, start; + + cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16); + + if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 || + cmdsz * 4 > cmdbuf->bufsz || + cmdsz > RADEON_CP_PACKET_MAX_DWORDS) { + fprintf(stderr, "Bad packet\n"); + return -EINVAL; + } + + switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) { + case RADEON_CP_PACKET3_NOP: + if (NORMAL) + fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_NEXT_CHAR: + if (NORMAL) + fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_PLY_NEXTSCAN: + if (NORMAL) + fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_SET_SCISSORS: + if (NORMAL) + fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM: + if (NORMAL) + fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n", + cmdsz); + break; + case RADEON_CP_PACKET3_LOAD_MICROCODE: + if (NORMAL) + fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_WAIT_FOR_IDLE: + if (NORMAL) + fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz); + break; + + case RADEON_CP_PACKET3_3D_DRAW_VBUF: + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz); + print_vertex_format(cmd[1]); + print_prim_and_flags(cmd[2]); + break; + + case RADEON_CP_PACKET3_3D_DRAW_IMMD: + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_3D_DRAW_INDX: { + int neltdwords; + if (NORMAL) + fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz); + print_vertex_format(cmd[1]); + print_prim_and_flags(cmd[2]); + neltdwords = cmd[2]>>16; + neltdwords += neltdwords & 1; + neltdwords /= 2; + if (neltdwords + 3 != cmdsz) + fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n", + neltdwords, cmdsz); + break; + } + case RADEON_CP_PACKET3_LOAD_PALETTE: + if (NORMAL) + fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_3D_LOAD_VBPNTR: + if (NORMAL) { + fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz); + fprintf(stderr, " nr arrays: %d\n", cmd[1]); + } + + if (cmd[1]/2 + cmd[1]%2 != cmdsz - 3) { + fprintf(stderr, " ****** MISMATCH %d/%d *******\n", + cmd[1]/2 + cmd[1]%2 + 3, cmdsz); + return -EINVAL; + } + + if (NORMAL) { + tmp = cmd+2; + for (i = 0 ; i < cmd[1] ; i++) { + if (i & 1) { + stride = (tmp[0]>>24) & 0xff; + size = (tmp[0]>>16) & 0xff; + start = tmp[2]; + tmp += 3; + } + else { + stride = (tmp[0]>>8) & 0xff; + size = (tmp[0]) & 0xff; + start = tmp[1]; + } + fprintf(stderr, " array %d: start 0x%x vsize %d vstride %d\n", + i, start, size, stride ); + } + } + break; + case RADEON_CP_PACKET3_CNTL_PAINT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_BITBLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_SMALLTEXT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", + cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_POLYLINE: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_POLYSCANLINES: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", + cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_PAINT_MULTI: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", + cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", + cmdsz); + break; + case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT: + if (NORMAL) + fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", + cmdsz); + break; + default: + fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz); + break; + } + + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + + +/* Check cliprects for bounds, then pass on to above: + */ +static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf ) +{ + drm_clip_rect_t *boxes = cmdbuf->boxes; + int i = 0; + + if (VERBOSE && total_changed) { + dump_state(); + total_changed = 0; + } + else fprintf(stderr, "total_changed zero\n"); + + if (NORMAL) { + do { + if ( i < cmdbuf->nbox ) { + fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n", + i, cmdbuf->nbox, + boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2); + } + } while ( ++i < cmdbuf->nbox ); + } + + if (cmdbuf->nbox == 1) + cmdbuf->nbox = 0; + + return radeon_emit_packet3( cmdbuf ); +} + + +int radeonSanityCmdBuffer( radeonContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ) +{ + int idx; + drm_radeon_cmd_buffer_t cmdbuf; + drm_radeon_cmd_header_t header; + static int inited = 0; + + if (!inited) { + init_regs(); + inited = 1; + } + + cmdbuf.buf = rmesa->store.cmd_buf; + cmdbuf.bufsz = rmesa->store.cmd_used; + cmdbuf.boxes = boxes; + cmdbuf.nbox = nbox; + + while ( cmdbuf.bufsz >= sizeof(header) ) { + + header.i = *(int *)cmdbuf.buf; + cmdbuf.buf += sizeof(header); + cmdbuf.bufsz -= sizeof(header); + + switch (header.header.cmd_type) { + case RADEON_CMD_PACKET: + if (radeon_emit_packets( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_packets failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_SCALARS: + if (radeon_emit_scalars( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_scalars failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_SCALARS2: + if (radeon_emit_scalars2( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_scalars failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_VECTORS: + if (radeon_emit_vectors( header, &cmdbuf )) { + fprintf(stderr,"radeon_emit_vectors failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_DMA_DISCARD: + idx = header.dma.buf_idx; + if (NORMAL) + fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx); + bufs++; + break; + + case RADEON_CMD_PACKET3: + if (radeon_emit_packet3( &cmdbuf )) { + fprintf(stderr,"radeon_emit_packet3 failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_PACKET3_CLIP: + if (radeon_emit_packet3_cliprect( &cmdbuf )) { + fprintf(stderr,"radeon_emit_packet3_clip failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_WAIT: + break; + + default: + fprintf(stderr,"bad cmd_type %d at %p\n", + header.header.cmd_type, + cmdbuf.buf - sizeof(header)); + return -EINVAL; + } + } + + if (0) + { + static int n = 0; + n++; + if (n == 10) { + fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n", + bufs, + total, total_changed, + ((float)total_changed/(float)total*100.0)); + fprintf(stderr, "Total emitted per buf: %.2f\n", + (float)total/(float)bufs); + fprintf(stderr, "Real changes per buf: %.2f\n", + (float)total_changed/(float)bufs); + + bufs = n = total = total_changed = 0; + } + } + + return 0; +} diff --git a/radeon/radeon_sanity.h b/radeon/radeon_sanity.h new file mode 100644 index 0000000..1ec06bc --- /dev/null +++ b/radeon/radeon_sanity.h @@ -0,0 +1,8 @@ +#ifndef RADEON_SANITY_H +#define RADEON_SANITY_H + +extern int radeonSanityCmdBuffer( radeonContextPtr rmesa, + int nbox, + drm_clip_rect_t *boxes ); + +#endif diff --git a/radeon/radeon_screen.c b/radeon/radeon_screen.c new file mode 100644 index 0000000..907a987 --- /dev/null +++ b/radeon/radeon_screen.c @@ -0,0 +1,1113 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.c,v 1.7 2003/03/26 20:43:51 tsi Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file radeon_screen.c + * Screen initialization functions for the Radeon driver. + * + * \author Kevin E. Martin <martin@valinux.com> + * \author Gareth Hughes <gareth@valinux.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "mtypes.h" +#include "framebuffer.h" +#include "renderbuffer.h" + +#define STANDALONE_MMIO +#include "radeon_chipset.h" +#include "radeon_macros.h" +#include "radeon_screen.h" +#if !RADEON_COMMON +#include "radeon_context.h" +#include "radeon_span.h" +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) +#include "r200_context.h" +#include "r200_ioctl.h" +#include "r200_span.h" +#include "r200_tex.h" +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#include "r300_context.h" +#include "r300_fragprog.h" +#include "r300_tex.h" +#include "radeon_span.h" +#endif + +#include "utils.h" +#include "context.h" +#include "vblank.h" +#include "drirenderbuffer.h" + +#include "GL/internal/dri_interface.h" + +/* Radeon configuration + */ +#include "xmlpool.h" + +#if !RADEON_COMMON /* R100 */ +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN) + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_MAX_TEXTURE_UNITS(3,2,3) + DRI_CONF_HYPERZ(false) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) + DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0") + DRI_CONF_NO_NEG_LOD_BIAS(false) + DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) + DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) + DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST(false) + DRI_CONF_SECTION_END +DRI_CONF_END; +static const GLuint __driNConfigOptions = 14; + +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN) + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_MAX_TEXTURE_UNITS(6,2,6) + DRI_CONF_HYPERZ(false) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) + DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0") + DRI_CONF_NO_NEG_LOD_BIAS(false) + DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) + DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) + DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0") + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST(false) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_SOFTWARE + DRI_CONF_NV_VERTEX_PROGRAM(false) + DRI_CONF_SECTION_END +DRI_CONF_END; +static const GLuint __driNConfigOptions = 16; + +extern const struct dri_extension blend_extensions[]; +extern const struct dri_extension ARB_vp_extension[]; +extern const struct dri_extension NV_vp_extension[]; +extern const struct dri_extension ATI_fs_extension[]; +extern const struct dri_extension point_extensions[]; + +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + +/* TODO: integrate these into xmlpool.h! */ +#define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(texture_image_units,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Number of texture image units") \ + DRI_CONF_DESC(de,"Anzahl der Textureinheiten") \ +DRI_CONF_OPT_END + +#define DRI_CONF_MAX_TEXTURE_COORD_UNITS(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Number of texture coordinate units") \ + DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \ +DRI_CONF_OPT_END + +#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Size of command buffer (in KB)") \ + DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ +DRI_CONF_OPT_END + +#define DRI_CONF_DISABLE_S3TC(def) \ +DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \ + DRI_CONF_DESC(en,"Disable S3TC compression") \ +DRI_CONF_OPT_END + +#define DRI_CONF_DISABLE_FALLBACK(def) \ +DRI_CONF_OPT_BEGIN(disable_lowimpact_fallback,bool,def) \ + DRI_CONF_DESC(en,"Disable Low-impact fallback") \ +DRI_CONF_OPT_END + +#define DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(def) \ +DRI_CONF_OPT_BEGIN(disable_stencil_two_side,bool,def) \ + DRI_CONF_DESC(en,"Disable GL_EXT_stencil_two_side") \ +DRI_CONF_OPT_END + +#define DRI_CONF_FP_OPTIMIZATION(def) \ +DRI_CONF_OPT_BEGIN_V(fp_optimization,enum,def,"0:1") \ + DRI_CONF_DESC_BEGIN(en,"Fragment Program optimization") \ + DRI_CONF_ENUM(0,"Optimize for Speed") \ + DRI_CONF_ENUM(1,"Optimize for Quality") \ + DRI_CONF_DESC_END \ +DRI_CONF_OPT_END + +const char __driConfigOptions[] = +DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN) + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8) + DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8) + DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) + DRI_CONF_DISABLE_FALLBACK(false) + DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(false) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) + DRI_CONF_DEF_MAX_ANISOTROPY(1.0, "1.0,2.0,4.0,8.0,16.0") + DRI_CONF_NO_NEG_LOD_BIAS(false) + DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_DISABLE_S3TC(false) + DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) + DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) + DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) + DRI_CONF_FP_OPTIMIZATION(DRI_CONF_FP_OPTIMIZATION_SPEED) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST(false) + DRI_CONF_SECTION_END +DRI_CONF_END; +static const GLuint __driNConfigOptions = 18; + +#ifndef RADEON_DEBUG +int RADEON_DEBUG = 0; + +static const struct dri_debug_control debug_control[] = { + {"fall", DEBUG_FALLBACKS}, + {"tex", DEBUG_TEXTURE}, + {"ioctl", DEBUG_IOCTL}, + {"prim", DEBUG_PRIMS}, + {"vert", DEBUG_VERTS}, + {"state", DEBUG_STATE}, + {"code", DEBUG_CODEGEN}, + {"vfmt", DEBUG_VFMT}, + {"vtxf", DEBUG_VFMT}, + {"verb", DEBUG_VERBOSE}, + {"dri", DEBUG_DRI}, + {"dma", DEBUG_DMA}, + {"san", DEBUG_SANITY}, + {"sync", DEBUG_SYNC}, + {"pix", DEBUG_PIXEL}, + {"mem", DEBUG_MEMORY}, + {"allmsg", ~DEBUG_SYNC}, /* avoid the term "sync" because the parser uses strstr */ + {NULL, 0} +}; +#endif /* RADEON_DEBUG */ + +#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */ + +extern const struct dri_extension card_extensions[]; + +static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ); + +static int +radeonGetParam(int fd, int param, void *value) +{ + int ret; + drm_radeon_getparam_t gp; + + gp.param = param; + gp.value = value; + + ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + return ret; +} + +static __GLcontextModes * +radeonFillInModes( unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer ) +{ + __GLcontextModes * modes; + __GLcontextModes * m; + unsigned num_modes; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + + /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy + * enough to add support. Basically, if a context is created with an + * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping + * will never be used. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */ + }; + + u_int8_t depth_bits_array[2]; + u_int8_t stencil_bits_array[2]; + + + depth_bits_array[0] = depth_bits; + depth_bits_array[1] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; + back_buffer_factor = (have_back_buffer) ? 2 : 1; + + num_modes = depth_buffer_factor * back_buffer_factor * 4; + + if ( pixel_bits == 16 ) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); + m = modes; + if ( ! driFillInModes( & m, fb_format, fb_type, + depth_bits_array, stencil_bits_array, depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + GLX_TRUE_COLOR ) ) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + if ( ! driFillInModes( & m, fb_format, fb_type, + depth_bits_array, stencil_bits_array, depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + GLX_DIRECT_COLOR ) ) { + fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__ ); + return NULL; + } + + /* Mark the visual as slow if there are "fake" stencil bits. + */ + for ( m = modes ; m != NULL ; m = m->next ) { + if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { + m->visualRating = GLX_SLOW_CONFIG; + } + } + + return modes; +} + + +/* Create the device specific screen private data struct. + */ +static radeonScreenPtr +radeonCreateScreen( __DRIscreenPrivate *sPriv ) +{ + radeonScreenPtr screen; + RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; + unsigned char *RADEONMMIO; + PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = + (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); + void * const psc = sPriv->psc->screenConfigs; + + if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { + fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); + return GL_FALSE; + } + + /* Allocate the private area */ + screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); + if ( !screen ) { + __driUtilMessage("%s: Could not allocate memory for screen structure", + __FUNCTION__); + return NULL; + } + +#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); +#endif + + /* parse information in __driConfigOptions */ + driParseOptionInfo (&screen->optionCache, + __driConfigOptions, __driNConfigOptions); + + /* This is first since which regions we map depends on whether or + * not we are using a PCI card. + */ + screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); + { + int ret; + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, + &screen->gart_buffer_offset); + + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); + return NULL; + } + + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE, + &screen->gart_base); + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret); + return NULL; + } + + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR, + &screen->irq); + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); + return NULL; + } + screen->drmSupportsCubeMapsR200 = (sPriv->drmMinor >= 7); + screen->drmSupportsBlendColor = (sPriv->drmMinor >= 11); + screen->drmSupportsTriPerf = (sPriv->drmMinor >= 16); + screen->drmSupportsFragShader = (sPriv->drmMinor >= 18); + screen->drmSupportsPointSprites = (sPriv->drmMinor >= 13); + screen->drmSupportsCubeMapsR100 = (sPriv->drmMinor >= 15); + screen->drmSupportsVertexProgram = (sPriv->drmMinor >= 25); + } + + screen->mmio.handle = dri_priv->registerHandle; + screen->mmio.size = dri_priv->registerSize; + if ( drmMap( sPriv->fd, + screen->mmio.handle, + screen->mmio.size, + &screen->mmio.map ) ) { + FREE( screen ); + __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); + return NULL; + } + + RADEONMMIO = screen->mmio.map; + + screen->status.handle = dri_priv->statusHandle; + screen->status.size = dri_priv->statusSize; + if ( drmMap( sPriv->fd, + screen->status.handle, + screen->status.size, + &screen->status.map ) ) { + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); + return NULL; + } + screen->scratch = (__volatile__ u_int32_t *) + ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + + screen->buffers = drmMapBufs( sPriv->fd ); + if ( !screen->buffers ) { + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); + return NULL; + } + + if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { + screen->gartTextures.handle = dri_priv->gartTexHandle; + screen->gartTextures.size = dri_priv->gartTexMapSize; + if ( drmMap( sPriv->fd, + screen->gartTextures.handle, + screen->gartTextures.size, + (drmAddressPtr)&screen->gartTextures.map ) ) { + drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); + return NULL; + } + + screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; + } + + screen->chip_flags = 0; + /* XXX: add more chipsets */ + switch ( dri_priv->deviceID ) { + case PCI_CHIP_RADEON_LY: + case PCI_CHIP_RADEON_LZ: + case PCI_CHIP_RADEON_QY: + case PCI_CHIP_RADEON_QZ: + case PCI_CHIP_RN50_515E: + case PCI_CHIP_RN50_5969: + screen->chip_family = CHIP_FAMILY_RV100; + break; + + case PCI_CHIP_RS100_4136: + case PCI_CHIP_RS100_4336: + screen->chip_family = CHIP_FAMILY_RS100; + break; + + case PCI_CHIP_RS200_4137: + case PCI_CHIP_RS200_4337: + case PCI_CHIP_RS250_4237: + case PCI_CHIP_RS250_4437: + screen->chip_family = CHIP_FAMILY_RS200; + break; + + case PCI_CHIP_RADEON_QD: + case PCI_CHIP_RADEON_QE: + case PCI_CHIP_RADEON_QF: + case PCI_CHIP_RADEON_QG: + /* all original radeons (7200) presumably have a stencil op bug */ + screen->chip_family = CHIP_FAMILY_R100; + screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL; + break; + + case PCI_CHIP_RV200_QW: + case PCI_CHIP_RV200_QX: + case PCI_CHIP_RADEON_LW: + case PCI_CHIP_RADEON_LX: + screen->chip_family = CHIP_FAMILY_RV200; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_R200_BB: + case PCI_CHIP_R200_BC: + case PCI_CHIP_R200_QH: + case PCI_CHIP_R200_QL: + case PCI_CHIP_R200_QM: + screen->chip_family = CHIP_FAMILY_R200; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV250_If: + case PCI_CHIP_RV250_Ig: + case PCI_CHIP_RV250_Ld: + case PCI_CHIP_RV250_Lf: + case PCI_CHIP_RV250_Lg: + screen->chip_family = CHIP_FAMILY_RV250; + screen->chip_flags = R200_CHIPSET_YCBCR_BROKEN | RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV280_5960: + case PCI_CHIP_RV280_5961: + case PCI_CHIP_RV280_5962: + case PCI_CHIP_RV280_5964: + case PCI_CHIP_RV280_5965: + case PCI_CHIP_RV280_5C61: + case PCI_CHIP_RV280_5C63: + screen->chip_family = CHIP_FAMILY_RV280; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RS300_5834: + case PCI_CHIP_RS300_5835: + case PCI_CHIP_RS350_7834: + case PCI_CHIP_RS350_7835: + screen->chip_family = CHIP_FAMILY_RS300; + break; + + case PCI_CHIP_R300_AD: + case PCI_CHIP_R300_AE: + case PCI_CHIP_R300_AF: + case PCI_CHIP_R300_AG: + case PCI_CHIP_R300_ND: + case PCI_CHIP_R300_NE: + case PCI_CHIP_R300_NF: + case PCI_CHIP_R300_NG: + screen->chip_family = CHIP_FAMILY_R300; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV350_AP: + case PCI_CHIP_RV350_AQ: + case PCI_CHIP_RV350_AR: + case PCI_CHIP_RV350_AS: + case PCI_CHIP_RV350_AT: + case PCI_CHIP_RV350_AV: + case PCI_CHIP_RV350_AU: + case PCI_CHIP_RV350_NP: + case PCI_CHIP_RV350_NQ: + case PCI_CHIP_RV350_NR: + case PCI_CHIP_RV350_NS: + case PCI_CHIP_RV350_NT: + case PCI_CHIP_RV350_NV: + screen->chip_family = CHIP_FAMILY_RV350; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_R350_AH: + case PCI_CHIP_R350_AI: + case PCI_CHIP_R350_AJ: + case PCI_CHIP_R350_AK: + case PCI_CHIP_R350_NH: + case PCI_CHIP_R350_NI: + case PCI_CHIP_R360_NJ: + case PCI_CHIP_R350_NK: + screen->chip_family = CHIP_FAMILY_R350; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV370_5460: + case PCI_CHIP_RV370_5462: + case PCI_CHIP_RV370_5464: + case PCI_CHIP_RV370_5B60: + case PCI_CHIP_RV370_5B62: + case PCI_CHIP_RV370_5B63: + case PCI_CHIP_RV370_5B64: + case PCI_CHIP_RV370_5B65: + case PCI_CHIP_RV370_5657: + case PCI_CHIP_RV380_3150: + case PCI_CHIP_RV380_3152: + case PCI_CHIP_RV380_3154: + case PCI_CHIP_RV380_3E50: + case PCI_CHIP_RV380_3E54: + screen->chip_family = CHIP_FAMILY_RV380; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_R420_JN: + case PCI_CHIP_R420_JH: + case PCI_CHIP_R420_JI: + case PCI_CHIP_R420_JJ: + case PCI_CHIP_R420_JK: + case PCI_CHIP_R420_JL: + case PCI_CHIP_R420_JM: + case PCI_CHIP_R420_JO: + case PCI_CHIP_R420_JP: + case PCI_CHIP_R420_JT: + case PCI_CHIP_R481_4B49: + case PCI_CHIP_R481_4B4A: + case PCI_CHIP_R481_4B4B: + case PCI_CHIP_R481_4B4C: + case PCI_CHIP_R423_UH: + case PCI_CHIP_R423_UI: + case PCI_CHIP_R423_UJ: + case PCI_CHIP_R423_UK: + case PCI_CHIP_R430_554C: + case PCI_CHIP_R430_554D: + case PCI_CHIP_R430_554E: + case PCI_CHIP_R430_554F: + case PCI_CHIP_R423_5550: + case PCI_CHIP_R423_UQ: + case PCI_CHIP_R423_UR: + case PCI_CHIP_R423_UT: + case PCI_CHIP_R430_5D48: + case PCI_CHIP_R430_5D49: + case PCI_CHIP_R430_5D4A: + case PCI_CHIP_R480_5D4C: + case PCI_CHIP_R480_5D4D: + case PCI_CHIP_R480_5D4E: + case PCI_CHIP_R480_5D4F: + case PCI_CHIP_R480_5D50: + case PCI_CHIP_R480_5D52: + case PCI_CHIP_R423_5D57: + screen->chip_family = CHIP_FAMILY_R420; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + /* RV410 SE chips have half the pipes of regular RV410 */ + case PCI_CHIP_RV410_5E4C: + case PCI_CHIP_RV410_5E4F: + screen->chip_family = CHIP_FAMILY_RV380; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV410_564A: + case PCI_CHIP_RV410_564B: + case PCI_CHIP_RV410_564F: + case PCI_CHIP_RV410_5652: + case PCI_CHIP_RV410_5653: + case PCI_CHIP_RV410_5E48: + case PCI_CHIP_RV410_5E4A: + case PCI_CHIP_RV410_5E4B: + case PCI_CHIP_RV410_5E4D: + screen->chip_family = CHIP_FAMILY_RV410; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RS480_5954: + case PCI_CHIP_RS480_5955: + case PCI_CHIP_RS482_5974: + case PCI_CHIP_RS482_5975: + case PCI_CHIP_RS400_5A41: + case PCI_CHIP_RS400_5A42: + case PCI_CHIP_RC410_5A61: + case PCI_CHIP_RC410_5A62: + screen->chip_family = CHIP_FAMILY_RS400; + fprintf(stderr, "Warning, xpress200 detected.\n"); + break; + + default: + fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", + dri_priv->deviceID); + return NULL; + } + if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && + sPriv->ddxMinor < 2) { + fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); + return NULL; + } + + if (screen->chip_family <= CHIP_FAMILY_RS200) + screen->chip_flags |= RADEON_CLASS_R100; + else if (screen->chip_family <= CHIP_FAMILY_RV280) + screen->chip_flags |= RADEON_CLASS_R200; + else + screen->chip_flags |= RADEON_CLASS_R300; + + screen->cpp = dri_priv->bpp / 8; + screen->AGPMode = dri_priv->AGPMode; + + screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff ) << 16; + + if ( sPriv->drmMinor >= 10 ) { + drm_radeon_setparam_t sp; + + sp.param = RADEON_SETPARAM_FB_LOCATION; + sp.value = screen->fbLocation; + + drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM, + &sp, sizeof( sp ) ); + } + + screen->frontOffset = dri_priv->frontOffset; + screen->frontPitch = dri_priv->frontPitch; + screen->backOffset = dri_priv->backOffset; + screen->backPitch = dri_priv->backPitch; + screen->depthOffset = dri_priv->depthOffset; + screen->depthPitch = dri_priv->depthPitch; + + /* Check if ddx has set up a surface reg to cover depth buffer */ + screen->depthHasSurface = (sPriv->ddxMajor > 4) || + /* these chips don't use tiled z without hyperz. So always pretend + we have set up a surface which will cause linear reads/writes */ + ((screen->chip_family & RADEON_CLASS_R100) && + !(screen->chip_flags & RADEON_CHIPSET_TCL)); + + if ( dri_priv->textureSize == 0 ) { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = + dri_priv->log2GARTTexGran; + } else { + screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset + + screen->fbLocation; + screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize; + screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] = + dri_priv->log2TexGran; + } + + if ( !screen->gartTextures.map || dri_priv->textureSize == 0 + || getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1; + screen->texOffset[RADEON_GART_TEX_HEAP] = 0; + screen->texSize[RADEON_GART_TEX_HEAP] = 0; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0; + } else { + screen->numTexHeaps = RADEON_NR_TEX_HEAPS; + screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset; + screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize; + screen->logTexGranularity[RADEON_GART_TEX_HEAP] = + dri_priv->log2GARTTexGran; + } + + if ( glx_enable_extension != NULL ) { + if ( screen->irq != 0 ) { + (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); + (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); + (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); + } + + (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); + if (IS_R200_CLASS(screen)) + (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" ); + + (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); + (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); + } + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + if (IS_R200_CLASS(screen)) { + sPriv->psc->allocateMemory = (void *) r200AllocateMemoryMESA; + sPriv->psc->freeMemory = (void *) r200FreeMemoryMESA; + sPriv->psc->memoryOffset = (void *) r200GetMemoryOffsetMESA; + } +#endif + + screen->driScreen = sPriv; + screen->sarea_priv_offset = dri_priv->sarea_priv_offset; + return screen; +} + +/* Destroy the device specific screen private data struct. + */ +static void +radeonDestroyScreen( __DRIscreenPrivate *sPriv ) +{ + radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; + + if (!screen) + return; + + if ( screen->gartTextures.map ) { + drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); + } + drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + + /* free all option information */ + driDestroyOptionInfo (&screen->optionCache); + + FREE( screen ); + sPriv->private = NULL; +} + + +/* Initialize the driver specific screen private data. + */ +static GLboolean +radeonInitDriver( __DRIscreenPrivate *sPriv ) +{ + sPriv->private = (void *) radeonCreateScreen( sPriv ); + if ( !sPriv->private ) { + radeonDestroyScreen( sPriv ); + return GL_FALSE; + } + + return GL_TRUE; +} + + +/** + * Create the Mesa framebuffer and renderbuffers for a given window/drawable. + * + * \todo This function (and its interface) will need to be updated to support + * pbuffers. + */ +static GLboolean +radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, + __DRIdrawablePrivate *driDrawPriv, + const __GLcontextModes *mesaVis, + GLboolean isPixmap ) +{ + radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; + + if (isPixmap) { + return GL_FALSE; /* not implemented */ + } + else { + const GLboolean swDepth = GL_FALSE; + const GLboolean swAlpha = GL_FALSE; + const GLboolean swAccum = mesaVis->accumRedBits > 0; + const GLboolean swStencil = mesaVis->stencilBits > 0 && + mesaVis->depthBits != 24; + struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); + + /* front color renderbuffer */ + { + driRenderbuffer *frontRb + = driNewRenderbuffer(GL_RGBA, + driScrnPriv->pFB + screen->frontOffset, + screen->cpp, + screen->frontOffset, screen->frontPitch, + driDrawPriv); + radeonSetSpanFunctions(frontRb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); + } + + /* back color renderbuffer */ + if (mesaVis->doubleBufferMode) { + driRenderbuffer *backRb + = driNewRenderbuffer(GL_RGBA, + driScrnPriv->pFB + screen->backOffset, + screen->cpp, + screen->backOffset, screen->backPitch, + driDrawPriv); + radeonSetSpanFunctions(backRb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); + } + + /* depth renderbuffer */ + if (mesaVis->depthBits == 16) { + driRenderbuffer *depthRb + = driNewRenderbuffer(GL_DEPTH_COMPONENT16, + driScrnPriv->pFB + screen->depthOffset, + screen->cpp, + screen->depthOffset, screen->depthPitch, + driDrawPriv); + radeonSetSpanFunctions(depthRb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); + depthRb->depthHasSurface = screen->depthHasSurface; + } + else if (mesaVis->depthBits == 24) { + driRenderbuffer *depthRb + = driNewRenderbuffer(GL_DEPTH_COMPONENT24, + driScrnPriv->pFB + screen->depthOffset, + screen->cpp, + screen->depthOffset, screen->depthPitch, + driDrawPriv); + radeonSetSpanFunctions(depthRb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); + depthRb->depthHasSurface = screen->depthHasSurface; + } + + /* stencil renderbuffer */ + if (mesaVis->stencilBits > 0 && !swStencil) { + driRenderbuffer *stencilRb + = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, + driScrnPriv->pFB + screen->depthOffset, + screen->cpp, + screen->depthOffset, screen->depthPitch, + driDrawPriv); + radeonSetSpanFunctions(stencilRb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); + stencilRb->depthHasSurface = screen->depthHasSurface; + } + + _mesa_add_soft_renderbuffers(fb, + GL_FALSE, /* color */ + swDepth, + swStencil, + swAccum, + swAlpha, + GL_FALSE /* aux */); + driDrawPriv->driverPrivate = (void *) fb; + + return (driDrawPriv->driverPrivate != NULL); + } +} + + +static void +radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) +{ + _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); +} + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +/** + * Choose the appropriate CreateContext function based on the chipset. + * Eventually, all drivers will go through this process. + */ +static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPriv) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + + if (IS_R300_CLASS(screen)) + return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); + return GL_FALSE; +} + +/** + * Choose the appropriate DestroyContext function based on the chipset. + */ +static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv) +{ + radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; + + if (IS_R300_CLASS(radeon->radeonScreen)) + return r300DestroyContext(driContextPriv); +} + + +#endif + +#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) +static struct __DriverAPIRec radeonAPI = { + .InitDriver = radeonInitDriver, + .DestroyScreen = radeonDestroyScreen, + .CreateContext = radeonCreateContext, + .DestroyContext = radeonDestroyContext, + .CreateBuffer = radeonCreateBuffer, + .DestroyBuffer = radeonDestroyBuffer, + .SwapBuffers = radeonSwapBuffers, + .MakeCurrent = radeonMakeCurrent, + .UnbindContext = radeonUnbindContext, + .GetSwapInfo = getSwapInfo, + .GetMSC = driGetMSC32, + .WaitForMSC = driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = radeonCopySubBuffer, +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + .setTexOffset = r300SetTexOffset, +#endif +}; +#else +static const struct __DriverAPIRec r200API = { + .InitDriver = radeonInitDriver, + .DestroyScreen = radeonDestroyScreen, + .CreateContext = r200CreateContext, + .DestroyContext = r200DestroyContext, + .CreateBuffer = radeonCreateBuffer, + .DestroyBuffer = radeonDestroyBuffer, + .SwapBuffers = r200SwapBuffers, + .MakeCurrent = r200MakeCurrent, + .UnbindContext = r200UnbindContext, + .GetSwapInfo = getSwapInfo, + .GetMSC = driGetMSC32, + .WaitForMSC = driWaitForMSC32, + .WaitForSBC = NULL, + .SwapBuffersMSC = NULL, + .CopySubBuffer = r200CopySubBuffer, + .setTexOffset = r200SetTexOffset +}; +#endif + +/** + * This is the bootstrap function for the driver. libGL supplies all of the + * requisite information about the system, and the driver initializes itself. + * This routine also fills in the linked list pointed to by \c driver_modes + * with the \c __GLcontextModes that the driver can support for windows or + * pbuffers. + * + * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on + * failure. + */ +PUBLIC void * +__driCreateNewScreen_20050727( __DRInativeDisplay *dpy, + int scrn, __DRIscreen *psc, + const __GLcontextModes * modes, + const __DRIversion * ddx_version, + const __DRIversion * dri_version, + const __DRIversion * drm_version, + const __DRIframebuffer * frame_buffer, + drmAddress pSAREA, int fd, + int internal_api_version, + const __DRIinterfaceMethods * interface, + __GLcontextModes ** driver_modes ) +{ + __DRIscreenPrivate *psp; +#if !RADEON_COMMON + static const char *driver_name = "Radeon"; + static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 6, 0 }; +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + static const char *driver_name = "R200"; + static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 6, 0 }; +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + static const char *driver_name = "R300"; + static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 24, 0 }; +#endif + + dri_interface = interface; + + if ( ! driCheckDriDdxDrmVersions3( driver_name, + dri_version, & dri_expected, + ddx_version, & ddx_expected, + drm_version, & drm_expected ) ) { + return NULL; + } +#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) + psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, + ddx_version, dri_version, drm_version, + frame_buffer, pSAREA, fd, + internal_api_version, &radeonAPI); +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, + ddx_version, dri_version, drm_version, + frame_buffer, pSAREA, fd, + internal_api_version, &r200API); +#endif + + if ( psp != NULL ) { + RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv; + if (driver_modes) { + *driver_modes = radeonFillInModes( dri_priv->bpp, + (dri_priv->bpp == 16) ? 16 : 24, + (dri_priv->bpp == 16) ? 0 : 8, + (dri_priv->backOffset != dri_priv->depthOffset) ); + } + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create + * is called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + driInitExtensions( NULL, blend_extensions, GL_FALSE ); + driInitSingleExtension( NULL, ARB_vp_extension ); + driInitSingleExtension( NULL, NV_vp_extension ); + driInitSingleExtension( NULL, ATI_fs_extension ); + driInitExtensions( NULL, point_extensions, GL_FALSE ); +#endif + } + + return (void *) psp; +} + + +/** + * Get information about previous buffer swaps. + */ +static int +getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) +{ +#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) + radeonContextPtr rmesa; +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + r200ContextPtr rmesa; +#endif + + if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) + || (dPriv->driContextPriv->driverPrivate == NULL) + || (sInfo == NULL) ) { + return -1; + } + + rmesa = dPriv->driContextPriv->driverPrivate; + sInfo->swap_count = rmesa->swap_count; + sInfo->swap_ust = rmesa->swap_ust; + sInfo->swap_missed_count = rmesa->swap_missed_count; + + sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) + ? driCalculateSwapUsage( dPriv, 0, rmesa->swap_missed_ust ) + : 0.0; + + return 0; +} diff --git a/radeon/radeon_screen.h b/radeon/radeon_screen.h new file mode 100644 index 0000000..25e6fcf --- /dev/null +++ b/radeon/radeon_screen.h @@ -0,0 +1,115 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_screen.h,v 1.5 2002/12/16 16:18:58 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __RADEON_SCREEN_H__ +#define __RADEON_SCREEN_H__ + +/* + * IMPORTS: these headers contain all the DRI, X and kernel-related + * definitions that we need. + */ +#include "dri_util.h" +#include "radeon_dri.h" +#include "radeon_chipset.h" +#include "radeon_reg.h" +#include "drm_sarea.h" +#include "xmlconfig.h" + + +typedef struct { + drm_handle_t handle; /* Handle to the DRM region */ + drmSize size; /* Size of the DRM region */ + drmAddress map; /* Mapping of the DRM region */ +} radeonRegionRec, *radeonRegionPtr; + +typedef struct { + int chip_family; + int chip_flags; + int cpp; + int card_type; + int AGPMode; + unsigned int irq; /* IRQ number (0 means none) */ + + unsigned int fbLocation; + unsigned int frontOffset; + unsigned int frontPitch; + unsigned int backOffset; + unsigned int backPitch; + + unsigned int depthOffset; + unsigned int depthPitch; + + /* Shared texture data */ + int numTexHeaps; + int texOffset[RADEON_NR_TEX_HEAPS]; + int texSize[RADEON_NR_TEX_HEAPS]; + int logTexGranularity[RADEON_NR_TEX_HEAPS]; + + radeonRegionRec mmio; + radeonRegionRec status; + radeonRegionRec gartTextures; + + drmBufMapPtr buffers; + + __volatile__ u_int32_t *scratch; + + __DRIscreenPrivate *driScreen; + unsigned int sarea_priv_offset; + unsigned int gart_buffer_offset; /* offset in card memory space */ + unsigned int gart_texture_offset; /* offset in card memory space */ + unsigned int gart_base; + + GLboolean drmSupportsCubeMapsR200; /* need radeon kernel module >= 1.7 */ + GLboolean drmSupportsBlendColor; /* need radeon kernel module >= 1.11 */ + GLboolean drmSupportsTriPerf; /* need radeon kernel module >= 1.16 */ + GLboolean drmSupportsFragShader; /* need radeon kernel module >= 1.18 */ + GLboolean drmSupportsPointSprites; /* need radeon kernel module >= 1.13 */ + GLboolean drmSupportsCubeMapsR100; /* need radeon kernel module >= 1.15 */ + GLboolean drmSupportsVertexProgram; /* need radeon kernel module >= 1.25 */ + GLboolean depthHasSurface; + + /* Configuration cache with default values for all contexts */ + driOptionCache optionCache; +} radeonScreenRec, *radeonScreenPtr; + +#define IS_R100_CLASS(screen) \ + ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R100) +#define IS_R200_CLASS(screen) \ + ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200) +#define IS_R300_CLASS(screen) \ + ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300) + +#endif /* __RADEON_SCREEN_H__ */ diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c new file mode 100644 index 0000000..732a85e --- /dev/null +++ b/radeon/radeon_span.c @@ -0,0 +1,322 @@ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#include "glheader.h" +#include "swrast/swrast.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_span.h" +#include "radeon_tex.h" + +#include "drirenderbuffer.h" + +#define DBG 0 + +/* + * Note that all information needed to access pixels in a renderbuffer + * should be obtained through the gl_renderbuffer parameter, not per-context + * information. + */ +#define LOCAL_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLubyte *buf = (GLubyte *) drb->flippedData \ + + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ + GLuint p; \ + (void) p; + +#define LOCAL_DEPTH_VARS \ + driRenderbuffer *drb = (driRenderbuffer *) rb; \ + const __DRIdrawablePrivate *dPriv = drb->dPriv; \ + const GLuint bottom = dPriv->h - 1; \ + GLuint xo = dPriv->x; \ + GLuint yo = dPriv->y; \ + GLubyte *buf = (GLubyte *) drb->Base.Data; + +#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS + +#define Y_FLIP(Y) (bottom - (Y)) + +#define HW_LOCK() + +#define HW_UNLOCK() + +/* ================================================================ + * Color buffer + */ + +/* 16 bit, RGB565 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) radeon##x##_RGB565 +#define TAG2(x,y) radeon##x##_RGB565##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#include "spantmp2.h" + +/* 32 bit, ARGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) radeon##x##_ARGB8888 +#define TAG2(x,y) radeon##x##_ARGB8888##y +#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#include "spantmp2.h" + +/* ================================================================ + * Depth buffer + */ + +/* The Radeon family has depth tiling on all the time, so we have to convert + * the x,y coordinates into the memory bus address (mba) in the same + * manner as the engine. In each case, the linear block address (ba) + * is calculated, and then wired with x and y to produce the final + * memory address. + * The chip will do address translation on its own if the surface registers + * are set up correctly. It is not quite enough to get it working with hyperz + * too... + */ + +static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 4 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0..1] = 0 */ + +#ifdef COMPILE_R300 + ba = (y / 8) * (pitch / 8) + (x / 8); +#else + ba = (y / 16) * (pitch / 16) + (x / 16); +#endif + + address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ + address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ + address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ + address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ + + address |= (y & 0x8) << 7; /* a[10] = y[3] */ + address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ + address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ + + return address; + } +} + +static INLINE GLuint +radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) +{ + GLuint pitch = drb->pitch; + if (drb->depthHasSurface) { + return 2 * (x + y * pitch); + } else { + GLuint ba, address = 0; /* a[0] = 0 */ + + ba = (y / 16) * (pitch / 32) + (x / 32); + + address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ + address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ + address |= (x & 0x8) << 4; /* a[7] = x[3] */ + address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ + address |= (y & 0x8) << 7; /* a[10] = y[3] */ + address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ + address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ + + return address; + } +} + +/* 16-bit depth buffer functions + */ +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; + +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); + +#define TAG(x) radeon##x##_z16 +#include "depthtmp.h" + +/* 24 bit depth, 8 bit stencil depthbuffer functions + * + * Careful: It looks like the R300 uses ZZZS byte order while the R200 + * uses SZZZ for 24 bit depth, 8 bit stencil mode. + */ +#ifdef COMPILE_R300 +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x000000ff; \ + tmp |= ((d << 8) & 0xffffff00); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#else +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#endif + +#ifdef COMPILE_R300 +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + _y + yo )) & 0xffffff00) >> 8; \ + }while(0) +#else +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ + _y + yo )) & 0x00ffffff; +#endif + +#define TAG(x) radeon##x##_z24_s8 +#include "depthtmp.h" + +/* ================================================================ + * Stencil buffer + */ + +/* 24 bit depth, 8 bit stencil depthbuffer functions + */ +#ifdef COMPILE_R300 +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#else +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *(GLuint *)(buf + offset) = tmp; \ +} while (0) +#endif + +#ifdef COMPILE_R300 +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = tmp & 0x000000ff; \ +} while (0) +#else +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ + GLuint tmp = *(GLuint *)(buf + offset); \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) +#endif + +#define TAG(x) radeon##x##_z24_s8 +#include "stenciltmp.h" + +/* Move locking out to get reasonable span performance (10x better + * than doing this in HW_LOCK above). WaitForIdle() is the main + * culprit. + */ + +static void radeonSpanRenderStart(GLcontext * ctx) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); +#ifdef COMPILE_R300 + r300ContextPtr r300 = (r300ContextPtr) rmesa; + R300_FIREVERTICES(r300); +#else + RADEON_FIREVERTICES(rmesa); +#endif + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); +} + +static void radeonSpanRenderFinish(GLcontext * ctx) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + _swrast_flush(ctx); + UNLOCK_HARDWARE(rmesa); +} + +void radeonInitSpanFuncs(GLcontext * ctx) +{ + struct swrast_device_driver *swdd = + _swrast_GetDeviceDriverReference(ctx); + swdd->SpanRenderStart = radeonSpanRenderStart; + swdd->SpanRenderFinish = radeonSpanRenderFinish; +} + +/** + * Plug in the Get/Put routines for the given driRenderbuffer. + */ +void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +{ + if (drb->Base.InternalFormat == GL_RGBA) { + if (vis->redBits == 5 && vis->greenBits == 6 + && vis->blueBits == 5) { + radeonInitPointers_RGB565(&drb->Base); + } else { + radeonInitPointers_ARGB8888(&drb->Base); + } + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { + radeonInitDepthPointers_z16(&drb->Base); + } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { + radeonInitDepthPointers_z24_s8(&drb->Base); + } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { + radeonInitStencilPointers_z24_s8(&drb->Base); + } +} diff --git a/radeon/radeon_span.h b/radeon/radeon_span.h new file mode 100644 index 0000000..9abe086 --- /dev/null +++ b/radeon/radeon_span.h @@ -0,0 +1,50 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + */ + +#ifndef __RADEON_SPAN_H__ +#define __RADEON_SPAN_H__ + +#include "drirenderbuffer.h" + +extern void radeonInitSpanFuncs(GLcontext * ctx); +extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); + +#endif diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c new file mode 100644 index 0000000..4de05c7 --- /dev/null +++ b/radeon/radeon_state.c @@ -0,0 +1,2409 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.c,v 1.8 2002/12/16 16:18:58 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "api_arrayelt.h" +#include "enums.h" +#include "light.h" +#include "state.h" +#include "context.h" +#include "framebuffer.h" + +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_tcl.h" +#include "radeon_tex.h" +#include "radeon_swtcl.h" +#include "drirenderbuffer.h" + +static void radeonUpdateSpecular( GLcontext *ctx ); + +/* ============================================================= + * Alpha blending + */ + +static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC]; + GLubyte refByte; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ref); + + RADEON_STATECHANGE( rmesa, ctx ); + + pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK); + pp_misc |= (refByte & RADEON_REF_ALPHA_MASK); + + switch ( func ) { + case GL_NEVER: + pp_misc |= RADEON_ALPHA_TEST_FAIL; + break; + case GL_LESS: + pp_misc |= RADEON_ALPHA_TEST_LESS; + break; + case GL_EQUAL: + pp_misc |= RADEON_ALPHA_TEST_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= RADEON_ALPHA_TEST_LEQUAL; + break; + case GL_GREATER: + pp_misc |= RADEON_ALPHA_TEST_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= RADEON_ALPHA_TEST_NEQUAL; + break; + case GL_GEQUAL: + pp_misc |= RADEON_ALPHA_TEST_GEQUAL; + break; + case GL_ALWAYS: + pp_misc |= RADEON_ALPHA_TEST_PASS; + break; + } + + rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc; +} + +static void radeonBlendEquationSeparate( GLcontext *ctx, + GLenum modeRGB, GLenum modeA ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK; + GLboolean fallback = GL_FALSE; + + assert( modeRGB == modeA ); + + switch ( modeRGB ) { + case GL_FUNC_ADD: + case GL_LOGIC_OP: + b |= RADEON_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + b |= RADEON_COMB_FCN_SUB_CLAMP; + break; + + default: + if (ctx->Color.BlendEnabled) + fallback = GL_TRUE; + else + b |= RADEON_COMB_FCN_ADD_CLAMP; + break; + } + + FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback ); + if ( !fallback ) { + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b; + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; + } + } +} + +static void radeonBlendFuncSeparate( GLcontext *ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & + ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK); + GLboolean fallback = GL_FALSE; + + switch ( ctx->Color.BlendSrcRGB ) { + case GL_ZERO: + b |= RADEON_SRC_BLEND_GL_ZERO; + break; + case GL_ONE: + b |= RADEON_SRC_BLEND_GL_ONE; + break; + case GL_DST_COLOR: + b |= RADEON_SRC_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + b |= RADEON_SRC_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + b |= RADEON_SRC_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + b |= RADEON_SRC_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + b |= RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE; + break; + case GL_CONSTANT_COLOR: + case GL_ONE_MINUS_CONSTANT_COLOR: + case GL_CONSTANT_ALPHA: + case GL_ONE_MINUS_CONSTANT_ALPHA: + if (ctx->Color.BlendEnabled) + fallback = GL_TRUE; + else + b |= RADEON_SRC_BLEND_GL_ONE; + break; + default: + break; + } + + switch ( ctx->Color.BlendDstRGB ) { + case GL_ZERO: + b |= RADEON_DST_BLEND_GL_ZERO; + break; + case GL_ONE: + b |= RADEON_DST_BLEND_GL_ONE; + break; + case GL_SRC_COLOR: + b |= RADEON_DST_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + b |= RADEON_DST_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_COLOR: + b |= RADEON_DST_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_DST_ALPHA: + b |= RADEON_DST_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_CONSTANT_COLOR: + case GL_ONE_MINUS_CONSTANT_COLOR: + case GL_CONSTANT_ALPHA: + case GL_ONE_MINUS_CONSTANT_ALPHA: + if (ctx->Color.BlendEnabled) + fallback = GL_TRUE; + else + b |= RADEON_DST_BLEND_GL_ZERO; + break; + default: + break; + } + + FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback ); + if ( !fallback ) { + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b; + } +} + + +/* ============================================================= + * Depth testing + */ + +static void radeonDepthFunc( GLcontext *ctx, GLenum func ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK; + + switch ( ctx->Depth.Func ) { + case GL_NEVER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEVER; + break; + case GL_LESS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LESS; + break; + case GL_EQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_EQUAL; + break; + case GL_LEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LEQUAL; + break; + case GL_GREATER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GREATER; + break; + case GL_NOTEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEQUAL; + break; + case GL_GEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GEQUAL; + break; + case GL_ALWAYS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_ALWAYS; + break; + } +} + + +static void radeonDepthMask( GLcontext *ctx, GLboolean flag ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + RADEON_STATECHANGE( rmesa, ctx ); + + if ( ctx->Depth.Mask ) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_WRITE_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE; + } +} + +static void radeonClearDepth( GLcontext *ctx, GLclampd d ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & + RADEON_DEPTH_FORMAT_MASK); + + switch ( format ) { + case RADEON_DEPTH_FORMAT_16BIT_INT_Z: + rmesa->state.depth.clear = d * 0x0000ffff; + break; + case RADEON_DEPTH_FORMAT_24BIT_INT_Z: + rmesa->state.depth.clear = d * 0x00ffffff; + break; + } +} + + +/* ============================================================= + * Fog + */ + + +static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + union { int i; float f; } c, d; + GLchan col[4]; + + switch (pname) { + case GL_FOG_MODE: + if (!ctx->Fog.Enabled) + return; + RADEON_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK; + switch (ctx->Fog.Mode) { + case GL_LINEAR: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR; + break; + case GL_EXP: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP; + break; + case GL_EXP2: + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2; + break; + default: + return; + } + /* fallthrough */ + case GL_FOG_DENSITY: + case GL_FOG_START: + case GL_FOG_END: + if (!ctx->Fog.Enabled) + return; + c.i = rmesa->hw.fog.cmd[FOG_C]; + d.i = rmesa->hw.fog.cmd[FOG_D]; + switch (ctx->Fog.Mode) { + case GL_EXP: + c.f = 0.0; + /* While this is the opposite sign from the DDK, it makes the fog test + * pass, and matches r200. + */ + d.f = -ctx->Fog.Density; + break; + case GL_EXP2: + c.f = 0.0; + d.f = -(ctx->Fog.Density * ctx->Fog.Density); + break; + case GL_LINEAR: + if (ctx->Fog.Start == ctx->Fog.End) { + c.f = 1.0F; + d.f = 1.0F; + } else { + c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start); + /* While this is the opposite sign from the DDK, it makes the fog + * test pass, and matches r200. + */ + d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start); + } + break; + default: + break; + } + if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) { + RADEON_STATECHANGE( rmesa, fog ); + rmesa->hw.fog.cmd[FOG_C] = c.i; + rmesa->hw.fog.cmd[FOG_D] = d.i; + } + break; + case GL_FOG_COLOR: + RADEON_STATECHANGE( rmesa, ctx ); + UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK; + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= + radeonPackColor( 4, col[0], col[1], col[2], 0 ); + break; + case GL_FOG_COORD_SRC: + radeonUpdateSpecular( ctx ); + break; + default: + return; + } +} + + +/* ============================================================= + * Scissoring + */ + + +static GLboolean intersect_rect( drm_clip_rect_t *out, + drm_clip_rect_t *a, + drm_clip_rect_t *b ) +{ + *out = *a; + if ( b->x1 > out->x1 ) out->x1 = b->x1; + if ( b->y1 > out->y1 ) out->y1 = b->y1; + if ( b->x2 < out->x2 ) out->x2 = b->x2; + if ( b->y2 < out->y2 ) out->y2 = b->y2; + if ( out->x1 >= out->x2 ) return GL_FALSE; + if ( out->y1 >= out->y2 ) return GL_FALSE; + return GL_TRUE; +} + + +void radeonRecalcScissorRects( radeonContextPtr rmesa ) +{ + drm_clip_rect_t *out; + int i; + + /* Grow cliprect store? + */ + if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { + while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { + rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ + rmesa->state.scissor.numAllocedClipRects *= 2; + } + + if (rmesa->state.scissor.pClipRects) + FREE(rmesa->state.scissor.pClipRects); + + rmesa->state.scissor.pClipRects = + MALLOC( rmesa->state.scissor.numAllocedClipRects * + sizeof(drm_clip_rect_t) ); + + if ( rmesa->state.scissor.pClipRects == NULL ) { + rmesa->state.scissor.numAllocedClipRects = 0; + return; + } + } + + out = rmesa->state.scissor.pClipRects; + rmesa->state.scissor.numClipRects = 0; + + for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { + if ( intersect_rect( out, + &rmesa->pClipRects[i], + &rmesa->state.scissor.rect ) ) { + rmesa->state.scissor.numClipRects++; + out++; + } + } +} + + +static void radeonUpdateScissor( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if ( rmesa->dri.drawable ) { + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + + int x = ctx->Scissor.X; + int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; + int w = ctx->Scissor.X + ctx->Scissor.Width - 1; + int h = dPriv->h - ctx->Scissor.Y - 1; + + rmesa->state.scissor.rect.x1 = x + dPriv->x; + rmesa->state.scissor.rect.y1 = y + dPriv->y; + rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; + rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; + + radeonRecalcScissorRects( rmesa ); + } +} + + +static void radeonScissor( GLcontext *ctx, + GLint x, GLint y, GLsizei w, GLsizei h ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if ( ctx->Scissor.Enabled ) { + RADEON_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */ + radeonUpdateScissor( ctx ); + } + +} + + +/* ============================================================= + * Culling + */ + +static void radeonCullFace( GLcontext *ctx, GLenum unused ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL]; + + s |= RADEON_FFACE_SOLID | RADEON_BFACE_SOLID; + t &= ~(RADEON_CULL_FRONT | RADEON_CULL_BACK); + + if ( ctx->Polygon.CullFlag ) { + switch ( ctx->Polygon.CullFaceMode ) { + case GL_FRONT: + s &= ~RADEON_FFACE_SOLID; + t |= RADEON_CULL_FRONT; + break; + case GL_BACK: + s &= ~RADEON_BFACE_SOLID; + t |= RADEON_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + s &= ~(RADEON_FFACE_SOLID | RADEON_BFACE_SOLID); + t |= (RADEON_CULL_FRONT | RADEON_CULL_BACK); + break; + } + } + + if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) { + RADEON_STATECHANGE(rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = s; + } + + if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) { + RADEON_STATECHANGE(rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t; + } +} + +static void radeonFrontFace( GLcontext *ctx, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK; + + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW; + + switch ( mode ) { + case GL_CW: + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW; + break; + case GL_CCW: + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CCW; + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW; + break; + } +} + + +/* ============================================================= + * Line state + */ +static void radeonLineWidth( GLcontext *ctx, GLfloat widthf ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, lin ); + RADEON_STATECHANGE( rmesa, set ); + + /* Line width is stored in U6.4 format. + */ + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0); + if ( widthf > 1.0 ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_WIDELINE_ENABLE; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE; + } +} + +static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, lin ); + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); +} + + +/* ============================================================= + * Masks + */ +static void radeonColorMask( GLcontext *ctx, + GLboolean r, GLboolean g, + GLboolean b, GLboolean a ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], + ctx->Color.ColorMask[ACOMP] ); + + if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { + RADEON_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask; + } +} + + +/* ============================================================= + * Polygon state + */ + +static void radeonPolygonOffset( GLcontext *ctx, + GLfloat factor, GLfloat units ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + float_ui32_type constant = { units * rmesa->state.depth.scale }; + float_ui32_type factoru = { factor }; + + RADEON_STATECHANGE( rmesa, zbs ); + rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR] = factoru.ui32; + rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32; +} + +static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint i; + drm_radeon_stipple_t stipple; + + /* Must flip pattern upside down. + */ + for ( i = 0 ; i < 32 ; i++ ) { + rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i]; + } + + /* TODO: push this into cmd mechanism + */ + RADEON_FIREVERTICES( rmesa ); + LOCK_HARDWARE( rmesa ); + + /* FIXME: Use window x,y offsets into stipple RAM. + */ + stipple.mask = rmesa->state.stipple.mask; + drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, + &stipple, sizeof(drm_radeon_stipple_t) ); + UNLOCK_HARDWARE( rmesa ); +} + +static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; + + /* Can't generally do unfilled via tcl, but some good special + * cases work. + */ + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag); + if (rmesa->TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +} + + +/* ============================================================= + * Rendering attributes + * + * We really don't want to recalculate all this every time we bind a + * texture. These things shouldn't change all that often, so it makes + * sense to break them out of the core texture state update routines. + */ + +/* Examine lighting and texture state to determine if separate specular + * should be enabled. + */ +static void radeonUpdateSpecular( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + u_int32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; + GLuint flag = 0; + + RADEON_STATECHANGE( rmesa, tcl ); + + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE; + + p &= ~RADEON_SPECULAR_ENABLE; + + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_DIFFUSE_SPECULAR_COMBINE; + + + if (ctx->Light.Enabled && + ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + p |= RADEON_SPECULAR_ENABLE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= + ~RADEON_DIFFUSE_SPECULAR_COMBINE; + } + else if (ctx->Light.Enabled) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } else if (ctx->Fog.ColorSumEnabled ) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + p |= RADEON_SPECULAR_ENABLE; + } else { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; + } + + if (ctx->Fog.Enabled) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC; + if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH) { + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR; + /* Bizzare: have to leave lighting enabled to get fog. */ + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; + } + else { + /* cannot do tcl fog factor calculation with fog coord source + * (send precomputed factors). Cannot use precomputed fog + * factors together with tcl spec light (need tcl fallback) */ + flag = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] & + RADEON_TCL_COMPUTE_SPECULAR) != 0; + } + } + + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag); + + if (NEED_SECONDARY_COLOR(ctx)) { + assert( (p & RADEON_SPECULAR_ENABLE) != 0 ); + } else { + assert( (p & RADEON_SPECULAR_ENABLE) == 0 ); + } + + if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) { + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p; + } + + /* Update vertex/render formats + */ + if (rmesa->TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } +} + + +/* ============================================================= + * Materials + */ + + +/* Update on colormaterial, material emmissive/ambient, + * lightmodel.globalambient + */ +static void update_global_ambient( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( glt ); + + /* Need to do more if both emmissive & ambient are PREMULT: + * Hope this is not needed for MULT + */ + if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & + ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | + (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) + { + COPY_3V( &fcmd[GLT_RED], + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); + ACC_SCALE_3V( &fcmd[GLT_RED], + ctx->Light.Model.Ambient, + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); + } + else + { + COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); + } + + RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt); +} + +/* Update on change to + * - light[p].colors + * - light[p].enabled + */ +static void update_light_colors( GLcontext *ctx, GLuint p ) +{ + struct gl_light *l = &ctx->Light.Light[p]; + +/* fprintf(stderr, "%s\n", __FUNCTION__); */ + + if (l->Enabled) { + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); + + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); + COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } +} + +/* Also fallback for asym colormaterial mode in twoside lighting... + */ +static void check_twoside_fallback( GLcontext *ctx ) +{ + GLboolean fallback = GL_FALSE; + GLint i; + + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + if (ctx->Light.ColorMaterialEnabled && + (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != + ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1)) + fallback = GL_TRUE; + else { + for (i = MAT_ATTRIB_FRONT_AMBIENT; i < MAT_ATTRIB_FRONT_INDEXES; i+=2) + if (memcmp( ctx->Light.Material.Attrib[i], + ctx->Light.Material.Attrib[i+1], + sizeof(GLfloat)*4) != 0) { + fallback = GL_TRUE; + break; + } + } + } + + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback ); +} + + +static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; + + light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | + (3 << RADEON_AMBIENT_SOURCE_SHIFT) | + (3 << RADEON_DIFFUSE_SOURCE_SHIFT) | + (3 << RADEON_SPECULAR_SOURCE_SHIFT)); + + if (ctx->Light.ColorMaterialEnabled) { + GLuint mask = ctx->Light.ColorMaterialBitmask; + + if (mask & MAT_BIT_FRONT_EMISSION) { + light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_EMISSIVE_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << + RADEON_EMISSIVE_SOURCE_SHIFT); + } + + if (mask & MAT_BIT_FRONT_AMBIENT) { + light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_AMBIENT_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << + RADEON_AMBIENT_SOURCE_SHIFT); + } + + if (mask & MAT_BIT_FRONT_DIFFUSE) { + light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_DIFFUSE_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << + RADEON_DIFFUSE_SOURCE_SHIFT); + } + + if (mask & MAT_BIT_FRONT_SPECULAR) { + light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << + RADEON_SPECULAR_SOURCE_SHIFT); + } + else { + light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << + RADEON_SPECULAR_SOURCE_SHIFT); + } + } + else { + /* Default to MULT: + */ + light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_EMISSIVE_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT); + } + + if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1; + } +} + +void radeonUpdateMaterial( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLfloat (*mat)[4] = ctx->Light.Material.Attrib; + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); + GLuint mask = ~0; + + if (ctx->Light.ColorMaterialEnabled) + mask &= ~ctx->Light.ColorMaterialBitmask; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + + if (mask & MAT_BIT_FRONT_EMISSION) { + fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0]; + fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1]; + fcmd[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_FRONT_EMISSION][2]; + fcmd[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_FRONT_EMISSION][3]; + } + if (mask & MAT_BIT_FRONT_AMBIENT) { + fcmd[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_FRONT_AMBIENT][0]; + fcmd[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_FRONT_AMBIENT][1]; + fcmd[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_FRONT_AMBIENT][2]; + fcmd[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_FRONT_AMBIENT][3]; + } + if (mask & MAT_BIT_FRONT_DIFFUSE) { + fcmd[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_FRONT_DIFFUSE][0]; + fcmd[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_FRONT_DIFFUSE][1]; + fcmd[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_FRONT_DIFFUSE][2]; + fcmd[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_FRONT_DIFFUSE][3]; + } + if (mask & MAT_BIT_FRONT_SPECULAR) { + fcmd[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_FRONT_SPECULAR][0]; + fcmd[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_FRONT_SPECULAR][1]; + fcmd[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_FRONT_SPECULAR][2]; + fcmd[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_FRONT_SPECULAR][3]; + } + if (mask & MAT_BIT_FRONT_SHININESS) { + fcmd[MTL_SHININESS] = mat[MAT_ATTRIB_FRONT_SHININESS][0]; + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl ); + + check_twoside_fallback( ctx ); +/* update_global_ambient( ctx );*/ +} + +/* _NEW_LIGHT + * _NEW_MODELVIEW + * _MESA_NEW_NEED_EYE_COORDS + * + * Uses derived state from mesa: + * _VP_inf_norm + * _h_inf_norm + * _Position + * _NormDirection + * _ModelViewInvScale + * _NeedEyeCoords + * _EyeZDir + * + * which are calculated in light.c and are correct for the current + * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW + * and _MESA_NEW_NEED_EYE_COORDS. + */ +static void update_light( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + /* Have to check these, or have an automatic shortcircuit mechanism + * to remove noop statechanges. (Or just do a better job on the + * front end). + */ + { + GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; + + if (ctx->_NeedEyeCoords) + tmp &= ~RADEON_LIGHT_IN_MODELSPACE; + else + tmp |= RADEON_LIGHT_IN_MODELSPACE; + + + /* Leave this test disabled: (unexplained q3 lockup) (even with + new packets) + */ + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) + { + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp; + } + } + + { + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye ); + fcmd[EYE_X] = ctx->_EyeZDir[0]; + fcmd[EYE_Y] = ctx->_EyeZDir[1]; + fcmd[EYE_Z] = - ctx->_EyeZDir[2]; + fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale; + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye ); + } + + + + if (ctx->Light.Enabled) { + GLint p; + for (p = 0 ; p < MAX_LIGHTS; p++) { + if (ctx->Light.Light[p].Enabled) { + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] ); + + if (l->EyePosition[3] == 0.0) { + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + fcmd[LIT_POSITION_W] = 0; + fcmd[LIT_DIRECTION_W] = 0; + } else { + COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); + fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_W] = 0; + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); + } + } + } +} + +static void radeonLightfv( GLcontext *ctx, GLenum light, + GLenum pname, const GLfloat *params ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLint p = light - GL_LIGHT0; + struct gl_light *l = &ctx->Light.Light[p]; + GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; + + + switch (pname) { + case GL_AMBIENT: + case GL_DIFFUSE: + case GL_SPECULAR: + update_light_colors( ctx, p ); + break; + + case GL_SPOT_DIRECTION: + /* picked up in update_light */ + break; + + case GL_POSITION: { + /* positions picked up in update_light, but can do flag here */ + GLuint flag; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + /* FIXME: Set RANGE_ATTEN only when needed */ + if (p&1) + flag = RADEON_LIGHT_1_IS_LOCAL; + else + flag = RADEON_LIGHT_0_IS_LOCAL; + + RADEON_STATECHANGE(rmesa, tcl); + if (l->EyePosition[3] != 0.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + break; + } + + case GL_SPOT_EXPONENT: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_EXPONENT] = params[0]; + break; + + case GL_SPOT_CUTOFF: { + GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT; + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff; + + RADEON_STATECHANGE(rmesa, tcl); + if (l->SpotCutoff != 180.0F) + rmesa->hw.tcl.cmd[idx] |= flag; + else + rmesa->hw.tcl.cmd[idx] &= ~flag; + + break; + } + + case GL_CONSTANT_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_CONST] = params[0]; + if ( params[0] == 0.0 ) + fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX; + else + fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0]; + break; + case GL_LINEAR_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_LINEAR] = params[0]; + break; + case GL_QUADRATIC_ATTENUATION: + RADEON_STATECHANGE(rmesa, lit[p]); + fcmd[LIT_ATTEN_QUADRATIC] = params[0]; + break; + default: + return; + } + + /* Set RANGE_ATTEN only when needed */ + switch (pname) { + case GL_POSITION: + case GL_CONSTANT_ATTENUATION: + case GL_LINEAR_ATTENUATION: + case GL_QUADRATIC_ATTENUATION: + { + GLuint *icmd = (GLuint *)RADEON_DB_STATE( tcl ); + GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; + GLuint atten_flag = ( p&1 ) ? RADEON_LIGHT_1_ENABLE_RANGE_ATTEN + : RADEON_LIGHT_0_ENABLE_RANGE_ATTEN; + GLuint atten_const_flag = ( p&1 ) ? RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN + : RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN; + + if ( l->EyePosition[3] == 0.0F || + ( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) && + fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) { + /* Disable attenuation */ + icmd[idx] &= ~atten_flag; + } else { + if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) { + /* Enable only constant portion of attenuation calculation */ + icmd[idx] |= ( atten_flag | atten_const_flag ); + } else { + /* Enable full attenuation calculation */ + icmd[idx] &= ~atten_const_flag; + icmd[idx] |= atten_flag; + } + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tcl ); + break; + } + default: + break; + } +} + + + + +static void radeonLightModelfv( GLcontext *ctx, GLenum pname, + const GLfloat *param ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + switch (pname) { + case GL_LIGHT_MODEL_AMBIENT: + update_global_ambient( ctx ); + break; + + case GL_LIGHT_MODEL_LOCAL_VIEWER: + RADEON_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.LocalViewer) + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER; + else + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER; + break; + + case GL_LIGHT_MODEL_TWO_SIDE: + RADEON_STATECHANGE( rmesa, tcl ); + if (ctx->Light.Model.TwoSide) + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE; + else + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE; + + check_twoside_fallback( ctx ); + + if (rmesa->TclFallback) { + radeonChooseRenderState( ctx ); + radeonChooseVertexState( ctx ); + } + break; + + case GL_LIGHT_MODEL_COLOR_CONTROL: + radeonUpdateSpecular(ctx); + break; + + default: + break; + } +} + +static void radeonShadeModel( GLcontext *ctx, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; + + s &= ~(RADEON_DIFFUSE_SHADE_MASK | + RADEON_ALPHA_SHADE_MASK | + RADEON_SPECULAR_SHADE_MASK | + RADEON_FOG_SHADE_MASK); + + switch ( mode ) { + case GL_FLAT: + s |= (RADEON_DIFFUSE_SHADE_FLAT | + RADEON_ALPHA_SHADE_FLAT | + RADEON_SPECULAR_SHADE_FLAT | + RADEON_FOG_SHADE_FLAT); + break; + case GL_SMOOTH: + s |= (RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_ALPHA_SHADE_GOURAUD | + RADEON_SPECULAR_SHADE_GOURAUD | + RADEON_FOG_SHADE_GOURAUD); + break; + default: + return; + } + + if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = s; + } +} + + +/* ============================================================= + * User clip planes + */ + +static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + RADEON_STATECHANGE( rmesa, ucp[p] ); + rmesa->hw.ucp[p].cmd[UCP_X] = ip[0]; + rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1]; + rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2]; + rmesa->hw.ucp[p].cmd[UCP_W] = ip[3]; +} + +static void radeonUpdateClipPlanes( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + RADEON_STATECHANGE( rmesa, ucp[p] ); + rmesa->hw.ucp[p].cmd[UCP_X] = ip[0]; + rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1]; + rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2]; + rmesa->hw.ucp[p].cmd[UCP_W] = ip[3]; + } + } +} + + +/* ============================================================= + * Stencil + */ + +static void +radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, + GLint ref, GLuint mask ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) | + ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT)); + + RADEON_STATECHANGE( rmesa, ctx ); + RADEON_STATECHANGE( rmesa, msk ); + + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK; + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK| + RADEON_STENCIL_VALUE_MASK); + + switch ( ctx->Stencil.Function[0] ) { + case GL_NEVER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEVER; + break; + case GL_LESS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LESS; + break; + case GL_EQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_EQUAL; + break; + case GL_LEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LEQUAL; + break; + case GL_GREATER: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GREATER; + break; + case GL_NOTEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEQUAL; + break; + case GL_GEQUAL: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GEQUAL; + break; + case GL_ALWAYS: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_ALWAYS; + break; + } + + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask; +} + +static void +radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + RADEON_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK; + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= + ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT); +} + +static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, + GLenum zfail, GLenum zpass ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP, + and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC, + but DEC_WRAP can be fixed by using DEC and INC_WRAP at least use INC. */ + + GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP; + GLuint tempRADEON_STENCIL_FAIL_INC_WRAP; + GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP; + GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP; + GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP; + GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP; + + if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { + tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC; + tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC; + tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC; + tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC; + tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC; + tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC; + } + else { + tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC_WRAP; + tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC_WRAP; + tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC_WRAP; + tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC_WRAP; + tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP; + tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP; + } + + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK | + RADEON_STENCIL_ZFAIL_MASK | + RADEON_STENCIL_ZPASS_MASK); + + switch ( ctx->Stencil.FailFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_DEC; + break; + case GL_INCR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_INC_WRAP; + break; + case GL_DECR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INVERT; + break; + } + + switch ( ctx->Stencil.ZFailFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_DEC; + break; + case GL_INCR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_INC_WRAP; + break; + case GL_DECR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INVERT; + break; + } + + switch ( ctx->Stencil.ZPassFunc[0] ) { + case GL_KEEP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_KEEP; + break; + case GL_ZERO: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_ZERO; + break; + case GL_REPLACE: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_REPLACE; + break; + case GL_INCR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INC; + break; + case GL_DECR: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_DEC; + break; + case GL_INCR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_INC_WRAP; + break; + case GL_DECR_WRAP: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_DEC_WRAP; + break; + case GL_INVERT: + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INVERT; + break; + } +} + +static void radeonClearStencil( GLcontext *ctx, GLint s ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + rmesa->state.stencil.clear = + ((GLuint) (ctx->Stencil.Clear & 0xff) | + (0xff << RADEON_STENCIL_MASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT)); +} + + +/* ============================================================= + * Window position and viewport transformation + */ + +/* + * To correctly position primitives: + */ +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + + +/** + * Called when window size or position changes or viewport or depth range + * state is changed. We update the hardware viewport state here. + */ +void radeonUpdateWindow( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type sx = { v[MAT_SX] }; + float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; + float_ui32_type sy = { - v[MAT_SY] }; + float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; + float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; + float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; + + RADEON_FIREVERTICES( rmesa ); + RADEON_STATECHANGE( rmesa, vpt ); + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = sy.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = sz.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32; +} + + +static void radeonViewport( GLcontext *ctx, GLint x, GLint y, + GLsizei width, GLsizei height ) +{ + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ + radeonUpdateWindow( ctx ); +} + +static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, + GLclampd farval ) +{ + radeonUpdateWindow( ctx ); +} + +void radeonUpdateViewportOffset( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + GLfloat xoffset = (GLfloat)dPriv->x; + GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + float_ui32_type tx; + float_ui32_type ty; + + tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X; + ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y; + + if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 || + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 ) + { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + RADEON_STATECHANGE( rmesa, vpt ); + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32; + + /* update polygon stipple x/y screen offset */ + { + GLuint stx, sty; + GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC]; + + m &= ~(RADEON_STIPPLE_X_OFFSET_MASK | + RADEON_STIPPLE_Y_OFFSET_MASK); + + /* add magic offsets, then invert */ + stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK); + sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) + & RADEON_STIPPLE_COORD_MASK); + + m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) | + (sty << RADEON_STIPPLE_Y_OFFSET_SHIFT)); + + if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) { + RADEON_STATECHANGE( rmesa, msc ); + rmesa->hw.msc.cmd[MSC_RE_MISC] = m; + } + } + } + + radeonUpdateScissor( ctx ); +} + + + +/* ============================================================= + * Miscellaneous + */ + +static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); + rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp, + c[0], c[1], c[2], c[3] ); +} + + +static void radeonRenderMode( GLcontext *ctx, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); +} + + +static GLuint radeon_rop_tab[] = { + RADEON_ROP_CLEAR, + RADEON_ROP_AND, + RADEON_ROP_AND_REVERSE, + RADEON_ROP_COPY, + RADEON_ROP_AND_INVERTED, + RADEON_ROP_NOOP, + RADEON_ROP_XOR, + RADEON_ROP_OR, + RADEON_ROP_NOR, + RADEON_ROP_EQUIV, + RADEON_ROP_INVERT, + RADEON_ROP_OR_REVERSE, + RADEON_ROP_COPY_INVERTED, + RADEON_ROP_OR_INVERTED, + RADEON_ROP_NAND, + RADEON_ROP_SET, +}; + +static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint rop = (GLuint)opcode - GL_CLEAR; + + ASSERT( rop < 16 ); + + RADEON_STATECHANGE( rmesa, msk ); + rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop]; +} + + +/** + * Set up the cliprects for either front or back-buffer drawing. + */ +void radeonSetCliprects( radeonContextPtr rmesa ) +{ + __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; + __DRIdrawablePrivate *const readable = rmesa->dri.readable; + GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; + GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; + + if (draw_fb->_ColorDrawBufferMask[0] + == BUFFER_BIT_BACK_LEFT) { + /* Can't ignore 2d windows if we are page flipping. + */ + if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } + else { + rmesa->numClipRects = drawable->numBackClipRects; + rmesa->pClipRects = drawable->pBackClipRects; + } + } + else { + /* front buffer (or none, or multiple buffers */ + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } + + if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { + _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, + drawable->w, drawable->h); + draw_fb->Initialized = GL_TRUE; + } + + if (drawable != readable) { + if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) { + _mesa_resize_framebuffer(rmesa->glCtx, read_fb, + readable->w, readable->h); + read_fb->Initialized = GL_TRUE; + } + } + + if (rmesa->state.scissor.enabled) + radeonRecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; +} + + +/** + * Called via glDrawBuffer. + */ +static void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( mode )); + + RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ + + /* + * _ColorDrawBufferMask is easier to cope with than <mode>. + * Check for software fallback, update cliprects. + */ + switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { + case BUFFER_BIT_FRONT_LEFT: + case BUFFER_BIT_BACK_LEFT: + FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE ); + break; + default: + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ + FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE ); + return; + } + + radeonSetCliprects( rmesa ); + + /* We'll set the drawing engine's offset/pitch parameters later + * when we update other state. + */ +} + +static void radeonReadBuffer( GLcontext *ctx, GLenum mode ) +{ + /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ +} + + +/* ============================================================= + * State enable/disable + */ + +static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint p, flag; + + if ( RADEON_DEBUG & DEBUG_STATE ) + fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( cap ), + state ? "GL_TRUE" : "GL_FALSE" ); + + switch ( cap ) { + /* Fast track this one... + */ + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + break; + + case GL_ALPHA_TEST: + RADEON_STATECHANGE( rmesa, ctx ); + if (state) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE; + } + break; + + case GL_BLEND: + RADEON_STATECHANGE( rmesa, ctx ); + if (state) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ALPHA_BLEND_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE; + } + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; + } + + /* Catch a possible fallback: + */ + if (state) { + ctx->Driver.BlendEquationSeparate( ctx, + ctx->Color.BlendEquationRGB, + ctx->Color.BlendEquationA ); + ctx->Driver.BlendFuncSeparate( ctx, ctx->Color.BlendSrcRGB, + ctx->Color.BlendDstRGB, + ctx->Color.BlendSrcA, + ctx->Color.BlendDstA ); + } + else { + FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, GL_FALSE ); + FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, GL_FALSE ); + } + break; + + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + p = cap-GL_CLIP_PLANE0; + RADEON_STATECHANGE( rmesa, tcl ); + if (state) { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p); + radeonClipPlane( ctx, cap, NULL ); + } + else { + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p); + } + break; + + case GL_COLOR_MATERIAL: + radeonColorMaterial( ctx, 0, 0 ); + radeonUpdateMaterial( ctx ); + break; + + case GL_CULL_FACE: + radeonCullFace( ctx, 0 ); + break; + + case GL_DEPTH_TEST: + RADEON_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_Z_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE; + } + break; + + case GL_DITHER: + RADEON_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + } + break; + + case GL_FOG: + RADEON_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE; + radeonFogfv( ctx, GL_FOG_MODE, NULL ); + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE; + RADEON_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK; + } + radeonUpdateSpecular( ctx ); /* for PK_SPEC */ + _mesa_allow_light_in_model( ctx, !state ); + break; + + case GL_LIGHT0: + case GL_LIGHT1: + case GL_LIGHT2: + case GL_LIGHT3: + case GL_LIGHT4: + case GL_LIGHT5: + case GL_LIGHT6: + case GL_LIGHT7: + RADEON_STATECHANGE(rmesa, tcl); + p = cap - GL_LIGHT0; + if (p&1) + flag = (RADEON_LIGHT_1_ENABLE | + RADEON_LIGHT_1_ENABLE_AMBIENT | + RADEON_LIGHT_1_ENABLE_SPECULAR); + else + flag = (RADEON_LIGHT_0_ENABLE | + RADEON_LIGHT_0_ENABLE_AMBIENT | + RADEON_LIGHT_0_ENABLE_SPECULAR); + + if (state) + rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag; + else + rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; + + /* + */ + update_light_colors( ctx, p ); + break; + + case GL_LIGHTING: + RADEON_STATECHANGE(rmesa, tcl); + radeonUpdateSpecular(ctx); + check_twoside_fallback( ctx ); + break; + + case GL_LINE_SMOOTH: + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ANTI_ALIAS_LINE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE; + } + break; + + case GL_LINE_STIPPLE: + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_PATTERN_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE; + } + break; + + case GL_COLOR_LOGIC_OP: + RADEON_STATECHANGE( rmesa, ctx ); + if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled + && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; + } + break; + + case GL_NORMALIZE: + RADEON_STATECHANGE( rmesa, tcl ); + if ( state ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_NORMALIZE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS; + } + break; + + case GL_POLYGON_OFFSET_POINT: + RADEON_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_POINT; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT; + } + break; + + case GL_POLYGON_OFFSET_LINE: + RADEON_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_LINE; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE; + } + break; + + case GL_POLYGON_OFFSET_FILL: + RADEON_STATECHANGE( rmesa, set ); + if ( state ) { + rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_TRI; + } else { + rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI; + } + break; + + case GL_POLYGON_SMOOTH: + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ANTI_ALIAS_POLY; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY; + } + break; + + case GL_POLYGON_STIPPLE: + RADEON_STATECHANGE(rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_STIPPLE_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE; + } + break; + + case GL_RESCALE_NORMAL_EXT: { + GLboolean tmp = ctx->_NeedEyeCoords ? state : !state; + RADEON_STATECHANGE( rmesa, tcl ); + if ( tmp ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; + } + break; + } + + case GL_SCISSOR_TEST: + RADEON_FIREVERTICES( rmesa ); + rmesa->state.scissor.enabled = state; + radeonUpdateScissor( ctx ); + break; + + case GL_STENCIL_TEST: + if ( rmesa->state.stencil.hwBuffer ) { + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE; + } + } else { + FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state ); + } + break; + + case GL_TEXTURE_GEN_Q: + case GL_TEXTURE_GEN_R: + case GL_TEXTURE_GEN_S: + case GL_TEXTURE_GEN_T: + /* Picked up in radeonUpdateTextureState. + */ + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + break; + + case GL_COLOR_SUM_EXT: + radeonUpdateSpecular ( ctx ); + break; + + default: + return; + } +} + + +static void radeonLightingSpaceChange( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLboolean tmp; + RADEON_STATECHANGE( rmesa, tcl ); + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); + + if (ctx->_NeedEyeCoords) + tmp = ctx->Transform.RescaleNormals; + else + tmp = !ctx->Transform.RescaleNormals; + + if ( tmp ) { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS; + } else { + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; + } + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); +} + +/* ============================================================= + * Deferred state management - matrices, textures, other? + */ + + +void radeonUploadTexMatrix( radeonContextPtr rmesa, + int unit, GLboolean swapcols ) +{ +/* Here's how this works: on r100, only 3 tex coords can be submitted, so the + vector looks like this probably: (s t r|q 0) (not sure if the last coord + is hardwired to 0, could be 1 too). Interestingly, it actually looks like + texgen generates all 4 coords, at least tests with projtex indicated that. + So: if we need the q coord in the end (solely determined by the texture + target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row. + Additionally, if we don't have texgen but 4 tex coords submitted, we swap + column 3 and 4 (for the 2d / 1d / texrect targets) since the the q coord + will get submitted in the "wrong", i.e. 3rd, slot. + If an app submits 3 coords for 2d targets, we assume it is saving on vertex + size and using the texture matrix to swap the r and q coords around (ut2k3 + does exactly that), so we don't need the 3rd / 4th column swap - still need + the 3rd / 4th row swap of course. This will potentially break for apps which + use TexCoord3x just for fun. Additionally, it will never work if an app uses + an "advanced" texture matrix and relies on all 4 texcoord inputs to generate + the maximum needed 3. This seems impossible to do with hw tcl on r100, and + incredibly hard to detect so we can't just fallback in such a case. Assume + it never happens... - rs +*/ + + int idx = TEXMAT_0 + unit; + float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0; + int i; + struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit]; + GLfloat *src = rmesa->tmpmat[unit].m; + + rmesa->TexMatColSwap &= ~(1 << unit); + if ((tUnit._ReallyEnabled & (TEXTURE_3D_BIT | TEXTURE_CUBE_BIT)) == 0) { + if (swapcols) { + rmesa->TexMatColSwap |= 1 << unit; + /* attention some elems are swapped 2 times! */ + *dest++ = src[0]; + *dest++ = src[4]; + *dest++ = src[12]; + *dest++ = src[8]; + *dest++ = src[1]; + *dest++ = src[5]; + *dest++ = src[13]; + *dest++ = src[9]; + *dest++ = src[2]; + *dest++ = src[6]; + *dest++ = src[15]; + *dest++ = src[11]; + /* those last 4 are probably never used */ + *dest++ = src[3]; + *dest++ = src[7]; + *dest++ = src[14]; + *dest++ = src[10]; + } + else { + for (i = 0; i < 2; i++) { + *dest++ = src[i]; + *dest++ = src[i+4]; + *dest++ = src[i+8]; + *dest++ = src[i+12]; + } + for (i = 3; i >= 2; i--) { + *dest++ = src[i]; + *dest++ = src[i+4]; + *dest++ = src[i+8]; + *dest++ = src[i+12]; + } + } + } + else { + for (i = 0 ; i < 4 ; i++) { + *dest++ = src[i]; + *dest++ = src[i+4]; + *dest++ = src[i+8]; + *dest++ = src[i+12]; + } + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); +} + + +static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx ) +{ + float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; + int i; + + + for (i = 0 ; i < 4 ; i++) { + *dest++ = src[i]; + *dest++ = src[i+4]; + *dest++ = src[i+8]; + *dest++ = src[i+12]; + } + + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); +} + +static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) +{ + float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; + memcpy(dest, src, 16*sizeof(float)); + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); +} + + +static void update_texturematrix( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL]; + GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]; + int unit; + GLuint texMatEnabled = 0; + rmesa->NeedTexMatrix = 0; + rmesa->TexMatColSwap = 0; + + for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + GLboolean needMatrix = GL_FALSE; + if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) { + needMatrix = GL_TRUE; + texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE | + RADEON_TEXMAT_0_ENABLE) << unit; + + if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) { + /* Need to preconcatenate any active texgen + * obj/eyeplane matrices: + */ + _math_matrix_mul_matrix( &rmesa->tmpmat[unit], + ctx->TextureMatrixStack[unit].Top, + &rmesa->TexGenMatrix[unit] ); + } + else { + _math_matrix_copy( &rmesa->tmpmat[unit], + ctx->TextureMatrixStack[unit].Top ); + } + } + else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) { + _math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] ); + needMatrix = GL_TRUE; + } + if (needMatrix) { + rmesa->NeedTexMatrix |= 1 << unit; + radeonUploadTexMatrix( rmesa, unit, + !ctx->Texture.Unit[unit].TexGenEnabled ); + } + } + } + + tpc = (texMatEnabled | rmesa->TexGenEnabled); + + /* TCL_TEX_COMPUTED_x is TCL_TEX_INPUT_x | 0x8 */ + vs &= ~((RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) | + (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) | + (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_2_OUTPUT_SHIFT)); + + vs |= (((tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE) << + (RADEON_TCL_TEX_0_OUTPUT_SHIFT + 3)) | + ((tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE) << + (RADEON_TCL_TEX_1_OUTPUT_SHIFT + 2)) | + ((tpc & RADEON_TEXGEN_TEXMAT_2_ENABLE) << + (RADEON_TCL_TEX_2_OUTPUT_SHIFT + 1))); + + if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] || + vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) { + + RADEON_STATECHANGE(rmesa, tcl); + rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc; + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs; + } +} + + +/** + * Tell the card where to render (offset, pitch). + * Effected by glDrawBuffer, etc + */ +void +radeonUpdateDrawBuffer(GLcontext *ctx) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + driRenderbuffer *drb; + + if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) { + /* draw to front */ + drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + } + else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) { + /* draw to back */ + drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } + else { + /* drawing to multiple buffers, or none */ + return; + } + + assert(drb); + assert(drb->flippedPitch); + + RADEON_STATECHANGE( rmesa, ctx ); + + /* Note: we used the (possibly) page-flipped values */ + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] + = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation) + & RADEON_COLOROFFSET_MASK); + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; + } +} + + +void radeonValidateState( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint new_state = rmesa->NewGLState; + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + radeonUpdateDrawBuffer(ctx); + } + + if (new_state & _NEW_TEXTURE) { + radeonUpdateTextureState( ctx ); + new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ + } + + /* Need an event driven matrix update? + */ + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ ); + + /* Need these for lighting (shouldn't upload otherwise) + */ + if (new_state & (_NEW_MODELVIEW)) { + upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, MODEL ); + upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, MODEL_IT ); + } + + /* Does this need to be triggered on eg. modelview for + * texgen-derived objplane/eyeplane matrices? + */ + if (new_state & _NEW_TEXTURE_MATRIX) { + update_texturematrix( ctx ); + } + + if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) { + update_light( ctx ); + } + + /* emit all active clip planes if projection matrix changes. + */ + if (new_state & (_NEW_PROJECTION)) { + if (ctx->Transform.ClipPlanesEnabled) + radeonUpdateClipPlanes( ctx ); + } + + + rmesa->NewGLState = 0; +} + + +static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) +{ + _swrast_InvalidateState( ctx, new_state ); + _swsetup_InvalidateState( ctx, new_state ); + _vbo_InvalidateState( ctx, new_state ); + _tnl_InvalidateState( ctx, new_state ); + _ae_invalidate_state( ctx, new_state ); + RADEON_CONTEXT(ctx)->NewGLState |= new_state; +} + + +/* A hack. Need a faster way to find this out. + */ +static GLboolean check_material( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLint i; + + for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; + i < _TNL_ATTRIB_MAT_BACK_INDEXES; + i++) + if (tnl->vb.AttribPtr[i] && + tnl->vb.AttribPtr[i]->stride) + return GL_TRUE; + + return GL_FALSE; +} + + +static void radeonWrapRunPipeline( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLboolean has_material; + + if (0) + fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); + + /* Validate state: + */ + if (rmesa->NewGLState) + radeonValidateState( ctx ); + + has_material = (ctx->Light.Enabled && check_material( ctx )); + + if (has_material) { + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE ); + } + + /* Run the pipeline. + */ + _tnl_run_pipeline( ctx ); + + if (has_material) { + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE ); + } +} + + +/* Initialize the driver's state functions. + * Many of the ctx->Driver functions might have been initialized to + * software defaults in the earlier _mesa_init_driver_functions() call. + */ +void radeonInitStateFuncs( GLcontext *ctx ) +{ + ctx->Driver.UpdateState = radeonInvalidateState; + ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange; + + ctx->Driver.DrawBuffer = radeonDrawBuffer; + ctx->Driver.ReadBuffer = radeonReadBuffer; + + ctx->Driver.AlphaFunc = radeonAlphaFunc; + ctx->Driver.BlendEquationSeparate = radeonBlendEquationSeparate; + ctx->Driver.BlendFuncSeparate = radeonBlendFuncSeparate; + ctx->Driver.ClearColor = radeonClearColor; + ctx->Driver.ClearDepth = radeonClearDepth; + ctx->Driver.ClearIndex = NULL; + ctx->Driver.ClearStencil = radeonClearStencil; + ctx->Driver.ClipPlane = radeonClipPlane; + ctx->Driver.ColorMask = radeonColorMask; + ctx->Driver.CullFace = radeonCullFace; + ctx->Driver.DepthFunc = radeonDepthFunc; + ctx->Driver.DepthMask = radeonDepthMask; + ctx->Driver.DepthRange = radeonDepthRange; + ctx->Driver.Enable = radeonEnable; + ctx->Driver.Fogfv = radeonFogfv; + ctx->Driver.FrontFace = radeonFrontFace; + ctx->Driver.Hint = NULL; + ctx->Driver.IndexMask = NULL; + ctx->Driver.LightModelfv = radeonLightModelfv; + ctx->Driver.Lightfv = radeonLightfv; + ctx->Driver.LineStipple = radeonLineStipple; + ctx->Driver.LineWidth = radeonLineWidth; + ctx->Driver.LogicOpcode = radeonLogicOpCode; + ctx->Driver.PolygonMode = radeonPolygonMode; + ctx->Driver.PolygonOffset = radeonPolygonOffset; + ctx->Driver.PolygonStipple = radeonPolygonStipple; + ctx->Driver.RenderMode = radeonRenderMode; + ctx->Driver.Scissor = radeonScissor; + ctx->Driver.ShadeModel = radeonShadeModel; + ctx->Driver.StencilFuncSeparate = radeonStencilFuncSeparate; + ctx->Driver.StencilMaskSeparate = radeonStencilMaskSeparate; + ctx->Driver.StencilOpSeparate = radeonStencilOpSeparate; + ctx->Driver.Viewport = radeonViewport; + + TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial; + TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline; +} diff --git a/radeon/radeon_state.h b/radeon/radeon_state.h new file mode 100644 index 0000000..ad7db3b --- /dev/null +++ b/radeon/radeon_state.h @@ -0,0 +1,77 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state.h,v 1.5 2002/11/05 17:46:09 tsi Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * + */ + +#ifndef __RADEON_STATE_H__ +#define __RADEON_STATE_H__ + +#include "radeon_context.h" + +extern void radeonInitState( radeonContextPtr rmesa ); +extern void radeonInitStateFuncs( GLcontext *ctx ); + +extern void radeonUpdateMaterial( GLcontext *ctx ); + +extern void radeonSetCliprects( radeonContextPtr rmesa ); +extern void radeonRecalcScissorRects( radeonContextPtr rmesa ); +extern void radeonUpdateViewportOffset( GLcontext *ctx ); +extern void radeonUpdateWindow( GLcontext *ctx ); +extern void radeonUpdateDrawBuffer( GLcontext *ctx ); +extern void radeonUploadTexMatrix( radeonContextPtr rmesa, + int unit, GLboolean swapcols ); + +extern void radeonValidateState( GLcontext *ctx ); + +extern void radeonPrintDirty( radeonContextPtr rmesa, + const char *msg ); + + +extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ); +#define FALLBACK( rmesa, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ + radeonFallback( rmesa->glCtx, bit, mode ); \ +} while (0) + + +#define MODEL_PROJ 0 +#define MODEL 1 +#define MODEL_IT 2 +#define TEXMAT_0 3 +#define TEXMAT_1 4 +#define TEXMAT_2 5 + +#endif diff --git a/radeon/radeon_state_init.c b/radeon/radeon_state_init.c new file mode 100644 index 0000000..5fc34f0 --- /dev/null +++ b/radeon/radeon_state_init.c @@ -0,0 +1,618 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c,v 1.3 2003/02/22 06:21:11 dawes Exp $ */ +/* + * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "api_arrayelt.h" + +#include "swrast/swrast.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_tcl.h" +#include "radeon_tex.h" +#include "radeon_swtcl.h" + +#include "xmlpool.h" + +/* ============================================================= + * State initialization + */ + +void radeonPrintDirty( radeonContextPtr rmesa, const char *msg ) +{ + struct radeon_state_atom *l; + + fprintf(stderr, msg); + fprintf(stderr, ": "); + + foreach(l, &rmesa->hw.atomlist) { + if (l->dirty || rmesa->hw.all_dirty) + fprintf(stderr, "%s, ", l->name); + } + + fprintf(stderr, "\n"); +} + +static int cmdpkt( int id ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.packet.cmd_type = RADEON_CMD_PACKET; + h.packet.packet_id = id; + return h.i; +} + +static int cmdvec( int offset, int stride, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.vectors.cmd_type = RADEON_CMD_VECTORS; + h.vectors.offset = offset; + h.vectors.stride = stride; + h.vectors.count = count; + return h.i; +} + +static int cmdscl( int offset, int stride, int count ) +{ + drm_radeon_cmd_header_t h; + h.i = 0; + h.scalars.cmd_type = RADEON_CMD_SCALARS; + h.scalars.offset = offset; + h.scalars.stride = stride; + h.scalars.count = count; + return h.i; +} + +#define CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx ) \ +{ \ + return FLAG; \ +} + +#define TCL_CHECK( NM, FLAG ) \ +static GLboolean check_##NM( GLcontext *ctx ) \ +{ \ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ + return !rmesa->TclFallback && (FLAG); \ +} + + +CHECK( always, GL_TRUE ) +CHECK( never, GL_FALSE ) +CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled ) +CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled ) +/* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */ +CHECK( tex2, ctx->Texture._EnabledUnits ) +CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT)) +CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT)) +CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT)) +CHECK( fog, ctx->Fog.Enabled ) +TCL_CHECK( tcl, GL_TRUE ) +TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled ) +TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled ) +TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) +TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled ) +TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled ) +TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled ) +TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled ) +TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled ) +TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled ) +TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled ) +TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled ) +TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled ) +TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) ) +TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) ) +TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) ) +TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) ) +TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) ) +TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) ) +TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) + +CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) +CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) +CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) + + + +/* Initialize the context's hardware state. + */ +void radeonInitState( radeonContextPtr rmesa ) +{ + GLcontext *ctx = rmesa->glCtx; + GLuint color_fmt, depth_fmt, i; + GLint drawPitch, drawOffset; + + switch ( rmesa->radeonScreen->cpp ) { + case 2: + color_fmt = RADEON_COLOR_FORMAT_RGB565; + break; + case 4: + color_fmt = RADEON_COLOR_FORMAT_ARGB8888; + break; + default: + fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" ); + exit( -1 ); + } + + rmesa->state.color.clear = 0x00000000; + + switch ( ctx->Visual.depthBits ) { + case 16: + rmesa->state.depth.clear = 0x0000ffff; + rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; + depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + rmesa->state.stencil.clear = 0x00000000; + break; + case 24: + rmesa->state.depth.clear = 0x00ffffff; + rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; + depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + rmesa->state.stencil.clear = 0xffff0000; + break; + default: + fprintf( stderr, "Error: Unsupported depth %d... exiting\n", + ctx->Visual.depthBits ); + exit( -1 ); + } + + /* Only have hw stencil when depth buffer is 24 bits deep */ + rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && + ctx->Visual.depthBits == 24 ); + + rmesa->Fallback = 0; + + if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { + drawOffset = rmesa->radeonScreen->backOffset; + drawPitch = rmesa->radeonScreen->backPitch; + } else { + drawOffset = rmesa->radeonScreen->frontOffset; + drawPitch = rmesa->radeonScreen->frontPitch; + } + + rmesa->hw.max_state_size = 0; + +#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \ + do { \ + rmesa->hw.ATOM.cmd_size = SZ; \ + rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.name = NM; \ + rmesa->hw.ATOM.is_tcl = FLAG; \ + rmesa->hw.ATOM.check = check_##CHK; \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ + rmesa->hw.max_state_size += SZ * sizeof(int); \ + } while (0) + + + /* Allocate state buffers: + */ + ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); + ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); + ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); + ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 ); + ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); + ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 ); + ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); + ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 ); + ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 ); + ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 ); + ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); + ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); + ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 ); + ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 ); + if (rmesa->radeonScreen->drmSupportsCubeMapsR100) + { + ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); + ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); + ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); + } + else + { + ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); + ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); + ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); + } + ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); + ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); + ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 ); + ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 ); + ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 ); + ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 ); + ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 ); + ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 ); + ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 ); + ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 ); + ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 ); + ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 ); + ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 ); + ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 ); + ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); + ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); + ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); + ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 ); + ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 ); + ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 ); + + radeonSetUpAtomList( rmesa ); + + /* Fill in the packet headers: + */ + rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); + rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); + rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); + rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); + rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); + rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); + rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); + rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); + rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS); + rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2); + rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0); + rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0); + rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1); + rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1); + rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2); + rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2); + rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); + rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); + rmesa->hw.mtl.cmd[MTL_CMD_0] = + cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); + rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0); + rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1); + rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2); + rmesa->hw.grd.cmd[GRD_CMD_0] = + cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); + rmesa->hw.fog.cmd[FOG_CMD_0] = + cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 ); + rmesa->hw.glt.cmd[GLT_CMD_0] = + cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 ); + rmesa->hw.eye.cmd[EYE_CMD_0] = + cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 ); + + for (i = 0 ; i < 6; i++) { + rmesa->hw.mat[i].cmd[MAT_CMD_0] = + cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16); + } + + for (i = 0 ; i < 8; i++) { + rmesa->hw.lit[i].cmd[LIT_CMD_0] = + cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 ); + rmesa->hw.lit[i].cmd[LIT_CMD_1] = + cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 ); + } + + for (i = 0 ; i < 6; i++) { + rmesa->hw.ucp[i].cmd[UCP_CMD_0] = + cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 ); + } + + rmesa->last_ReallyEnabled = -1; + + /* Initial Harware state: + */ + rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS | + RADEON_CHROMA_FUNC_FAIL | + RADEON_CHROMA_KEY_NEAREST | + RADEON_SHADOW_FUNC_EQUAL | + RADEON_SHADOW_PASS_1 /*| + RADEON_RIGHT_HAND_CUBE_OGL */); + + rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX | + /* this bit unused for vertex fog */ + RADEON_FOG_USE_DEPTH); + + rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000; + + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP | + RADEON_SRC_BLEND_GL_ONE | + RADEON_DST_BLEND_GL_ZERO ); + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = + rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation; + + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = + ((rmesa->radeonScreen->depthPitch & + RADEON_DEPTHPITCH_MASK) | + RADEON_DEPTH_ENDIAN_NO_SWAP); + + if (rmesa->using_hyperz) + rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ; + + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | + RADEON_Z_TEST_LESS | + RADEON_STENCIL_TEST_ALWAYS | + RADEON_STENCIL_FAIL_KEEP | + RADEON_STENCIL_ZPASS_KEEP | + RADEON_STENCIL_ZFAIL_KEEP | + RADEON_Z_WRITE_ENABLE); + + if (rmesa->using_hyperz) { + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; + if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + /* works for q3, but slight rendering errors with glxgears ? */ +/* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ + /* need this otherwise get lots of lockups with q3 ??? */ + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_FORCE_Z_DIRTY; + } + } + + rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE | + RADEON_ANTI_ALIAS_NONE); + + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE | + color_fmt | + RADEON_ZBLOCK16); + + switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { + case DRI_CONF_DITHER_XERRORDIFFRESET: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT; + break; + case DRI_CONF_DITHER_ORDERED: + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE; + break; + } + if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == + DRI_CONF_ROUND_ROUND ) + rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE; + else + rmesa->state.color.roundEnable = 0; + if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == + DRI_CONF_COLOR_REDUCTION_DITHER ) + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; + else + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + + rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + + rmesa->radeonScreen->fbLocation) + & RADEON_COLOROFFSET_MASK); + + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & + RADEON_COLORPITCH_MASK) | + RADEON_COLOR_ENDIAN_NO_SWAP); + + + /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */ + if (rmesa->sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; + } + + rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | +/* RADEON_BADVTX_CULL_DISABLE | */ + RADEON_FLAT_SHADE_VTX_LAST | + RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_ALPHA_SHADE_GOURAUD | + RADEON_SPECULAR_SHADE_GOURAUD | + RADEON_FOG_SHADE_GOURAUD | + RADEON_VPORT_XY_XFORM_ENABLE | + RADEON_VPORT_Z_XFORM_ENABLE | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_TRUNC | + RADEON_ROUND_PREC_8TH_PIX); + + rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] = +#ifdef MESA_BIG_ENDIAN + RADEON_VC_32BIT_SWAP; +#else + RADEON_VC_NO_SWAP; +#endif + + if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS; + } + + rmesa->hw.set.cmd[SET_SE_COORDFMT] = ( + RADEON_VTX_W0_IS_NOT_1_OVER_W0 | + RADEON_TEX1_W_ROUTING_USE_Q1); + + + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff); + + rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = + ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) | + (1 << RADEON_LINE_CURRENT_COUNT_SHIFT)); + + rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4); + + rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = + ((0x00 << RADEON_STENCIL_REF_SHIFT) | + (0xff << RADEON_STENCIL_MASK_SHIFT) | + (0xff << RADEON_STENCIL_WRITEMASK_SHIFT)); + + rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY; + rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff; + + rmesa->hw.msc.cmd[MSC_RE_MISC] = + ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) | + (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) | + RADEON_STIPPLE_BIG_BIT_ORDER); + + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = 0x00000000; + rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000; + + for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) { + rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL; + rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = + (RADEON_TXFORMAT_ENDIAN_NO_SWAP | + RADEON_TXFORMAT_PERSPECTIVE_ENABLE | + (i << 24) | /* This is one of RADEON_TXFORMAT_ST_ROUTE_STQ[012] */ + (2 << RADEON_TXFORMAT_WIDTH_SHIFT) | + (2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); + + /* Initialize the texture offset to the start of the card texture heap */ + rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + + rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; + rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] = + (RADEON_COLOR_ARG_A_ZERO | + RADEON_COLOR_ARG_B_ZERO | + RADEON_COLOR_ARG_C_CURRENT_COLOR | + RADEON_BLEND_CTL_ADD | + RADEON_SCALE_1X | + RADEON_CLAMP_TX); + rmesa->hw.tex[i].cmd[TEX_PP_TXABLEND] = + (RADEON_ALPHA_ARG_A_ZERO | + RADEON_ALPHA_ARG_B_ZERO | + RADEON_ALPHA_ARG_C_CURRENT_ALPHA | + RADEON_BLEND_CTL_ADD | + RADEON_SCALE_1X | + RADEON_CLAMP_TX); + rmesa->hw.tex[i].cmd[TEX_PP_TFACTOR] = 0; + + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] = + rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + } + + /* Can only add ST1 at the time of doing some multitex but can keep + * it after that. Errors if DIFFUSE is missing. + */ + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = + (RADEON_TCL_VTX_Z0 | + RADEON_TCL_VTX_W0 | + RADEON_TCL_VTX_PK_DIFFUSE + ); /* need to keep this uptodate */ + + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = + ( RADEON_TCL_COMPUTE_XYZW | + (RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) | + (RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) | + (RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT)); + + + /* XXX */ + rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] = + ((MODEL << RADEON_MODELVIEW_0_SHIFT) | + (MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT)); + + rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = + ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) | + (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) | + (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT) | + (TEXMAT_2 << RADEON_TEXMAT_2_SHIFT)); + + rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = + (RADEON_UCP_IN_CLIP_SPACE | + RADEON_CULL_FRONT_IS_CCW); + + rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0; + + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = + (RADEON_SPECULAR_LIGHTS | + RADEON_DIFFUSE_SPECULAR_COMBINE | + RADEON_LOCAL_LIGHT_VEC_GL | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_EMISSIVE_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) | + (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT)); + + for (i = 0 ; i < 8; i++) { + struct gl_light *l = &ctx->Light.Light[i]; + GLenum p = GL_LIGHT0 + i; + *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX; + + ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient ); + ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse ); + ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular ); + ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent ); + ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff ); + ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION, + &l->ConstantAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, + &l->LinearAttenuation ); + ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, + &l->QuadraticAttenuation ); + *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0; + } + + ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, + ctx->Light.Model.Ambient ); + + TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx ); + + for (i = 0 ; i < 6; i++) { + ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL ); + } + + ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL ); + ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density ); + ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start ); + ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End ); + ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color ); + ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL ); + + rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE; + rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE; + + rmesa->hw.eye.cmd[EYE_X] = 0; + rmesa->hw.eye.cmd[EYE_Y] = 0; + rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; + rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; + + rmesa->hw.all_dirty = GL_TRUE; +} diff --git a/radeon/radeon_swtcl.c b/radeon/radeon_swtcl.c new file mode 100644 index 0000000..7ce1fa6 --- /dev/null +++ b/radeon/radeon_swtcl.c @@ -0,0 +1,890 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c,v 1.6 2003/05/06 23:52:08 daenzer Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "enums.h" +#include "imports.h" +#include "macros.h" + +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_state.h" +#include "radeon_swtcl.h" +#include "radeon_tcl.h" + + +static void flush_last_swtcl_prim( radeonContextPtr rmesa ); + +/* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */ +/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */ +#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */ + +/*********************************************************************** + * Initialization + ***********************************************************************/ + +#define EMIT_ATTR( ATTR, STYLE, F0 ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->swtcl.vertex_attr_count++; \ + fmt_0 |= F0; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +static GLuint radeon_cp_vc_frmts[3][2] = +{ + { RADEON_CP_VC_FRMT_ST0, RADEON_CP_VC_FRMT_ST0 | RADEON_CP_VC_FRMT_Q0 }, + { RADEON_CP_VC_FRMT_ST1, RADEON_CP_VC_FRMT_ST1 | RADEON_CP_VC_FRMT_Q1 }, + { RADEON_CP_VC_FRMT_ST2, RADEON_CP_VC_FRMT_ST2 | RADEON_CP_VC_FRMT_Q2 }, +}; + +static void radeonSetVertexFormat( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + int fmt_0 = 0; + int offset = 0; + + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + rmesa->swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if ( !rmesa->swtcl.needproj || + RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { /* for projtex */ + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F, + RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_Z | RADEON_CP_VC_FRMT_W0 ); + offset = 4; + } + else { + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F, + RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_Z ); + offset = 3; + } + + rmesa->swtcl.coloroffset = offset; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, + RADEON_CP_VC_FRMT_PKCOLOR ); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, + RADEON_CP_VC_FRMT_PKCOLOR ); +#endif + offset += 1; + + rmesa->swtcl.specoffset = 0; + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || + RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + +#if MESA_LITTLE_ENDIAN + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, + RADEON_CP_VC_FRMT_PKSPEC ); + } + else { + EMIT_PAD( 3 ); + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, + RADEON_CP_VC_FRMT_PKSPEC ); + } + else { + EMIT_PAD( 1 ); + } +#else + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, + RADEON_CP_VC_FRMT_PKSPEC ); + } + else { + EMIT_PAD( 1 ); + } + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, + RADEON_CP_VC_FRMT_PKSPEC ); + } + else { + EMIT_PAD( 3 ); + } +#endif + } + + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + GLuint sz = VB->TexCoordPtr[i]->size; + + switch (sz) { + case 1: + case 2: + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F, + radeon_cp_vc_frmts[i][0] ); + break; + case 3: + case 4: + if (ctx->Texture.Unit[i]._ReallyEnabled & (TEXTURE_CUBE_BIT) ) { + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F, + radeon_cp_vc_frmts[i][1] ); + } else { + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F_XYW, + radeon_cp_vc_frmts[i][1] ); + } + break; + default: + continue; + }; + } + } + } + + if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || + fmt_0 != rmesa->swtcl.vertex_format) { + RADEON_NEWPRIM(rmesa); + rmesa->swtcl.vertex_format = fmt_0; + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); + rmesa->swtcl.vertex_size /= 4; + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf( stderr, "%s: vertex_size= %d floats\n", + __FUNCTION__, rmesa->swtcl.vertex_size); + } +} + + +static void radeonRenderStart( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + + radeonSetVertexFormat( ctx ); + + if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); +} + + +/** + * Set vertex state for SW TCL. The primary purpose of this function is to + * determine in advance whether or not the hardware can / should do the + * projection divide or Mesa should do it. + */ +void radeonChooseVertexState( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0 | + RADEON_VTX_W0_IS_NOT_1_OVER_W0); + + /* We must ensure that we don't do _tnl_need_projected_coords while in a + * rasterization fallback. As this function will be called again when we + * leave a rasterization fallback, we can just skip it for now. + */ + if (rmesa->Fallback != 0) + return; + + /* HW perspective divide is a win, but tiny vertex formats are a + * bigger one. + */ + + if ((!RENDERINPUTS_TEST_RANGE( tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) && + !RENDERINPUTS_TEST( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) + || (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) { + rmesa->swtcl.needproj = GL_TRUE; + se_coord_fmt |= (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0); + } + else { + rmesa->swtcl.needproj = GL_FALSE; + se_coord_fmt |= (RADEON_VTX_W0_IS_NOT_1_OVER_W0); + } + + _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj ); + + if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; + } +} + + +/* Flush vertices in the current dma region. + */ +static void flush_last_swtcl_prim( radeonContextPtr rmesa ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->dma.flush = NULL; + + if (rmesa->dma.current.buf) { + struct radeon_dma_region *current = &rmesa->dma.current; + GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset + + current->buf->buf->idx * RADEON_BUFFER_SIZE + + current->start); + + assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); + + assert (current->start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + current->ptr); + + if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + + rmesa->hw.max_state_size + VBUF_BUFSZ ); + + radeonEmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, + current_offset); + + radeonEmitVbufPrim( rmesa, + rmesa->swtcl.vertex_format, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + } + + rmesa->swtcl.numverts = 0; + current->start = current->ptr; + } +} + + +/* Alloc space in the current dma region. + */ +static INLINE void * +radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + radeonRefillCurrentDmaRegion( rmesa ); + + if (!rmesa->dma.flush) { + rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = flush_last_swtcl_prim; + } + + assert( vsize == rmesa->swtcl.vertex_size * 4 ); + assert( rmesa->dma.flush == flush_last_swtcl_prim ); + assert (rmesa->dma.current.start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current.ptr); + + + { + GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr); + rmesa->dma.current.ptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } + +} + + +/* + * Render unclipped vertex buffers by emitting vertices directly to + * dma buffers. Use strip/fan hardware primitives where possible. + * Try to simulate missing primitives with indexed vertices. + */ +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 0 +/* \todo: is it possible to make "ELTS" work with t_vertex code ? */ +#define HAVE_ELTS 0 + +static const GLuint hw_prim[GL_POLYGON+1] = { + RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, + RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + 0, + RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN, + 0, + 0, + 0 +}; + +static INLINE void +radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim ) +{ + RADEON_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hw_prim[prim]; + assert(rmesa->dma.current.ptr == rmesa->dma.current.start); +} + +#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define INIT( prim ) radeonDmaPrimitive( rmesa, prim ) +#define FLUSH() RADEON_NEWPRIM( rmesa ) +#define GET_CURRENT_VB_MAX_VERTS() \ + (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4)) +#define GET_SUBSEQUENT_VB_MAX_VERTS() \ + ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4)) +#define ALLOC_VERTS( nr ) \ + radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 ) +#define EMIT_VERTS( ctx, j, nr, buf ) \ + _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf) + +#define TAG(x) radeon_dma_##x +#include "tnl_dd/t_dd_dmatmp.h" + + +/**********************************************************************/ +/* Render pipeline stage */ +/**********************************************************************/ + + +static GLboolean radeon_run_render( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + tnl_render_func *tab = TAG(render_tab_verts); + GLuint i; + + if (rmesa->swtcl.indexed_verts.buf) + RELEASE_ELT_VERTS(); + + if (rmesa->swtcl.RenderIndex != 0 || + !radeon_dma_validate_render( ctx, VB )) + return GL_TRUE; + + tnl->Driver.Render.Start( ctx ); + + for (i = 0 ; i < VB->PrimitiveCount ; i++) + { + GLuint prim = VB->Primitive[i].mode; + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + if (!length) + continue; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", + _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), + start, start+length); + + if (length) + tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim ); + } + + tnl->Driver.Render.Finish( ctx ); + + return GL_FALSE; /* finished the pipe */ +} + + + +const struct tnl_pipeline_stage _radeon_render_stage = +{ + "radeon render", + NULL, + NULL, + NULL, + NULL, + radeon_run_render /* run */ +}; + + +/**************************************************************************/ + + +static const GLuint reduced_hw_prim[GL_POLYGON+1] = { + RADEON_CP_VC_CNTL_PRIM_TYPE_POINT, + RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + RADEON_CP_VC_CNTL_PRIM_TYPE_LINE, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST, + RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST +}; + +static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim ); +static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim ); +static void radeonResetLineStipple( GLcontext *ctx ); + + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG radeonContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 ) +#undef LOCAL_VARS +#define LOCAL_VARS \ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ + const char *radeonverts = (char *)rmesa->swtcl.verts; +#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int))) +#define VERTEX radeonVertex +#undef TAG +#define TAG(x) radeon_##x +#include "tnl_dd/t_dd_triemit.h" + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) radeon_triangle( rmesa, a, b, c ) +#define LINE( a, b ) radeon_line( rmesa, a, b ) +#define POINT( a ) radeon_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define RADEON_TWOSIDE_BIT 0x01 +#define RADEON_UNFILLED_BIT 0x02 +#define RADEON_MAX_TRIFUNC 0x04 + + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[RADEON_MAX_TRIFUNC]; + + +#define DO_FALLBACK 0 +#define DO_OFFSET 0 +#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT) +#define DO_TWOSIDE (IND & RADEON_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int))) + +#define VERT_SET_RGBA( v, c ) \ +do { \ + radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ +} while (0) + +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +#define VERT_SET_SPEC( v, c ) \ +do { \ + if (specoffset) { \ + radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \ + } \ +} while (0) +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \ + radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \ + spec0->red = spec1->red; \ + spec0->green = spec1->green; \ + spec0->blue = spec1->blue; \ + } \ +} while (0) + +/* These don't need LE32_TO_CPU() as they used to save and restore + * colors which are already in the correct format. + */ +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] ) +#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (RADEON_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (RADEON_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + radeon_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + radeon_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#undef INIT +#define INIT(x) do { \ + radeonRenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->swtcl.vertex_size; \ + const char *radeonverts = (char *)rmesa->swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE if ( stipple ) radeonResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) radeon_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) radeon_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ + +void radeonChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (!rmesa->TclFallback || rmesa->Fallback) + return; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT; + + if (index != rmesa->swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts; + tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->swtcl.RenderIndex = index; + } +} + + +/**********************************************************************/ +/* High level hooks for t_vb_render.c */ +/**********************************************************************/ + + +static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (rmesa->swtcl.hw_primitive != hwprim) { + RADEON_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; + } +} + +static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + rmesa->swtcl.render_primitive = prim; + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + radeonRasterPrimitive( ctx, reduced_hw_prim[prim] ); +} + +static void radeonRenderFinish( GLcontext *ctx ) +{ +} + +static void radeonResetLineStipple( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + RADEON_STATECHANGE( rmesa, lin ); +} + + +/**********************************************************************/ +/* Transition to/from hardware rasterization. */ +/**********************************************************************/ + +static const char * const fallbackStrings[] = { + "Texture mode", + "glDrawBuffer(GL_FRONT_AND_BACK)", + "glEnable(GL_STENCIL) without hw stencil buffer", + "glRenderMode(selection or feedback)", + "glBlendEquation", + "glBlendFunc", + "RADEON_NO_RAST", + "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)" +}; + + +static const char *getFallbackString(GLuint bit) +{ + int i = 0; + while (bit > 1) { + i++; + bit >>= 1; + } + return fallbackStrings[i]; +} + + +void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint oldfallback = rmesa->Fallback; + + if (mode) { + rmesa->Fallback |= bit; + if (oldfallback == 0) { + RADEON_FIREVERTICES( rmesa ); + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE ); + _swsetup_Wakeup( ctx ); + rmesa->swtcl.RenderIndex = ~0; + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); + } + } + } + else { + rmesa->Fallback &= ~bit; + if (oldfallback == bit) { + _swrast_flush( ctx ); + tnl->Driver.Render.Start = radeonRenderStart; + tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive; + tnl->Driver.Render.Finish = radeonRenderFinish; + + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple; + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE ); + if (rmesa->TclFallback) { + /* These are already done if rmesa->TclFallback goes to + * zero above. But not if it doesn't (RADEON_NO_TCL for + * example?) + */ + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + radeonChooseVertexState( ctx ); + radeonChooseRenderState( ctx ); + } + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n", + bit, getFallbackString(bit)); + } + } + } +} + + +/**********************************************************************/ +/* Initialization. */ +/**********************************************************************/ + +void radeonInitSwtcl( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = radeonRenderStart; + tnl->Driver.Render.Finish = radeonRenderFinish; + tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive; + tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + RADEON_MAX_TNL_VERTEX_SIZE); + + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; + rmesa->swtcl.hw_primitive = 0; +} + + +void radeonDestroySwtcl( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (rmesa->swtcl.indexed_verts.buf) + radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + __FUNCTION__ ); +} diff --git a/radeon/radeon_swtcl.h b/radeon/radeon_swtcl.h new file mode 100644 index 0000000..64f9019 --- /dev/null +++ b/radeon/radeon_swtcl.h @@ -0,0 +1,68 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#ifndef __RADEON_TRIS_H__ +#define __RADEON_TRIS_H__ + +#include "mtypes.h" +#include "swrast/swrast.h" +#include "radeon_context.h" + +extern void radeonInitSwtcl( GLcontext *ctx ); +extern void radeonDestroySwtcl( GLcontext *ctx ); + +extern void radeonChooseRenderState( GLcontext *ctx ); +extern void radeonChooseVertexState( GLcontext *ctx ); + +extern void radeonCheckTexSizes( GLcontext *ctx ); + +extern void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count, + GLuint newinputs ); + +extern void radeonPrintSetupFlags(char *msg, GLuint flags ); + + +extern void radeon_emit_indexed_verts( GLcontext *ctx, + GLuint start, + GLuint count ); + +extern void radeon_translate_vertex( GLcontext *ctx, + const radeonVertex *src, + SWvertex *dst ); + +extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v ); + + +#endif diff --git a/radeon/radeon_tcl.c b/radeon/radeon_tcl.c new file mode 100644 index 0000000..0f4baf2 --- /dev/null +++ b/radeon/radeon_tcl.c @@ -0,0 +1,575 @@ +/* $XFree86$ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + Tungsten Graphics Inc., Austin, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "light.h" +#include "mtypes.h" +#include "enums.h" + +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" +#include "radeon_tex.h" +#include "radeon_tcl.h" +#include "radeon_swtcl.h" +#include "radeon_maos.h" + + + +/* + * Render unclipped vertex buffers by emitting vertices directly to + * dma buffers. Use strip/fan hardware primitives where possible. + * Try to simulate missing primitives with indexed vertices. + */ +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_LOOP 0 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + + +#define HW_POINTS RADEON_CP_VC_CNTL_PRIM_TYPE_POINT +#define HW_LINES RADEON_CP_VC_CNTL_PRIM_TYPE_LINE +#define HW_LINE_LOOP 0 +#define HW_LINE_STRIP RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP +#define HW_TRIANGLES RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST +#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP +#define HW_TRIANGLE_STRIP_1 0 +#define HW_TRIANGLE_FAN RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN +#define HW_QUADS 0 +#define HW_QUAD_STRIP 0 +#define HW_POLYGON RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN + + +static GLboolean discrete_prim[0x10] = { + 0, /* 0 none */ + 1, /* 1 points */ + 1, /* 2 lines */ + 0, /* 3 line_strip */ + 1, /* 4 tri_list */ + 0, /* 5 tri_fan */ + 0, /* 6 tri_type2 */ + 1, /* 7 rect list (unused) */ + 1, /* 8 3vert point */ + 1, /* 9 3vert line */ + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define ELT_TYPE GLushort + +#define ELT_INIT(prim, hw_prim) \ + radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND ) + +#define GET_MESA_ELTS() rmesa->tcl.Elts + + +/* Don't really know how many elts will fit in what's left of cmdbuf, + * as there is state to emit, etc: + */ + +/* Testing on isosurf shows a maximum around here. Don't know if it's + * the card or driver or kernel module that is causing the behaviour. + */ +#define GET_MAX_HW_ELTS() 300 + + +#define RESET_STIPPLE() do { \ + RADEON_STATECHANGE( rmesa, lin ); \ + radeonEmitState( rmesa ); \ +} while (0) + +#define AUTO_STIPPLE( mode ) do { \ + RADEON_STATECHANGE( rmesa, lin ); \ + if (mode) \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \ + RADEON_LINE_PATTERN_AUTO_RESET; \ + else \ + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ + ~RADEON_LINE_PATTERN_AUTO_RESET; \ + radeonEmitState( rmesa ); \ +} while (0) + + + +#define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr ) + +static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) +{ + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + + rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); + + radeonEmitAOS( rmesa, + rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, 0 ); + + return radeonAllocEltsOpenEnded( rmesa, + rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, nr ); +} + +#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) + + + +/* TODO: Try to extend existing primitive if both are identical, + * discrete and there are no intervening state changes. (Somewhat + * duplicates changes to DrawArrays code) + */ +static void radeonEmitPrim( GLcontext *ctx, + GLenum prim, + GLuint hwprim, + GLuint start, + GLuint count) +{ + radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + radeonTclPrimitive( ctx, prim, hwprim ); + + radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + + rmesa->hw.max_state_size + VBUF_BUFSZ ); + + radeonEmitAOS( rmesa, + rmesa->tcl.aos_components, + rmesa->tcl.nr_aos_components, + start ); + + /* Why couldn't this packet have taken an offset param? + */ + radeonEmitVbufPrim( rmesa, + rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, + count - start ); +} + +#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do { \ + radeonEmitPrim( ctx, prim, hwprim, start, count ); \ + (void) rmesa; } while (0) + +/* Try & join small primitives + */ +#if 0 +#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0 +#else +#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \ + ((NR) < 20 || \ + ((NR) < 40 && \ + rmesa->tcl.hw_primitive == (PRIM| \ + RADEON_CP_VC_CNTL_PRIM_WALK_IND| \ + RADEON_CP_VC_CNTL_TCL_ENABLE))) +#endif + +#ifdef MESA_BIG_ENDIAN +/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */ +#define EMIT_ELT(dest, offset, x) do { \ + int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 ); \ + GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 ); \ + (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \ + (void)rmesa; } while (0) +#else +#define EMIT_ELT(dest, offset, x) do { \ + (dest)[offset] = (GLushort) (x); \ + (void)rmesa; } while (0) +#endif + +#define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)(dest+offset) = ((y)<<16)|(x); + + + +#define TAG(x) tcl_##x +#include "tnl_dd/t_dd_dmatmp2.h" + +/**********************************************************************/ +/* External entrypoints */ +/**********************************************************************/ + +void radeonEmitPrimitive( GLcontext *ctx, + GLuint first, + GLuint last, + GLuint flags ) +{ + tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); +} + +void radeonEmitEltPrimitive( GLcontext *ctx, + GLuint first, + GLuint last, + GLuint flags ) +{ + tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags ); +} + +void radeonTclPrimitive( GLcontext *ctx, + GLenum prim, + int hw_prim ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint se_cntl; + GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + + if (newprim != rmesa->tcl.hw_primitive || + !discrete_prim[hw_prim&0xf]) { + RADEON_NEWPRIM( rmesa ); + rmesa->tcl.hw_primitive = newprim; + } + + se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL]; + se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST; + + if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE)) + se_cntl |= RADEON_FLAT_SHADE_VTX_0; + else + se_cntl |= RADEON_FLAT_SHADE_VTX_LAST; + + if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl; + } +} + +/**********************************************************************/ +/* Fog blend factor computation for hw tcl */ +/* same calculation used as in t_vb_fog.c */ +/**********************************************************************/ + +#define FOG_EXP_TABLE_SIZE 256 +#define FOG_MAX (10.0) +#define EXP_FOG_MAX .0006595 +#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE) +static GLfloat exp_table[FOG_EXP_TABLE_SIZE]; + +#if 1 +#define NEG_EXP( result, narg ) \ +do { \ + GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \ + GLint k = (GLint) f; \ + if (k > FOG_EXP_TABLE_SIZE-2) \ + result = (GLfloat) EXP_FOG_MAX; \ + else \ + result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \ +} while (0) +#else +#define NEG_EXP( result, narg ) \ +do { \ + result = exp(-narg); \ +} while (0) +#endif + + +/** + * Initialize the exp_table[] lookup table for approximating exp(). + */ +void +radeonInitStaticFogData( void ) +{ + GLfloat f = 0.0F; + GLint i = 0; + for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) { + exp_table[i] = (GLfloat) exp(-f); + } +} + + +/** + * Compute per-vertex fog blend factors from fog coordinates by + * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function. + * Fog coordinates are distances from the eye (typically between the + * near and far clip plane distances). + * Note the fog (eye Z) coords may be negative so we use ABS(z) below. + * Fog blend factors are in the range [0,1]. + */ +float +radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) +{ + GLfloat end = ctx->Fog.End; + GLfloat d, temp; + const GLfloat z = FABSF(fogcoord); + + switch (ctx->Fog.Mode) { + case GL_LINEAR: + if (ctx->Fog.Start == ctx->Fog.End) + d = 1.0F; + else + d = 1.0F / (ctx->Fog.End - ctx->Fog.Start); + temp = (end - z) * d; + return CLAMP(temp, 0.0F, 1.0F); + break; + case GL_EXP: + d = ctx->Fog.Density; + NEG_EXP( temp, d * z ); + return temp; + break; + case GL_EXP2: + d = ctx->Fog.Density*ctx->Fog.Density; + NEG_EXP( temp, d * z * z ); + return temp; + break; + default: + _mesa_problem(ctx, "Bad fog mode in make_fog_coord"); + return 0; + } +} + +/**********************************************************************/ +/* Render pipeline stage */ +/**********************************************************************/ + + +/* TCL render. + */ +static GLboolean radeon_run_tcl_render( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; + GLuint i; + + /* TODO: separate this from the swtnl pipeline + */ + if (rmesa->TclFallback) + return GL_TRUE; /* fallback to software t&l */ + + if (VB->Count == 0) + return GL_FALSE; + + /* NOTE: inputs != tnl->render_inputs - these are the untransformed + * inputs. + */ + if (ctx->Light.Enabled) { + inputs |= VERT_BIT_NORMAL; + } + + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { + inputs |= VERT_BIT_COLOR1; + } + + if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { + inputs |= VERT_BIT_FOG; + } + + for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + /* TODO: probably should not emit texture coords when texgen is enabled */ + if (rmesa->TexGenNeedNormals[i]) { + inputs |= VERT_BIT_NORMAL; + } + inputs |= VERT_BIT_TEX(i); + } + } + + radeonReleaseArrays( ctx, ~0 ); + radeonEmitArrays( ctx, inputs ); + + rmesa->tcl.Elts = VB->Elts; + + for (i = 0 ; i < VB->PrimitiveCount ; i++) + { + GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + if (!length) + continue; + + if (rmesa->tcl.Elts) + radeonEmitEltPrimitive( ctx, start, start+length, prim ); + else + radeonEmitPrimitive( ctx, start, start+length, prim ); + } + + return GL_FALSE; /* finished the pipe */ +} + + + +/* Initial state for tcl stage. + */ +const struct tnl_pipeline_stage _radeon_tcl_stage = +{ + "radeon render", + NULL, + NULL, + NULL, + NULL, + radeon_run_tcl_render /* run */ +}; + + + +/**********************************************************************/ +/* Validate state at pipeline start */ +/**********************************************************************/ + + +/*----------------------------------------------------------------------- + * Manage TCL fallbacks + */ + + +static void transition_to_swtnl( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint se_cntl; + + RADEON_NEWPRIM( rmesa ); + rmesa->swtcl.vertex_format = 0; + + radeonChooseVertexState( ctx ); + radeonChooseRenderState( ctx ); + + _mesa_validate_all_lighting_tables( ctx ); + + tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; + + radeonReleaseArrays( ctx, ~0 ); + + se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL]; + se_cntl |= RADEON_FLAT_SHADE_VTX_LAST; + + if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl; + } +} + + +static void transition_to_hwtnl( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_Z_PRE_MULT_1_OVER_W0 | + RADEON_VTX_W0_IS_NOT_1_OVER_W0); + se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0; + + if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; + _tnl_need_projected_coords( ctx, GL_FALSE ); + } + + radeonUpdateMaterial( ctx ); + + tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; + + if ( rmesa->dma.flush ) + rmesa->dma.flush( rmesa ); + + rmesa->dma.flush = NULL; + rmesa->swtcl.vertex_format = 0; + + if (rmesa->swtcl.indexed_verts.buf) + radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + __FUNCTION__ ); + + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon end tcl fallback\n"); +} + +static char *fallbackStrings[] = { + "Rasterization fallback", + "Unfilled triangles", + "Twosided lighting, differing materials", + "Materials in VB (maybe between begin/end)", + "Texgen unit 0", + "Texgen unit 1", + "Texgen unit 2", + "User disable", + "Fogcoord with separate specular lighting" +}; + + +static char *getFallbackString(GLuint bit) +{ + int i = 0; + while (bit > 1) { + i++; + bit >>= 1; + } + return fallbackStrings[i]; +} + + + +void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint oldfallback = rmesa->TclFallback; + + if (mode) { + rmesa->TclFallback |= bit; + if (oldfallback == 0) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon begin tcl fallback %s\n", + getFallbackString( bit )); + transition_to_swtnl( ctx ); + } + } + else { + rmesa->TclFallback &= ~bit; + if (oldfallback == bit) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "Radeon end tcl fallback %s\n", + getFallbackString( bit )); + transition_to_hwtnl( ctx ); + } + } +} diff --git a/radeon/radeon_tcl.h b/radeon/radeon_tcl.h new file mode 100644 index 0000000..168ab95 --- /dev/null +++ b/radeon/radeon_tcl.h @@ -0,0 +1,68 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h,v 1.2 2003/02/08 21:26:45 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + Tungsten Grahpics Inc., Austin, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * + */ + +#ifndef __RADEON_TCL_H__ +#define __RADEON_TCL_H__ + +#include "radeon_context.h" + +extern void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim ); +extern void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last, + GLuint flags ); +extern void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last, + GLuint flags ); + +extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ); + +extern void radeonInitStaticFogData( void ); +extern float radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ); + +#define RADEON_TCL_FALLBACK_RASTER 0x1 /* rasterization */ +#define RADEON_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */ +#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */ +#define RADEON_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */ +#define RADEON_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */ +#define RADEON_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */ +#define RADEON_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */ +#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x80 /* user disable */ +#define RADEON_TCL_FALLBACK_FOGCOORDSPEC 0x100 /* fogcoord, sep. spec light */ + +/* max maos_verts vertex format has a size of 18 floats */ +#define RADEON_MAX_TCL_VERTSIZE (18*4) + +#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) + +#endif diff --git a/radeon/radeon_tex.c b/radeon/radeon_tex.c new file mode 100644 index 0000000..edaea6c --- /dev/null +++ b/radeon/radeon_tex.c @@ -0,0 +1,883 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.c,v 1.6 2002/09/16 18:05:20 eich Exp $ */ +/* +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Brian Paul <brianp@valinux.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "context.h" +#include "enums.h" +#include "image.h" +#include "simple_list.h" +#include "texformat.h" +#include "texstore.h" +#include "teximage.h" +#include "texobj.h" + + +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" +#include "radeon_swtcl.h" +#include "radeon_tex.h" + +#include "xmlpool.h" + + + +/** + * Set the texture wrap modes. + * + * \param t Texture object whose wrap modes are to be set + * \param swrap Wrap mode for the \a s texture coordinate + * \param twrap Wrap mode for the \a t texture coordinate + */ + +static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap ) +{ + GLboolean is_clamp = GL_FALSE; + GLboolean is_clamp_to_border = GL_FALSE; + + t->pp_txfilter &= ~(RADEON_CLAMP_S_MASK | RADEON_CLAMP_T_MASK | RADEON_BORDER_MODE_D3D); + + switch ( swrap ) { + case GL_REPEAT: + t->pp_txfilter |= RADEON_CLAMP_S_WRAP; + break; + case GL_CLAMP: + t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txfilter |= RADEON_CLAMP_S_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + } + + switch ( twrap ) { + case GL_REPEAT: + t->pp_txfilter |= RADEON_CLAMP_T_WRAP; + break; + case GL_CLAMP: + t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_CLAMP_TO_EDGE: + t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_LAST; + break; + case GL_CLAMP_TO_BORDER: + t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + case GL_MIRRORED_REPEAT: + t->pp_txfilter |= RADEON_CLAMP_T_MIRROR; + break; + case GL_MIRROR_CLAMP_EXT: + t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp = GL_TRUE; + break; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: + t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_LAST; + break; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: + t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_GL; + is_clamp_to_border = GL_TRUE; + break; + default: + _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + } + + if ( is_clamp_to_border ) { + t->pp_txfilter |= RADEON_BORDER_MODE_D3D; + } + + t->border_fallback = (is_clamp && is_clamp_to_border); +} + +static void radeonSetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max ) +{ + t->pp_txfilter &= ~RADEON_MAX_ANISO_MASK; + + if ( max == 1.0 ) { + t->pp_txfilter |= RADEON_MAX_ANISO_1_TO_1; + } else if ( max <= 2.0 ) { + t->pp_txfilter |= RADEON_MAX_ANISO_2_TO_1; + } else if ( max <= 4.0 ) { + t->pp_txfilter |= RADEON_MAX_ANISO_4_TO_1; + } else if ( max <= 8.0 ) { + t->pp_txfilter |= RADEON_MAX_ANISO_8_TO_1; + } else { + t->pp_txfilter |= RADEON_MAX_ANISO_16_TO_1; + } +} + +/** + * Set the texture magnification and minification modes. + * + * \param t Texture whose filter modes are to be set + * \param minf Texture minification mode + * \param magf Texture magnification mode + */ + +static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) +{ + GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK); + + t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK); + + /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */ + if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) { + switch ( minf ) { + case GL_NEAREST: + case GL_NEAREST_MIPMAP_NEAREST: + case GL_NEAREST_MIPMAP_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + case GL_LINEAR_MIPMAP_NEAREST: + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR; + break; + default: + break; + } + } + else if ( anisotropy == RADEON_MAX_ANISO_1_TO_1 ) { + switch ( minf ) { + case GL_NEAREST: + t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_LINEAR; + break; + } + } else { + switch ( minf ) { + case GL_NEAREST: + t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR; + break; + } + } + + switch ( magf ) { + case GL_NEAREST: + t->pp_txfilter |= RADEON_MAG_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= RADEON_MAG_FILTER_LINEAR; + break; + } +} + +static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) +{ + t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); +} + + +/** + * Allocate space for and load the mesa images into the texture memory block. + * This will happen before drawing with a new texture, or drawing with a + * texture after it was swapped out or teximaged again. + */ + +static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj ) +{ + radeonTexObjPtr t; + + t = CALLOC_STRUCT( radeon_tex_obj ); + texObj->DriverData = t; + if ( t != NULL ) { + if ( RADEON_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t ); + } + + /* Initialize non-image-dependent parts of the state: + */ + t->base.tObj = texObj; + t->border_fallback = GL_FALSE; + + t->pp_txfilter = RADEON_BORDER_MODE_OGL; + t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | + RADEON_TXFORMAT_PERSPECTIVE_ENABLE); + + make_empty_list( & t->base ); + + radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); + radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); + radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); + radeonSetTexBorderColor( t, texObj->_BorderChan ); + } + + return t; +} + + +static const struct gl_texture_format * +radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, + GLenum format, GLenum type ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + const GLboolean do32bpt = + ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); + const GLboolean force16bpt = + ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); + (void) format; + + switch ( internalFormat ) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + switch ( type ) { + case GL_UNSIGNED_INT_10_10_10_2: + case GL_UNSIGNED_INT_2_10_10_10_REV: + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + default: + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444; + } + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + switch ( type ) { + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_5_6_5: + case GL_UNSIGNED_SHORT_5_6_5_REV: + return _dri_texformat_rgb565; + default: + return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; + } + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return !force16bpt ? + _dri_texformat_argb8888 : _dri_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return _dri_texformat_argb4444; + + case GL_RGB5_A1: + return _dri_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return _dri_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + return _dri_texformat_a8; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return _dri_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return _dri_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return _dri_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + default: + _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__); + return NULL; + } + + return NULL; /* never get here */ +} + + +static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); + return; + } + } + + /* Note, this will call ChooseTextureFormat */ + _mesa_store_teximage1d(ctx, target, level, internalFormat, + width, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[0] |= (1 << level); +} + + +static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); + return; + } + } + + _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, + format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[0] |= (1 << level); +} + + +static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if ( t != NULL ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); + return; + } + } + + /* Note, this will call ChooseTextureFormat */ + _mesa_store_teximage2d(ctx, target, level, internalFormat, + width, height, border, format, type, pixels, + &ctx->Unpack, texObj, texImage); + + t->dirty_images[face] |= (1 << level); +} + + +static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); + return; + } + } + + _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, type, pixels, packing, texObj, + texImage); + + t->dirty_images[face] |= (1 << level); +} + +static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + if ( t != NULL ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); + return; + } + } + + /* Note, this will call ChooseTextureFormat */ + _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, + height, border, imageSize, data, texObj, texImage); + + t->dirty_images[face] |= (1 << level); +} + + +static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) +{ + driTextureObject * t = (driTextureObject *) texObj->DriverData; + GLuint face; + + + /* which cube face or ordinary 2D image */ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + ASSERT(face < 6); + break; + default: + face = 0; + } + + assert( t ); /* this _should_ be true */ + if ( t ) { + driSwapOutTextureObject( t ); + } + else { + t = (driTextureObject *) radeonAllocTexObj( texObj ); + if (!t) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); + return; + } + } + + _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, + height, format, imageSize, data, texObj, texImage); + + t->dirty_images[face] |= (1 << level); +} + +#define SCALED_FLOAT_TO_BYTE( x, scale ) \ + (((GLuint)((255.0F / scale) * (x))) / 2) + +static void radeonTexEnv( GLcontext *ctx, GLenum target, + GLenum pname, const GLfloat *param ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + if ( RADEON_DEBUG & DEBUG_STATE ) { + fprintf( stderr, "%s( %s )\n", + __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); + } + + switch ( pname ) { + case GL_TEXTURE_ENV_COLOR: { + GLubyte c[4]; + GLuint envColor; + UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor ); + envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); + if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) { + RADEON_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor; + } + break; + } + + case GL_TEXTURE_LOD_BIAS_EXT: { + GLfloat bias, min; + GLuint b; + + /* The Radeon's LOD bias is a signed 2's complement value with a + * range of -1.0 <= bias < 4.0. We break this into two linear + * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping + * [0.0,4.0] to [0,127]. + */ + min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? + 0.0 : -1.0; + bias = CLAMP( *param, min, 4.0 ); + if ( bias == 0 ) { + b = 0; + } else if ( bias > 0 ) { + b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT; + } else { + b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT; + } + if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) { + RADEON_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK; + rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= (b & RADEON_LOD_BIAS_MASK); + } + break; + } + + default: + return; + } +} + + +/** + * Changes variables and flags for a state update, which will happen at the + * next UpdateTextureState + */ + +static void radeonTexParameter( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat *params ) +{ + radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData; + + if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( pname ) ); + } + + switch ( pname ) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); + radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); + break; + + case GL_TEXTURE_BORDER_COLOR: + radeonSetTexBorderColor( t, texObj->_BorderChan ); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + /* This isn't the most efficient solution but there doesn't appear to + * be a nice alternative. Since there's no LOD clamping, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. + */ + driSwapOutTextureObject( (driTextureObject *) t ); + break; + + default: + return; + } + + /* Mark this texobj as dirty (one bit per tex unit) + */ + t->dirty_state = TEX_ALL; +} + + +static void radeonBindTexture( GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj ) +{ + if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, + ctx->Texture.CurrentUnit ); + } + + assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D && + target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) || + (texObj->DriverData != NULL) ); +} + + +static void radeonDeleteTexture( GLcontext *ctx, + struct gl_texture_object *texObj ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + driTextureObject * t = (driTextureObject *) texObj->DriverData; + + if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { + fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, + _mesa_lookup_enum_by_nr( texObj->Target ) ); + } + + if ( t != NULL ) { + if ( rmesa ) { + RADEON_FIREVERTICES( rmesa ); + } + + driDestroyTextureObject( t ); + } + + /* Free mipmap images and the texture object itself */ + _mesa_delete_texture_object(ctx, texObj); +} + +/* Need: + * - Same GEN_MODE for all active bits + * - Same EyePlane/ObjPlane for all active bits when using Eye/Obj + * - STRQ presumably all supported (matrix means incoming R values + * can end up in STQ, this has implications for vertex support, + * presumably ok if maos is used, though?) + * + * Basically impossible to do this on the fly - just collect some + * basic info & do the checks from ValidateState(). + */ +static void radeonTexGen( GLcontext *ctx, + GLenum coord, + GLenum pname, + const GLfloat *params ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLuint unit = ctx->Texture.CurrentUnit; + rmesa->recheck_texgen[unit] = GL_TRUE; +} + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: we could use containment here to 'derive' the driver-specific + * texture object from the core mesa gl_texture_object. Not done at this time. + */ +static struct gl_texture_object * +radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_object *obj; + obj = _mesa_new_texture_object(ctx, name, target); + if (!obj) + return NULL; + obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + radeonAllocTexObj( obj ); + return obj; +} + + +void radeonInitTextureFuncs( struct dd_function_table *functions ) +{ + functions->ChooseTextureFormat = radeonChooseTextureFormat; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; + + functions->NewTextureObject = radeonNewTextureObject; + functions->BindTexture = radeonBindTexture; + functions->DeleteTexture = radeonDeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexEnv = radeonTexEnv; + functions->TexParameter = radeonTexParameter; + functions->TexGen = radeonTexGen; + + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + + driInitTextureFormats(); +} diff --git a/radeon/radeon_tex.h b/radeon/radeon_tex.h new file mode 100644 index 0000000..a806981 --- /dev/null +++ b/radeon/radeon_tex.h @@ -0,0 +1,50 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_tex.h,v 1.3 2002/02/22 21:45:01 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * + */ + +#ifndef __RADEON_TEX_H__ +#define __RADEON_TEX_H__ + +extern void radeonUpdateTextureState( GLcontext *ctx ); + +extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, + GLuint face ); + +extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ); + +extern void radeonInitTextureFuncs( struct dd_function_table *functions ); + +#endif /* __RADEON_TEX_H__ */ diff --git a/radeon/radeon_texmem.c b/radeon/radeon_texmem.c new file mode 100644 index 0000000..20f25dd --- /dev/null +++ b/radeon/radeon_texmem.c @@ -0,0 +1,405 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texmem.c,v 1.7 2002/12/16 16:18:59 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation on the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR +SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * + */ +#include <errno.h> + +#include "glheader.h" +#include "imports.h" +#include "context.h" +#include "macros.h" + +#include "radeon_context.h" +#include "radeon_ioctl.h" +#include "radeon_tex.h" + +#include <unistd.h> /* for usleep() */ + + +/** + * Destroy any device-dependent state associated with the texture. This may + * include NULLing out hardware state that points to the texture. + */ +void +radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ) +{ + if ( RADEON_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj ); + } + + if ( rmesa != NULL ) { + unsigned i; + + + for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { + if ( t == rmesa->state.texture.unit[i].texobj ) { + rmesa->state.texture.unit[i].texobj = NULL; + } + } + } +} + + +/* ------------------------------------------------------------ + * Texture image conversions + */ + + +static void radeonUploadRectSubImage( radeonContextPtr rmesa, + radeonTexObjPtr t, + struct gl_texture_image *texImage, + GLint x, GLint y, + GLint width, GLint height ) +{ + const struct gl_texture_format *texFormat = texImage->TexFormat; + int blit_format, dstPitch, done; + + switch ( texFormat->TexelBytes ) { + case 1: + blit_format = RADEON_GMC_DST_8BPP_CI; + break; + case 2: + blit_format = RADEON_GMC_DST_16BPP; + break; + case 4: + blit_format = RADEON_GMC_DST_32BPP; + break; + default: + fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", + texFormat->TexelBytes); + return; + } + + t->image[0][0].data = texImage->Data; + + /* Currently don't need to cope with small pitches. + */ + width = texImage->Width; + height = texImage->Height; + dstPitch = t->pp_txpitch + 32; + + { /* FIXME: prefer GART-texturing if possible */ + /* Data not in GART memory, or bad pitch. + */ + for (done = 0; done < height ; ) { + struct radeon_dma_region region; + int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); + int src_pitch; + char *tex; + + src_pitch = texImage->RowStride * texFormat->TexelBytes; + + tex = (char *)texImage->Data + done * src_pitch; + + memset(®ion, 0, sizeof(region)); + radeonAllocDmaRegion( rmesa, ®ion, lines * dstPitch, 1024 ); + + /* Copy texdata to dma: + */ + if (0) + fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", + __FUNCTION__, src_pitch, dstPitch); + + if (src_pitch == dstPitch) { + memcpy( region.address + region.start, tex, lines * src_pitch ); + } + else { + char *buf = region.address + region.start; + int i; + for (i = 0 ; i < lines ; i++) { + memcpy( buf, tex, src_pitch ); + buf += dstPitch; + tex += src_pitch; + } + } + + radeonEmitWait( rmesa, RADEON_WAIT_3D ); + + + + /* Blit to framebuffer + */ + radeonEmitBlit( rmesa, + blit_format, + dstPitch, GET_START( ®ion ), + dstPitch, t->bufAddr, + 0, 0, + 0, done, + width, lines ); + + radeonEmitWait( rmesa, RADEON_WAIT_2D ); + + radeonReleaseDmaRegion( rmesa, ®ion, __FUNCTION__ ); + done += lines; + } + } +} + + +/** + * Upload the texture image associated with texture \a t at the specified + * level at the address relative to \a start. + */ +static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, + GLint hwlevel, + GLint x, GLint y, GLint width, GLint height, + GLuint face ) +{ + struct gl_texture_image *texImage = NULL; + GLuint offset; + GLint imageWidth, imageHeight; + GLint ret; + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + const int level = hwlevel + t->base.firstLevel; + + if ( RADEON_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", + __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face ); + } + + ASSERT(face < 6); + + /* Ensure we have a valid texture to upload */ + if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { + _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); + return; + } + + texImage = t->base.tObj->Image[face][level]; + + if ( !texImage ) { + if ( RADEON_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); + return; + } + if ( !texImage->Data ) { + if ( RADEON_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); + return; + } + + + if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + assert(level == 0); + assert(hwlevel == 0); + if ( RADEON_DEBUG & DEBUG_TEXTURE ) + fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); + radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height ); + return; + } + + imageWidth = texImage->Width; + imageHeight = texImage->Height; + + offset = t->bufAddr + t->base.totalSize * face / 6; + + if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { + GLint imageX = 0; + GLint imageY = 0; + GLint blitX = t->image[face][hwlevel].x; + GLint blitY = t->image[face][hwlevel].y; + GLint blitWidth = t->image[face][hwlevel].width; + GLint blitHeight = t->image[face][hwlevel].height; + fprintf( stderr, " upload image: %d,%d at %d,%d\n", + imageWidth, imageHeight, imageX, imageY ); + fprintf( stderr, " upload blit: %d,%d at %d,%d\n", + blitWidth, blitHeight, blitX, blitY ); + fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", + (GLuint)offset, hwlevel, level ); + } + + t->image[face][hwlevel].data = texImage->Data; + + /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. + * NOTE: we're always use a 1KB-wide blit and I8 texture format. + * We used to use 1, 2 and 4-byte texels and used to use the texture + * width to dictate the blit width - but that won't work for compressed + * textures. (Brian) + * NOTE: can't do that with texture tiling. (sroland) + */ + tex.offset = offset; + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); + + if (texImage->TexFormat->TexelBytes) { + /* use multi-byte upload scheme */ + tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK; + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & RADEON_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) { + /* radeon switches off macro tiling for small textures/mipmaps it seems */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } + } + else { + /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is + needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ + /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed + so the kernel module reads the right amount of data. */ + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); + tex.height = (imageHeight + 3) / 4; + tex.width = (imageWidth + 3) / 4; + switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) { + case RADEON_TXFORMAT_DXT1: + tex.width *= 8; + break; + case RADEON_TXFORMAT_DXT23: + case RADEON_TXFORMAT_DXT45: + tex.width *= 16; + break; + } + } + + LOCK_HARDWARE( rmesa ); + do { + ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, + &tex, sizeof(drm_radeon_texture_t) ); + } while ( ret == -EAGAIN ); + + UNLOCK_HARDWARE( rmesa ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); + fprintf( stderr, " offset=0x%08x\n", + offset ); + fprintf( stderr, " image width=%d height=%d\n", + imageWidth, imageHeight ); + fprintf( stderr, " blit width=%d height=%d data=%p\n", + t->image[face][hwlevel].width, t->image[face][hwlevel].height, + t->image[face][hwlevel].data ); + exit( 1 ); + } +} + + +/** + * Upload the texture images associated with texture \a t. This might + * require the allocation of texture memory. + * + * \param rmesa Context pointer + * \param t Texture to be uploaded + * \param face Cube map face to be uploaded. Zero for non-cube maps. + */ + +int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face ) +{ + int numLevels; + + if ( !t || t->base.totalSize == 0 ) + return 0; + + if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { + fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, + (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, + t->base.firstLevel, t->base.lastLevel ); + } + + numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); + radeonFinish( rmesa->glCtx ); + } + + LOCK_HARDWARE( rmesa ); + + if ( t->base.memBlock == NULL ) { + int heap; + + heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, + (driTextureObject *) t ); + if ( heap == -1 ) { + UNLOCK_HARDWARE( rmesa ); + return -1; + } + + /* Set the base offset of the texture image */ + t->bufAddr = rmesa->radeonScreen->texOffset[heap] + + t->base.memBlock->ofs; + t->pp_txoffset = t->bufAddr; + + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->pp_txoffset |= t->tile_bits; + } + + /* Mark this texobj as dirty on all units: + */ + t->dirty_state = TEX_ALL; + } + + + /* Let the world know we've used this memory recently. + */ + driUpdateTextureLRU( (driTextureObject *) t ); + UNLOCK_HARDWARE( rmesa ); + + + /* Upload any images that are new */ + if (t->base.dirty_images[face]) { + int i; + for ( i = 0 ; i < numLevels ; i++ ) { + if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { + uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, + t->image[face][i].height, face ); + } + } + t->base.dirty_images[face] = 0; + } + + if (RADEON_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); + radeonFinish( rmesa->glCtx ); + } + + return 0; +} diff --git a/radeon/radeon_texstate.c b/radeon/radeon_texstate.c new file mode 100644 index 0000000..37bb749 --- /dev/null +++ b/radeon/radeon_texstate.c @@ -0,0 +1,1334 @@ +/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c,v 1.6 2002/12/16 16:18:59 dawes Exp $ */ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + */ + +#include "glheader.h" +#include "imports.h" +#include "colormac.h" +#include "context.h" +#include "macros.h" +#include "texformat.h" +#include "enums.h" + +#include "radeon_context.h" +#include "radeon_state.h" +#include "radeon_ioctl.h" +#include "radeon_swtcl.h" +#include "radeon_tex.h" +#include "radeon_tcl.h" + + +#define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8 +#define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8 +#define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88 +#define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422 +#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422 +#define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1 +#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1 +#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23 +#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45 + +#define _COLOR(f) \ + [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 } +#define _COLOR_REV(f) \ + [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 } +#define _ALPHA(f) \ + [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 } +#define _ALPHA_REV(f) \ + [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 } +#define _YUV(f) \ + [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB } +#define _INVALID(f) \ + [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 } +#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ + && (tx_table[f].format != 0xffffffff) ) + +static const struct { + GLuint format, filter; +} +tx_table[] = +{ + _ALPHA(RGBA8888), + _ALPHA_REV(RGBA8888), + _ALPHA(ARGB8888), + _ALPHA_REV(ARGB8888), + _INVALID(RGB888), + _COLOR(RGB565), + _COLOR_REV(RGB565), + _ALPHA(ARGB4444), + _ALPHA_REV(ARGB4444), + _ALPHA(ARGB1555), + _ALPHA_REV(ARGB1555), + _ALPHA(AL88), + _ALPHA_REV(AL88), + _ALPHA(A8), + _COLOR(L8), + _ALPHA(I8), + _INVALID(CI8), + _YUV(YCBCR), + _YUV(YCBCR_REV), + _INVALID(RGB_FXT1), + _INVALID(RGBA_FXT1), + _COLOR(RGB_DXT1), + _ALPHA(RGBA_DXT1), + _ALPHA(RGBA_DXT3), + _ALPHA(RGBA_DXT5), +}; + +#undef _COLOR +#undef _ALPHA +#undef _INVALID + +/** + * This function computes the number of bytes of storage needed for + * the given texture object (all mipmap levels, all cube faces). + * The \c image[face][level].x/y/width/height parameters for upload/blitting + * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here + * too. + * + * \param rmesa Context pointer + * \param tObj GL texture object whose images are to be posted to + * hardware state. + */ +static void radeonSetTexImages( radeonContextPtr rmesa, + struct gl_texture_object *tObj ) +{ + radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData; + const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; + GLint curOffset, blitWidth; + GLint i, texelBytes; + GLint numLevels; + GLint log2Width, log2Height, log2Depth; + + /* Set the hardware texture format + */ + + t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | + RADEON_TXFORMAT_ALPHA_IN_MAP); + t->pp_txfilter &= ~RADEON_YUV_TO_RGB; + + if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { + t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format; + t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter; + } + else { + _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); + return; + } + + texelBytes = baseImage->TexFormat->TexelBytes; + + /* Compute which mipmap levels we really want to send to the hardware. + */ + + if (tObj->Target != GL_TEXTURE_CUBE_MAP) + driCalculateTextureFirstLastLevel( (driTextureObject *) t ); + else { + /* r100 can't handle mipmaps for cube/3d textures, so don't waste + memory for them */ + t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel; + } + log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; + log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; + log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; + + numLevels = t->base.lastLevel - t->base.firstLevel + 1; + + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + + /* Calculate mipmap offsets and dimensions for blitting (uploading) + * The idea is that we lay out the mipmap levels within a block of + * memory organized as a rectangle of width BLIT_WIDTH_BYTES. + */ + curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ + if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) { + if (rmesa->texmicrotile && (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + /* R100 has two microtile bits (only the txoffset reg, not the blitter) + weird: X2 + OPT: 32bit correct, 16bit completely hosed + X2: 32bit correct, 16bit correct + OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */ + t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/; + } + } + if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) { + /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not + in the case if height is smaller than 16 (not 100% sure), as does the r200, + so need to disable macro tiling in that case */ + if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) { + t->tile_bits |= RADEON_TXO_MACRO_TILE; + } + } + } + + for (i = 0; i < numLevels; i++) { + const struct gl_texture_image *texImage; + GLuint size; + + texImage = tObj->Image[0][i + t->base.firstLevel]; + if ( !texImage ) + break; + + /* find image size in bytes */ + if (texImage->IsCompressed) { + /* need to calculate the size AFTER padding even though the texture is + submitted without padding. + Only handle pot textures currently - don't know if npot is even possible, + size calculation would certainly need (trivial) adjustments. + Align (and later pad) to 32byte, not sure what that 64byte blit width is + good for? */ + if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) { + /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ + if ((texImage->Width + 3) < 8) /* width one block */ + size = texImage->CompressedSize * 4; + else if ((texImage->Width + 3) < 16) + size = texImage->CompressedSize * 2; + else size = texImage->CompressedSize; + } + else /* DXT3/5, 16 bytes per block */ + if ((texImage->Width + 3) < 8) + size = texImage->CompressedSize * 2; + else size = texImage->CompressedSize; + } + else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + else { + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + assert(size > 0); + + /* Align to 32-byte offset. It is faster to do this unconditionally + * (no branch penalty). + */ + + curOffset = (curOffset + 0x1f) & ~0x1f; + + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } + +#if 0 + /* for debugging only and only applicable to non-rectangle targets */ + assert(size % t->image[0][i].width == 0); + assert(t->image[0][i].x == 0 + || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); +#endif + + if (0) + fprintf(stderr, + "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + i, texImage->Width, texImage->Height, + t->image[0][i].x, t->image[0][i].y, + t->image[0][i].width, t->image[0][i].height, size, curOffset); + + curOffset += size; + + } + + /* Align the total size of texture memory block. + */ + t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; + + /* Setup remaining cube face blits, if needed */ + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + const GLuint faceSize = t->base.totalSize; + GLuint face; + /* reuse face 0 x/y/width/height - just update the offset when uploading */ + for (face = 1; face < 6; face++) { + for (i = 0; i < numLevels; i++) { + t->image[face][i].x = t->image[0][i].x; + t->image[face][i].y = t->image[0][i].y; + t->image[face][i].width = t->image[0][i].width; + t->image[face][i].height = t->image[0][i].height; + } + } + t->base.totalSize = 6 * faceSize; /* total texmem needed */ + } + + /* Hardware state: + */ + t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; + t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT; + + t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | + RADEON_TXFORMAT_HEIGHT_MASK | + RADEON_TXFORMAT_CUBIC_MAP_ENABLE | + RADEON_TXFORMAT_F5_WIDTH_MASK | + RADEON_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | + (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); + + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + assert(log2Width == log2Height); + t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | + (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) | + (RADEON_TXFORMAT_CUBIC_MAP_ENABLE)); + t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_2_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_3_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_4_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_4_SHIFT)); + } + + t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | + ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); + + /* Only need to round to nearest 32 for textures, but the blitter + * requires 64-byte aligned pitches, and we may/may not need the + * blitter. NPOT only! + */ + if (baseImage->IsCompressed) + t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); + else + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); + t->pp_txpitch -= 32; + + t->dirty_state = TEX_ALL; + + /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */ +} + + + +/* ================================================================ + * Texture combine functions + */ + +/* GL_ARB_texture_env_combine support + */ + +/* The color tables have combine functions for GL_SRC_COLOR, + * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA. + */ +static GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] = +{ + { + RADEON_COLOR_ARG_A_T0_COLOR, + RADEON_COLOR_ARG_A_T1_COLOR, + RADEON_COLOR_ARG_A_T2_COLOR + }, + { + RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A + }, + { + RADEON_COLOR_ARG_A_T0_ALPHA, + RADEON_COLOR_ARG_A_T1_ALPHA, + RADEON_COLOR_ARG_A_T2_ALPHA + }, + { + RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A + }, +}; + +static GLuint radeon_tfactor_color[] = +{ + RADEON_COLOR_ARG_A_TFACTOR_COLOR, + RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_TFACTOR_ALPHA, + RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A +}; + +static GLuint radeon_primary_color[] = +{ + RADEON_COLOR_ARG_A_DIFFUSE_COLOR, + RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_DIFFUSE_ALPHA, + RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A +}; + +static GLuint radeon_previous_color[] = +{ + RADEON_COLOR_ARG_A_CURRENT_COLOR, + RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_CURRENT_ALPHA, + RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A +}; + +/* GL_ZERO table - indices 0-3 + * GL_ONE table - indices 1-4 + */ +static GLuint radeon_zero_color[] = +{ + RADEON_COLOR_ARG_A_ZERO, + RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_ZERO, + RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A, + RADEON_COLOR_ARG_A_ZERO +}; + + +/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA. + */ +static GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] = +{ + { + RADEON_ALPHA_ARG_A_T0_ALPHA, + RADEON_ALPHA_ARG_A_T1_ALPHA, + RADEON_ALPHA_ARG_A_T2_ALPHA + }, + { + RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A, + RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A, + RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A + }, +}; + +static GLuint radeon_tfactor_alpha[] = +{ + RADEON_ALPHA_ARG_A_TFACTOR_ALPHA, + RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A +}; + +static GLuint radeon_primary_alpha[] = +{ + RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA, + RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A +}; + +static GLuint radeon_previous_alpha[] = +{ + RADEON_ALPHA_ARG_A_CURRENT_ALPHA, + RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A +}; + +/* GL_ZERO table - indices 0-1 + * GL_ONE table - indices 1-2 + */ +static GLuint radeon_zero_alpha[] = +{ + RADEON_ALPHA_ARG_A_ZERO, + RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A, + RADEON_ALPHA_ARG_A_ZERO +}; + + +/* Extract the arg from slot A, shift it into the correct argument slot + * and set the corresponding complement bit. + */ +#define RADEON_COLOR_ARG( n, arg ) \ +do { \ + color_combine |= \ + ((color_arg[n] & RADEON_COLOR_ARG_MASK) \ + << RADEON_COLOR_ARG_##arg##_SHIFT); \ + color_combine |= \ + ((color_arg[n] >> RADEON_COMP_ARG_SHIFT) \ + << RADEON_COMP_ARG_##arg##_SHIFT); \ +} while (0) + +#define RADEON_ALPHA_ARG( n, arg ) \ +do { \ + alpha_combine |= \ + ((alpha_arg[n] & RADEON_ALPHA_ARG_MASK) \ + << RADEON_ALPHA_ARG_##arg##_SHIFT); \ + alpha_combine |= \ + ((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT) \ + << RADEON_COMP_ARG_##arg##_SHIFT); \ +} while (0) + + +/* ================================================================ + * Texture unit state management + */ + +static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint color_combine, alpha_combine; + const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO + | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD + | RADEON_SCALE_1X | RADEON_CLAMP_TX; + const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO + | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD + | RADEON_SCALE_1X | RADEON_CLAMP_TX; + + + /* texUnit->_Current can be NULL if and only if the texture unit is + * not actually enabled. + */ + assert( (texUnit->_ReallyEnabled == 0) + || (texUnit->_Current != NULL) ); + + if ( RADEON_DEBUG & DEBUG_TEXTURE ) { + fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); + } + + /* Set the texture environment state. Isn't this nice and clean? + * The chip will automagically set the texture alpha to 0xff when + * the texture format does not include an alpha component. This + * reduces the amount of special-casing we have to do, alpha-only + * textures being a notable exception. Doesn't work for luminance + * textures realized with I8 and ALPHA_IN_MAP not set neither (on r100). + */ + /* Don't cache these results. + */ + rmesa->state.texture.unit[unit].format = 0; + rmesa->state.texture.unit[unit].envMode = 0; + + if ( !texUnit->_ReallyEnabled ) { + color_combine = color_combine0; + alpha_combine = alpha_combine0; + } + else { + GLuint color_arg[3], alpha_arg[3]; + GLuint i; + const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB; + const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA; + GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB; + GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA; + + + /* Step 1: + * Extract the color and alpha combine function arguments. + */ + for ( i = 0 ; i < numColorArgs ; i++ ) { + const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR; + const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i]; + assert(op >= 0); + assert(op <= 3); + switch ( srcRGBi ) { + case GL_TEXTURE: + if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_ALPHA) + color_arg[i] = radeon_zero_color[op]; + else + color_arg[i] = radeon_texture_color[op][unit]; + break; + case GL_CONSTANT: + color_arg[i] = radeon_tfactor_color[op]; + break; + case GL_PRIMARY_COLOR: + color_arg[i] = radeon_primary_color[op]; + break; + case GL_PREVIOUS: + color_arg[i] = radeon_previous_color[op]; + break; + case GL_ZERO: + color_arg[i] = radeon_zero_color[op]; + break; + case GL_ONE: + color_arg[i] = radeon_zero_color[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: { + GLuint txunit = srcRGBi - GL_TEXTURE0; + if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_ALPHA) + color_arg[i] = radeon_zero_color[op]; + else + /* implement ogl 1.4/1.5 core spec here, not specification of + * GL_ARB_texture_env_crossbar (which would require disabling blending + * instead of undefined results when referencing not enabled texunit) */ + color_arg[i] = radeon_texture_color[op][txunit]; + } + break; + default: + return GL_FALSE; + } + } + + for ( i = 0 ; i < numAlphaArgs ; i++ ) { + const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA; + const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i]; + assert(op >= 0); + assert(op <= 1); + switch ( srcAi ) { + case GL_TEXTURE: + if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_LUMINANCE) + alpha_arg[i] = radeon_zero_alpha[op+1]; + else + alpha_arg[i] = radeon_texture_alpha[op][unit]; + break; + case GL_CONSTANT: + alpha_arg[i] = radeon_tfactor_alpha[op]; + break; + case GL_PRIMARY_COLOR: + alpha_arg[i] = radeon_primary_alpha[op]; + break; + case GL_PREVIOUS: + alpha_arg[i] = radeon_previous_alpha[op]; + break; + case GL_ZERO: + alpha_arg[i] = radeon_zero_alpha[op]; + break; + case GL_ONE: + alpha_arg[i] = radeon_zero_alpha[op+1]; + break; + case GL_TEXTURE0: + case GL_TEXTURE1: + case GL_TEXTURE2: { + GLuint txunit = srcAi - GL_TEXTURE0; + if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_LUMINANCE) + alpha_arg[i] = radeon_zero_alpha[op+1]; + else + alpha_arg[i] = radeon_texture_alpha[op][txunit]; + } + break; + default: + return GL_FALSE; + } + } + + /* Step 2: + * Build up the color and alpha combine functions. + */ + switch ( texUnit->_CurrentCombine->ModeRGB ) { + case GL_REPLACE: + color_combine = (RADEON_COLOR_ARG_A_ZERO | + RADEON_COLOR_ARG_B_ZERO | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, C ); + break; + case GL_MODULATE: + color_combine = (RADEON_COLOR_ARG_C_ZERO | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, B ); + break; + case GL_ADD: + color_combine = (RADEON_COLOR_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + break; + case GL_ADD_SIGNED: + color_combine = (RADEON_COLOR_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_ADDSIGNED | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + break; + case GL_SUBTRACT: + color_combine = (RADEON_COLOR_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_SUBTRACT | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + break; + case GL_INTERPOLATE: + color_combine = (RADEON_BLEND_CTL_BLEND | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, B ); + RADEON_COLOR_ARG( 1, A ); + RADEON_COLOR_ARG( 2, C ); + break; + + case GL_DOT3_RGB_EXT: + case GL_DOT3_RGBA_EXT: + /* The EXT version of the DOT3 extension does not support the + * scale factor, but the ARB version (and the version in OpenGL + * 1.3) does. + */ + RGBshift = 0; + /* FALLTHROUGH */ + + case GL_DOT3_RGB: + case GL_DOT3_RGBA: + /* The R100 / RV200 only support a 1X multiplier in hardware + * w/the ARB version. + */ + if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) { + return GL_FALSE; + } + + RGBshift += 2; + if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT) + || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) { + /* is it necessary to set this or will it be ignored anyway? */ + Ashift = RGBshift; + } + + color_combine = (RADEON_COLOR_ARG_C_ZERO | + RADEON_BLEND_CTL_DOT3 | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, B ); + break; + + case GL_MODULATE_ADD_ATI: + color_combine = (RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + RADEON_COLOR_ARG( 2, B ); + break; + case GL_MODULATE_SIGNED_ADD_ATI: + color_combine = (RADEON_BLEND_CTL_ADDSIGNED | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + RADEON_COLOR_ARG( 2, B ); + break; + case GL_MODULATE_SUBTRACT_ATI: + color_combine = (RADEON_BLEND_CTL_SUBTRACT | + RADEON_CLAMP_TX); + RADEON_COLOR_ARG( 0, A ); + RADEON_COLOR_ARG( 1, C ); + RADEON_COLOR_ARG( 2, B ); + break; + default: + return GL_FALSE; + } + + switch ( texUnit->_CurrentCombine->ModeA ) { + case GL_REPLACE: + alpha_combine = (RADEON_ALPHA_ARG_A_ZERO | + RADEON_ALPHA_ARG_B_ZERO | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, C ); + break; + case GL_MODULATE: + alpha_combine = (RADEON_ALPHA_ARG_C_ZERO | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, B ); + break; + case GL_ADD: + alpha_combine = (RADEON_ALPHA_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + break; + case GL_ADD_SIGNED: + alpha_combine = (RADEON_ALPHA_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_ADDSIGNED | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + break; + case GL_SUBTRACT: + alpha_combine = (RADEON_COLOR_ARG_B_ZERO | + RADEON_COMP_ARG_B | + RADEON_BLEND_CTL_SUBTRACT | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + break; + case GL_INTERPOLATE: + alpha_combine = (RADEON_BLEND_CTL_BLEND | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, B ); + RADEON_ALPHA_ARG( 1, A ); + RADEON_ALPHA_ARG( 2, C ); + break; + + case GL_MODULATE_ADD_ATI: + alpha_combine = (RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + RADEON_ALPHA_ARG( 2, B ); + break; + case GL_MODULATE_SIGNED_ADD_ATI: + alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + RADEON_ALPHA_ARG( 2, B ); + break; + case GL_MODULATE_SUBTRACT_ATI: + alpha_combine = (RADEON_BLEND_CTL_SUBTRACT | + RADEON_CLAMP_TX); + RADEON_ALPHA_ARG( 0, A ); + RADEON_ALPHA_ARG( 1, C ); + RADEON_ALPHA_ARG( 2, B ); + break; + default: + return GL_FALSE; + } + + if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB_EXT) + || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB) ) { + alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE; + } + + /* Step 3: + * Apply the scale factor. + */ + color_combine |= (RGBshift << RADEON_SCALE_SHIFT); + alpha_combine |= (Ashift << RADEON_SCALE_SHIFT); + + /* All done! + */ + } + + if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine || + rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) { + RADEON_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine; + rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine; + } + + return GL_TRUE; +} + +#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \ + RADEON_MIN_FILTER_MASK | \ + RADEON_MAG_FILTER_MASK | \ + RADEON_MAX_ANISO_MASK | \ + RADEON_YUV_TO_RGB | \ + RADEON_YUV_TEMPERATURE_MASK | \ + RADEON_CLAMP_S_MASK | \ + RADEON_CLAMP_T_MASK | \ + RADEON_BORDER_MODE_D3D ) + +#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK | \ + RADEON_TXFORMAT_HEIGHT_MASK | \ + RADEON_TXFORMAT_FORMAT_MASK | \ + RADEON_TXFORMAT_F5_WIDTH_MASK | \ + RADEON_TXFORMAT_F5_HEIGHT_MASK | \ + RADEON_TXFORMAT_ALPHA_IN_MAP | \ + RADEON_TXFORMAT_CUBIC_MAP_ENABLE | \ + RADEON_TXFORMAT_NON_POWER2) + + +static void import_tex_obj_state( radeonContextPtr rmesa, + int unit, + radeonTexObjPtr texobj ) +{ +/* do not use RADEON_DB_STATE to avoid stale texture caches */ + int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; + + RADEON_STATECHANGE( rmesa, tex[unit] ); + + cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK; + cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; + cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; + cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; + cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; + + if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); + txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ + txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] ); + se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit; + } + else { + se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit); + + if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { + int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + GLuint bytesPerFace = texobj->base.totalSize / 6; + ASSERT(texobj->base.totalSize % 6 == 0); + + RADEON_STATECHANGE( rmesa, cube[unit] ); + cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; + /* dont know if this setup conforms to OpenGL.. + * at least it matches the behavior of mesa software renderer + */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */ + cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */ + cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */ + } + } + + if (se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT]) { + RADEON_STATECHANGE( rmesa, set ); + rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; + } + + texobj->dirty_state &= ~(1<<unit); +} + + + + +static void set_texgen_matrix( radeonContextPtr rmesa, + GLuint unit, + const GLfloat *s_plane, + const GLfloat *t_plane, + const GLfloat *r_plane, + const GLfloat *q_plane ) +{ + rmesa->TexGenMatrix[unit].m[0] = s_plane[0]; + rmesa->TexGenMatrix[unit].m[4] = s_plane[1]; + rmesa->TexGenMatrix[unit].m[8] = s_plane[2]; + rmesa->TexGenMatrix[unit].m[12] = s_plane[3]; + + rmesa->TexGenMatrix[unit].m[1] = t_plane[0]; + rmesa->TexGenMatrix[unit].m[5] = t_plane[1]; + rmesa->TexGenMatrix[unit].m[9] = t_plane[2]; + rmesa->TexGenMatrix[unit].m[13] = t_plane[3]; + + rmesa->TexGenMatrix[unit].m[2] = r_plane[0]; + rmesa->TexGenMatrix[unit].m[6] = r_plane[1]; + rmesa->TexGenMatrix[unit].m[10] = r_plane[2]; + rmesa->TexGenMatrix[unit].m[14] = r_plane[3]; + + rmesa->TexGenMatrix[unit].m[3] = q_plane[0]; + rmesa->TexGenMatrix[unit].m[7] = q_plane[1]; + rmesa->TexGenMatrix[unit].m[11] = q_plane[2]; + rmesa->TexGenMatrix[unit].m[15] = q_plane[3]; + + rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit; + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; +} + +/* Returns GL_FALSE if fallback required. + */ +static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; + GLuint tmp = rmesa->TexGenEnabled; + static const GLfloat reflect[16] = { + -1, 0, 0, 0, + 0, -1, 0, 0, + 0, 0, -1, 0, + 0, 0, 0, 1 }; + + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift); + rmesa->TexGenNeedNormals[unit] = 0; + + if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) { + /* Disabled, no fallback: + */ + rmesa->TexGenEnabled |= + (RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift; + return GL_TRUE; + } + /* the r100 cannot do texgen for some coords and not for others + * we do not detect such cases (certainly can't do it here) and just + * ASSUME that when S and T are texgen enabled we do not need other + * non-texgen enabled coords, no matter if the R and Q bits are texgen + * enabled. Still check for mixed mode texgen for all coords. + */ + else if ( (texUnit->TexGenEnabled & S_BIT) && + (texUnit->TexGenEnabled & T_BIT) && + (texUnit->GenModeS == texUnit->GenModeT) ) { + if ( ((texUnit->TexGenEnabled & R_BIT) && + (texUnit->GenModeS != texUnit->GenModeR)) || + ((texUnit->TexGenEnabled & Q_BIT) && + (texUnit->GenModeS != texUnit->GenModeQ)) ) { + /* Mixed modes, fallback: + */ + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback mixed texgen\n"); + return GL_FALSE; + } + rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit; + } + else { + /* some texgen mode not including both S and T bits */ + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback mixed texgen/nontexgen\n"); + return GL_FALSE; + } + + if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) { + /* need this here for vtxfmt presumably. Argh we need to set + this from way too many places, would be much easier if we could leave + tcl q coord always enabled as on r200) */ + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit); + } + + switch (texUnit->GenModeS) { + case GL_OBJECT_LINEAR: + rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift; + set_texgen_matrix( rmesa, unit, + texUnit->ObjectPlaneS, + texUnit->ObjectPlaneT, + texUnit->ObjectPlaneR, + texUnit->ObjectPlaneQ); + break; + + case GL_EYE_LINEAR: + rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift; + set_texgen_matrix( rmesa, unit, + texUnit->EyePlaneS, + texUnit->EyePlaneT, + texUnit->EyePlaneR, + texUnit->EyePlaneQ); + break; + + case GL_REFLECTION_MAP_NV: + rmesa->TexGenNeedNormals[unit] = GL_TRUE; + rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift; + /* TODO: unknown if this is needed/correct */ + set_texgen_matrix( rmesa, unit, reflect, reflect + 4, + reflect + 8, reflect + 12 ); + break; + + case GL_NORMAL_MAP_NV: + rmesa->TexGenNeedNormals[unit] = GL_TRUE; + rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift; + break; + + case GL_SPHERE_MAP: + /* the mode which everyone uses :-( */ + default: + /* Unsupported mode, fallback: + */ + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "fallback GL_SPHERE_MAP\n"); + return GL_FALSE; + } + + if (tmp != rmesa->TexGenEnabled) { + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + + return GL_TRUE; +} + + +static void disable_tex( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) { + /* Texture unit disabled */ + if ( rmesa->state.texture.unit[unit].texobj != NULL ) { + /* The old texture is no longer bound to this texture unit. + * Mark it as such. + */ + + rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); + rmesa->state.texture.unit[unit].texobj = NULL; + } + + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= + ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit); + + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) | + RADEON_Q_BIT(unit)); + + if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) { + TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); + rmesa->recheck_texgen[unit] = GL_TRUE; + } + + if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) { + /* this seems to be a genuine (r100 only?) hw bug. Need to remove the + cubic_map bit on unit 2 when the unit is disabled, otherwise every + 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other + units, better be safe than sorry though).*/ + RADEON_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE; + } + + { + GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; + GLuint tmp = rmesa->TexGenEnabled; + + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift); + rmesa->TexGenNeedNormals[unit] = 0; + rmesa->TexGenEnabled |= + (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift; + + if (tmp != rmesa->TexGenEnabled) { + rmesa->recheck_texgen[unit] = GL_TRUE; + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + } + } +} + +static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; + + /* Need to load the 2d images associated with this unit. + */ + if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { + t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; + t->base.dirty_images[0] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); + + if ( t->base.dirty_images[0] ) { + RADEON_FIREVERTICES( rmesa ); + radeonSetTexImages( rmesa, tObj ); + radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); + if ( !t->base.memBlock ) + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; + GLuint face; + + /* Need to load the 2d images associated with this unit. + */ + if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { + t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; + for (face = 0; face < 6; face++) + t->base.dirty_images[face] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + + if ( t->base.dirty_images[0] || t->base.dirty_images[1] || + t->base.dirty_images[2] || t->base.dirty_images[3] || + t->base.dirty_images[4] || t->base.dirty_images[5] ) { + /* flush */ + RADEON_FIREVERTICES( rmesa ); + /* layout memory space, once for all faces */ + radeonSetTexImages( rmesa, tObj ); + } + + /* upload (per face) */ + for (face = 0; face < 6; face++) { + if (t->base.dirty_images[face]) { + radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face ); + } + } + + if ( !t->base.memBlock ) { + /* texmem alloc failed, use s/w fallback */ + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; + + if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) { + t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; + t->base.dirty_images[0] = ~0; + } + + ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); + + if ( t->base.dirty_images[0] ) { + RADEON_FIREVERTICES( rmesa ); + radeonSetTexImages( rmesa, tObj ); + radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); + if ( !t->base.memBlock /* && !rmesa->prefer_gart_client_texturing FIXME */ ) { + fprintf(stderr, "%s: upload failed\n", __FUNCTION__); + return GL_FALSE; + } + } + + return GL_TRUE; +} + + +static GLboolean update_tex_common( GLcontext *ctx, int unit ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; + GLenum format; + + /* Fallback if there's a texture border */ + if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) { + fprintf(stderr, "%s: border\n", __FUNCTION__); + return GL_FALSE; + } + /* yuv conversion only works in first unit */ + if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB)) + return GL_FALSE; + + /* Update state if this is a different texture object to last + * time. + */ + if ( rmesa->state.texture.unit[unit].texobj != t ) { + if ( rmesa->state.texture.unit[unit].texobj != NULL ) { + /* The old texture is no longer bound to this texture unit. + * Mark it as such. + */ + + rmesa->state.texture.unit[unit].texobj->base.bound &= + ~(1UL << unit); + } + + rmesa->state.texture.unit[unit].texobj = t; + t->base.bound |= (1UL << unit); + t->dirty_state |= 1<<unit; + driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */ + } + + + /* Newly enabled? + */ + if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) { + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= + (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; + + RADEON_STATECHANGE( rmesa, tcl ); + + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); + + rmesa->recheck_texgen[unit] = GL_TRUE; + } + + if (t->dirty_state & (1<<unit)) { + import_tex_obj_state( rmesa, unit, t ); + /* may need to update texture matrix (for texrect adjustments) */ + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + + if (rmesa->recheck_texgen[unit]) { + GLboolean fallback = !radeon_validate_texgen( ctx, unit ); + TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback); + rmesa->recheck_texgen[unit] = 0; + rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + } + + format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; + if ( rmesa->state.texture.unit[unit].format != format || + rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) { + rmesa->state.texture.unit[unit].format = format; + rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode; + if ( ! radeonUpdateTextureEnv( ctx, unit ) ) { + return GL_FALSE; + } + } + + FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback ); + return !t->border_fallback; +} + + + +static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit ) +{ + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) { + return (enable_tex_rect( ctx, unit ) && + update_tex_common( ctx, unit )); + } + else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { + return (enable_tex_2d( ctx, unit ) && + update_tex_common( ctx, unit )); + } + else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) { + return (enable_tex_cube( ctx, unit ) && + update_tex_common( ctx, unit )); + } + else if ( texUnit->_ReallyEnabled ) { + return GL_FALSE; + } + else { + disable_tex( ctx, unit ); + return GL_TRUE; + } +} + +void radeonUpdateTextureState( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLboolean ok; + + ok = (radeonUpdateTextureUnit( ctx, 0 ) && + radeonUpdateTextureUnit( ctx, 1 ) && + radeonUpdateTextureUnit( ctx, 2 )); + + FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok ); + + if (rmesa->TclFallback) + radeonChooseVertexState( ctx ); +} diff --git a/radeon/server/radeon.h b/radeon/server/radeon.h new file mode 100644 index 0000000..6f6c2e6 --- /dev/null +++ b/radeon/server/radeon.h @@ -0,0 +1,209 @@ +/** + * \file server/radeon.h + * \brief Radeon 2D driver data structures. + */ + +/* + * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and + * VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation on the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR + * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon.h,v 1.29 2002/10/12 01:38:07 martin Exp $ */ + +#ifndef _RADEON_H_ +#define _RADEON_H_ + +#include "xf86drm.h" /* drm_handle_t, etc */ + +# define RADEON_AGP_1X_MODE 0x01 +# define RADEON_AGP_2X_MODE 0x02 +# define RADEON_AGP_4X_MODE 0x04 +# define RADEON_AGP_FW_MODE 0x10 +# define RADEON_AGP_MODE_MASK 0x17 +#define RADEON_CP_CSQ_CNTL 0x0740 +# define RADEON_CSQ_CNT_PRIMARY_MASK (0xff << 0) +# define RADEON_CSQ_PRIDIS_INDDIS (0 << 28) +# define RADEON_CSQ_PRIPIO_INDDIS (1 << 28) +# define RADEON_CSQ_PRIBM_INDDIS (2 << 28) +# define RADEON_CSQ_PRIPIO_INDBM (3 << 28) +# define RADEON_CSQ_PRIBM_INDBM (4 << 28) +# define RADEON_CSQ_PRIPIO_INDPIO (15 << 28) + +#define RADEON_PCIGART_TABLE_SIZE 32768 + +#define PCI_CHIP_R200_BB 0x4242 +#define PCI_CHIP_RV250_Id 0x4964 +#define PCI_CHIP_RV250_Ie 0x4965 +#define PCI_CHIP_RV250_If 0x4966 +#define PCI_CHIP_RV250_Ig 0x4967 +#define PCI_CHIP_RADEON_LW 0x4C57 +#define PCI_CHIP_RADEON_LX 0x4C58 +#define PCI_CHIP_RADEON_LY 0x4C59 +#define PCI_CHIP_RADEON_LZ 0x4C5A +#define PCI_CHIP_RV250_Ld 0x4C64 +#define PCI_CHIP_RV250_Le 0x4C65 +#define PCI_CHIP_RV250_Lf 0x4C66 +#define PCI_CHIP_RV250_Lg 0x4C67 +#define PCI_CHIP_R300_ND 0x4E44 +#define PCI_CHIP_R300_NE 0x4E45 +#define PCI_CHIP_R300_NF 0x4E46 +#define PCI_CHIP_R300_NG 0x4E47 +#define PCI_CHIP_RADEON_QD 0x5144 +#define PCI_CHIP_RADEON_QE 0x5145 +#define PCI_CHIP_RADEON_QF 0x5146 +#define PCI_CHIP_RADEON_QG 0x5147 +#define PCI_CHIP_R200_QL 0x514C +#define PCI_CHIP_R200_QN 0x514E +#define PCI_CHIP_R200_QO 0x514F +#define PCI_CHIP_RV200_QW 0x5157 +#define PCI_CHIP_RV200_QX 0x5158 +#define PCI_CHIP_RADEON_QY 0x5159 +#define PCI_CHIP_RADEON_QZ 0x515A +#define PCI_CHIP_R200_Ql 0x516C +#define PCI_CHIP_RV370_5460 0x5460 +#define PCI_CHIP_RV280_Y_ 0x5960 +#define PCI_CHIP_RV280_Ya 0x5961 +#define PCI_CHIP_RV280_Yb 0x5962 +#define PCI_CHIP_RV280_Yc 0x5963 + +/** + * \brief Chip families. + */ +typedef enum { + CHIP_FAMILY_UNKNOW, + CHIP_FAMILY_LEGACY, + CHIP_FAMILY_R128, + CHIP_FAMILY_M3, + CHIP_FAMILY_RADEON, + CHIP_FAMILY_VE, + CHIP_FAMILY_M6, + CHIP_FAMILY_RV200, + CHIP_FAMILY_M7, + CHIP_FAMILY_R200, + CHIP_FAMILY_RV250, + CHIP_FAMILY_M9, + CHIP_FAMILY_RV280, + CHIP_FAMILY_R300, + CHIP_FAMILY_R350, + CHIP_FAMILY_RV350, + CHIP_FAMILY_RV380, /* RV370/RV380/M22/M24 */ + CHIP_FAMILY_R420, /* R420/R423/M18 */ +} RADEONChipFamily; + + +typedef unsigned long memType; + + +/** + * \brief Radeon DDX driver private data. + */ +typedef struct { + int Chipset; /**< \brief Chipset number */ + RADEONChipFamily ChipFamily; /**< \brief Chip family */ + + unsigned long LinearAddr; /**< \brief Frame buffer physical address */ + + + drmSize registerSize; /**< \brief MMIO register map size */ + drm_handle_t registerHandle; /**< \brief MMIO register map handle */ + + int IsPCI; /* Current card is a PCI card */ + + /** + * \name AGP + */ + /*@{*/ + drmSize gartSize; /**< \brief AGP map size */ + drm_handle_t gartMemHandle; /**< \brief AGP map handle */ + unsigned long gartOffset; /**< \brief AGP offset */ + int gartMode; /**< \brief AGP mode */ + int gartFastWrite; + /*@}*/ + + /** + * \name CP ring buffer data + */ + /*@{*/ + unsigned long ringStart; /**< \brief Offset into AGP space */ + drm_handle_t ringHandle; /**< \brief Handle from drmAddMap() */ + drmSize ringMapSize; /**< \brief Size of map */ + int ringSize; /**< \brief Size of ring (in MB) */ + + unsigned long ringReadOffset; /**< \brief Read offset into AGP space */ + drm_handle_t ringReadPtrHandle;/**< \brief Handle from drmAddMap() */ + drmSize ringReadMapSize; /**< \brief Size of map */ + /*@}*/ + + /** + * \name CP vertex/indirect buffer data + */ + /*@{*/ + unsigned long bufStart; /**< \brief Offset into AGP space */ + drm_handle_t bufHandle; /**< \brief Handle from drmAddMap() */ + drmSize bufMapSize; /**< \brief Size of map */ + int bufSize; /**< \brief Size of buffers (in MB) */ + int bufNumBufs; /**< \brief Number of buffers */ + /*@}*/ + + /** + * \name CP AGP Texture data + */ + /*@{*/ + unsigned long gartTexStart; /**< \brief Offset into AGP space */ + drm_handle_t gartTexHandle; /**< \brief Handle from drmAddMap() */ + drmSize gartTexMapSize; /**< \brief Size of map */ + int gartTexSize; /**< \brief Size of AGP tex space (in MB) */ + int log2GARTTexGran; + /*@}*/ + + int drmMinor; /**< \brief DRM device minor number */ + + int frontOffset; /**< \brief Front color buffer offset */ + int frontPitch; /**< \brief Front color buffer pitch */ + int backOffset; /**< \brief Back color buffer offset */ + int backPitch; /**< \brief Back color buffer pitch */ + int depthOffset; /**< \brief Depth buffer offset */ + int depthPitch; /**< \brief Depth buffer pitch */ + int textureOffset; /**< \brief Texture area offset */ + int textureSize; /**< \brief Texture area size */ + int log2TexGran; /**< \brief Texture granularity in base 2 log */ + + unsigned int frontPitchOffset; + unsigned int backPitchOffset; + unsigned int depthPitchOffset; + + int colorTiling; /**< \brief Enable color tiling */ + + int irq; /**< \brief IRQ number */ + int page_flip_enable; /**< \brief Page Flip enable */ + unsigned int gen_int_cntl; + unsigned int crtc_offset_cntl; + + unsigned long pcieGartTableOffset; +} RADEONInfoRec, *RADEONInfoPtr; + + +#endif /* _RADEON_H_ */ diff --git a/radeon/server/radeon_dri.c b/radeon/server/radeon_dri.c new file mode 100644 index 0000000..7ead588 --- /dev/null +++ b/radeon/server/radeon_dri.c @@ -0,0 +1,1337 @@ +/** + * \file server/radeon_dri.c + * \brief File to perform the device-specific initialization tasks typically + * done in the X server. + * + * Here they are converted to run in the client (or perhaps a standalone + * process), and to work with the frame buffer device rather than the X + * server infrastructure. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#include "driver.h" +#include "drm.h" +#include "memops.h" + +#include "radeon.h" +#include "radeon_dri.h" +#include "radeon_macros.h" +#include "radeon_reg.h" +#include "drm_sarea.h" + +static size_t radeon_drm_page_size; + +static int RadeonSetParam(const DRIDriverContext *ctx, int param, int value) +{ + drm_radeon_setparam_t sp; + + memset(&sp, 0, sizeof(sp)); + sp.param = param; + sp.value = value; + + if (drmCommandWrite(ctx->drmFD, DRM_RADEON_SETPARAM, &sp, sizeof(sp))) { + return -1; + } + + return 0; +} + +/** + * \brief Wait for free FIFO entries. + * + * \param ctx display handle. + * \param entries number of free entries to wait. + * + * It polls the free entries from the chip until it reaches the requested value + * or a timeout (3000 tries) occurs. Aborts the program if the FIFO times out. + */ +static void RADEONWaitForFifo( const DRIDriverContext *ctx, + int entries ) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + int i; + + for (i = 0; i < 3000; i++) { + int fifo_slots = + INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; + if (fifo_slots >= entries) return; + } + + /* There are recoveries possible, but I haven't seen them work + * in practice: + */ + fprintf(stderr, "FIFO timed out: %d entries, stat=0x%08x\n", + INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, + INREG(RADEON_RBBM_STATUS)); + exit(1); +} + +/** + * \brief Read a PLL register. + * + * \param ctx display handle. + * \param addr PLL register index. + * + * \return value of the PLL register. + */ +static unsigned int RADEONINPLL( const DRIDriverContext *ctx, int addr) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + unsigned int data; + + OUTREG8(RADEON_CLOCK_CNTL_INDEX, addr & 0x3f); + data = INREG(RADEON_CLOCK_CNTL_DATA); + + return data; +} + +/** + * \brief Reset graphics card to known state. + * + * \param ctx display handle. + * + * Resets the values of several Radeon registers. + */ +static void RADEONEngineReset( const DRIDriverContext *ctx ) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + unsigned int clock_cntl_index; + unsigned int mclk_cntl; + unsigned int rbbm_soft_reset; + unsigned int host_path_cntl; + int i; + + OUTREGP(RADEON_RB2D_DSTCACHE_CTLSTAT, + RADEON_RB2D_DC_FLUSH_ALL, + ~RADEON_RB2D_DC_FLUSH_ALL); + for (i = 0; i < 512; i++) { + if (!(INREG(RADEON_RB2D_DSTCACHE_CTLSTAT) & RADEON_RB2D_DC_BUSY)) + break; + } + + clock_cntl_index = INREG(RADEON_CLOCK_CNTL_INDEX); + + mclk_cntl = INPLL(ctx, RADEON_MCLK_CNTL); + OUTPLL(RADEON_MCLK_CNTL, (mclk_cntl | + RADEON_FORCEON_MCLKA | + RADEON_FORCEON_MCLKB | + RADEON_FORCEON_YCLKA | + RADEON_FORCEON_YCLKB | + RADEON_FORCEON_MC | + RADEON_FORCEON_AIC)); + + /* Soft resetting HDP thru RBBM_SOFT_RESET register can cause some + * unexpected behaviour on some machines. Here we use + * RADEON_HOST_PATH_CNTL to reset it. + */ + host_path_cntl = INREG(RADEON_HOST_PATH_CNTL); + rbbm_soft_reset = INREG(RADEON_RBBM_SOFT_RESET); + + OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset | + RADEON_SOFT_RESET_CP | + RADEON_SOFT_RESET_HI | + RADEON_SOFT_RESET_SE | + RADEON_SOFT_RESET_RE | + RADEON_SOFT_RESET_PP | + RADEON_SOFT_RESET_E2 | + RADEON_SOFT_RESET_RB)); + INREG(RADEON_RBBM_SOFT_RESET); + OUTREG(RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset & + (unsigned int) ~(RADEON_SOFT_RESET_CP | + RADEON_SOFT_RESET_HI | + RADEON_SOFT_RESET_SE | + RADEON_SOFT_RESET_RE | + RADEON_SOFT_RESET_PP | + RADEON_SOFT_RESET_E2 | + RADEON_SOFT_RESET_RB))); + INREG(RADEON_RBBM_SOFT_RESET); + + OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET); + INREG(RADEON_HOST_PATH_CNTL); + OUTREG(RADEON_HOST_PATH_CNTL, host_path_cntl); + + OUTREG(RADEON_RBBM_SOFT_RESET, rbbm_soft_reset); + + OUTREG(RADEON_CLOCK_CNTL_INDEX, clock_cntl_index); + OUTPLL(RADEON_MCLK_CNTL, mclk_cntl); +} + +/** + * \brief Restore the drawing engine. + * + * \param ctx display handle + * + * Resets the graphics card and sets initial values for several registers of + * the card's drawing engine. + * + * Turns on the radeon command processor engine (i.e., the ringbuffer). + */ +static int RADEONEngineRestore( const DRIDriverContext *ctx ) +{ + RADEONInfoPtr info = ctx->driverPrivate; + unsigned char *RADEONMMIO = ctx->MMIOAddress; + int pitch64, datatype, dp_gui_master_cntl, err; + + fprintf(stderr, "%s\n", __FUNCTION__); + + OUTREG(RADEON_RB3D_CNTL, 0); + RADEONEngineReset( ctx ); + + switch (ctx->bpp) { + case 16: datatype = 4; break; + case 32: datatype = 6; break; + default: return 0; + } + + dp_gui_master_cntl = + ((datatype << RADEON_GMC_DST_DATATYPE_SHIFT) + | RADEON_GMC_CLR_CMP_CNTL_DIS); + + pitch64 = ((ctx->shared.virtualWidth * (ctx->bpp / 8) + 0x3f)) >> 6; + + RADEONWaitForFifo(ctx, 1); + OUTREG(RADEON_DEFAULT_OFFSET, ((INREG(RADEON_DEFAULT_OFFSET) & 0xC0000000) + | (pitch64 << 22))); + + RADEONWaitForFifo(ctx, 1); + OUTREG(RADEON_SURFACE_CNTL, RADEON_SURF_TRANSLATION_DIS); + + RADEONWaitForFifo(ctx, 1); + OUTREG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, (RADEON_DEFAULT_SC_RIGHT_MAX + | RADEON_DEFAULT_SC_BOTTOM_MAX)); + + RADEONWaitForFifo(ctx, 1); + OUTREG(RADEON_DP_GUI_MASTER_CNTL, (dp_gui_master_cntl + | RADEON_GMC_BRUSH_SOLID_COLOR + | RADEON_GMC_SRC_DATATYPE_COLOR)); + + RADEONWaitForFifo(ctx, 7); + OUTREG(RADEON_DST_LINE_START, 0); + OUTREG(RADEON_DST_LINE_END, 0); + OUTREG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); + OUTREG(RADEON_DP_BRUSH_BKGD_CLR, 0); + OUTREG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); + OUTREG(RADEON_DP_SRC_BKGD_CLR, 0); + OUTREG(RADEON_DP_WRITE_MASK, 0xffffffff); + OUTREG(RADEON_AUX_SC_CNTL, 0); + +/* RADEONWaitForIdleMMIO(ctx); */ + usleep(100); + + + OUTREG(RADEON_GEN_INT_CNTL, info->gen_int_cntl); + if (info->colorTiling) + info->crtc_offset_cntl |= RADEON_CRTC_TILE_EN; + OUTREG(RADEON_CRTC_OFFSET_CNTL, info->crtc_offset_cntl); + + /* Initialize and start the CP if required */ + if ((err = drmCommandNone(ctx->drmFD, DRM_RADEON_CP_START)) != 0) { + fprintf(stderr, "%s: CP start %d\n", __FUNCTION__, err); + return 0; + } + + return 1; +} + + +/** + * \brief Shutdown the drawing engine. + * + * \param ctx display handle + * + * Turns off the command processor engine & restores the graphics card + * to a state that fbdev understands. + */ +static int RADEONEngineShutdown( const DRIDriverContext *ctx ) +{ + drm_radeon_cp_stop_t stop; + int ret, i; + + stop.flush = 1; + stop.idle = 1; + + ret = drmCommandWrite(ctx->drmFD, DRM_RADEON_CP_STOP, &stop, + sizeof(drm_radeon_cp_stop_t)); + + if (ret == 0) { + return 0; + } else if (errno != EBUSY) { + return -errno; + } + + stop.flush = 0; + + i = 0; + do { + ret = drmCommandWrite(ctx->drmFD, DRM_RADEON_CP_STOP, &stop, + sizeof(drm_radeon_cp_stop_t)); + } while (ret && errno == EBUSY && i++ < 10); + + if (ret == 0) { + return 0; + } else if (errno != EBUSY) { + return -errno; + } + + stop.idle = 0; + + if (drmCommandWrite(ctx->drmFD, DRM_RADEON_CP_STOP, + &stop, sizeof(drm_radeon_cp_stop_t))) { + return -errno; + } else { + return 0; + } +} + +/** + * \brief Compute base 2 logarithm. + * + * \param val value. + * + * \return base 2 logarithm of \p val. + */ +static int RADEONMinBits(int val) +{ + int bits; + + if (!val) return 1; + for (bits = 0; val; val >>= 1, ++bits); + return bits; +} + +/** + * \brief Initialize the AGP state + * + * \param ctx display handle. + * \param info driver private data. + * + * \return one on success, or zero on failure. + * + * Acquires and enables the AGP device. Reserves memory in the AGP space for + * the ring buffer, vertex buffers and textures. Initialize the Radeon + * registers to point to that memory and add client mappings. + */ +static int RADEONDRIAgpInit( const DRIDriverContext *ctx, RADEONInfoPtr info) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + unsigned long mode; + int ret; + int s, l; + + if (drmAgpAcquire(ctx->drmFD) < 0) { + fprintf(stderr, "[gart] AGP not available\n"); + return 0; + } + + /* Modify the mode if the default mode is not appropriate for this + * particular combination of graphics card and AGP chipset. + */ + mode = drmAgpGetMode(ctx->drmFD); /* Default mode */ + + /* Disable fast write entirely - too many lockups. + */ + mode &= ~RADEON_AGP_MODE_MASK; + switch (ctx->agpmode) { + case 4: mode |= RADEON_AGP_4X_MODE; + case 2: mode |= RADEON_AGP_2X_MODE; + case 1: default: mode |= RADEON_AGP_1X_MODE; + } + + if (drmAgpEnable(ctx->drmFD, mode) < 0) { + fprintf(stderr, "[gart] AGP not enabled\n"); + drmAgpRelease(ctx->drmFD); + return 0; + } + else + fprintf(stderr, "[gart] AGP enabled at %dx\n", ctx->agpmode); + + /* Workaround for some hardware bugs */ + if (info->ChipFamily < CHIP_FAMILY_R200) + OUTREG(RADEON_AGP_CNTL, INREG(RADEON_AGP_CNTL) | 0x000e0000); + + info->gartOffset = 0; + + if ((ret = drmAgpAlloc(ctx->drmFD, info->gartSize*1024*1024, 0, NULL, + &info->gartMemHandle)) < 0) { + fprintf(stderr, "[gart] Out of memory (%d)\n", ret); + drmAgpRelease(ctx->drmFD); + return 0; + } + fprintf(stderr, + "[gart] %d kB allocated with handle 0x%08x\n", + info->gartSize*1024, (unsigned)info->gartMemHandle); + + if (drmAgpBind(ctx->drmFD, + info->gartMemHandle, info->gartOffset) < 0) { + fprintf(stderr, "[gart] Could not bind\n"); + drmAgpFree(ctx->drmFD, info->gartMemHandle); + drmAgpRelease(ctx->drmFD); + return 0; + } + + /* Initialize the CP ring buffer data */ + info->ringStart = info->gartOffset; + info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; + + info->ringReadOffset = info->ringStart + info->ringMapSize; + info->ringReadMapSize = radeon_drm_page_size; + + /* Reserve space for vertex/indirect buffers */ + info->bufStart = info->ringReadOffset + info->ringReadMapSize; + info->bufMapSize = info->bufSize*1024*1024; + + /* Reserve the rest for AGP textures */ + info->gartTexStart = info->bufStart + info->bufMapSize; + s = (info->gartSize*1024*1024 - info->gartTexStart); + l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + info->gartTexMapSize = (s >> l) << l; + info->log2GARTTexGran = l; + + if (drmAddMap(ctx->drmFD, info->ringStart, info->ringMapSize, + DRM_AGP, DRM_READ_ONLY, &info->ringHandle) < 0) { + fprintf(stderr, "[gart] Could not add ring mapping\n"); + return 0; + } + fprintf(stderr, "[gart] ring handle = 0x%08x\n", info->ringHandle); + + + if (drmAddMap(ctx->drmFD, info->ringReadOffset, info->ringReadMapSize, + DRM_AGP, DRM_READ_ONLY, &info->ringReadPtrHandle) < 0) { + fprintf(stderr, + "[gart] Could not add ring read ptr mapping\n"); + return 0; + } + + fprintf(stderr, + "[gart] ring read ptr handle = 0x%08lx\n", + info->ringReadPtrHandle); + + if (drmAddMap(ctx->drmFD, info->bufStart, info->bufMapSize, + DRM_AGP, 0, &info->bufHandle) < 0) { + fprintf(stderr, + "[gart] Could not add vertex/indirect buffers mapping\n"); + return 0; + } + fprintf(stderr, + "[gart] vertex/indirect buffers handle = 0x%08x\n", + info->bufHandle); + + if (drmAddMap(ctx->drmFD, info->gartTexStart, info->gartTexMapSize, + DRM_AGP, 0, &info->gartTexHandle) < 0) { + fprintf(stderr, + "[gart] Could not add AGP texture map mapping\n"); + return 0; + } + fprintf(stderr, + "[gart] AGP texture map handle = 0x%08lx\n", + info->gartTexHandle); + + /* Initialize Radeon's AGP registers */ + /* Ring buffer is at AGP offset 0 */ + OUTREG(RADEON_AGP_BASE, info->ringHandle); + + return 1; +} + +/* Initialize the PCI GART state. Request memory for use in PCI space, + * and initialize the Radeon registers to point to that memory. + */ +static int RADEONDRIPciInit(const DRIDriverContext *ctx, RADEONInfoPtr info) +{ + int ret; + int flags = DRM_READ_ONLY | DRM_LOCKED | DRM_KERNEL; + int s, l; + + ret = drmScatterGatherAlloc(ctx->drmFD, info->gartSize*1024*1024, + &info->gartMemHandle); + if (ret < 0) { + fprintf(stderr, "[pci] Out of memory (%d)\n", ret); + return 0; + } + fprintf(stderr, + "[pci] %d kB allocated with handle 0x%08lx\n", + info->gartSize*1024, info->gartMemHandle); + + info->gartOffset = 0; + + /* Initialize the CP ring buffer data */ + info->ringStart = info->gartOffset; + info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; + + info->ringReadOffset = info->ringStart + info->ringMapSize; + info->ringReadMapSize = radeon_drm_page_size; + + /* Reserve space for vertex/indirect buffers */ + info->bufStart = info->ringReadOffset + info->ringReadMapSize; + info->bufMapSize = info->bufSize*1024*1024; + + /* Reserve the rest for AGP textures */ + info->gartTexStart = info->bufStart + info->bufMapSize; + s = (info->gartSize*1024*1024 - info->gartTexStart); + l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + info->gartTexMapSize = (s >> l) << l; + info->log2GARTTexGran = l; + + if (drmAddMap(ctx->drmFD, info->ringStart, info->ringMapSize, + DRM_SCATTER_GATHER, flags, &info->ringHandle) < 0) { + fprintf(stderr, + "[pci] Could not add ring mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] ring handle = 0x%08x\n", info->ringHandle); + + if (drmAddMap(ctx->drmFD, info->ringReadOffset, info->ringReadMapSize, + DRM_SCATTER_GATHER, flags, &info->ringReadPtrHandle) < 0) { + fprintf(stderr, + "[pci] Could not add ring read ptr mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] ring read ptr handle = 0x%08lx\n", + info->ringReadPtrHandle); + + if (drmAddMap(ctx->drmFD, info->bufStart, info->bufMapSize, + DRM_SCATTER_GATHER, 0, &info->bufHandle) < 0) { + fprintf(stderr, + "[pci] Could not add vertex/indirect buffers mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] vertex/indirect buffers handle = 0x%08lx\n", + info->bufHandle); + + if (drmAddMap(ctx->drmFD, info->gartTexStart, info->gartTexMapSize, + DRM_SCATTER_GATHER, 0, &info->gartTexHandle) < 0) { + fprintf(stderr, + "[pci] Could not add GART texture map mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] GART texture map handle = 0x%08x\n", + info->gartTexHandle); + + return 1; +} + + +/** + * \brief Initialize the kernel data structures and enable the CP engine. + * + * \param ctx display handle. + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * This function is a wrapper around the DRM_RADEON_CP_INIT command, passing + * all the parameters in a drm_radeon_init_t structure. + */ +static int RADEONDRIKernelInit( const DRIDriverContext *ctx, + RADEONInfoPtr info) +{ + int cpp = ctx->bpp / 8; + drm_radeon_init_t drmInfo; + int ret; + + memset(&drmInfo, 0, sizeof(drm_radeon_init_t)); + + if ( (info->ChipFamily == CHIP_FAMILY_R200) || + (info->ChipFamily == CHIP_FAMILY_RV250) || + (info->ChipFamily == CHIP_FAMILY_M9) || + (info->ChipFamily == CHIP_FAMILY_RV280) ) + drmInfo.func = RADEON_INIT_R200_CP; + else + drmInfo.func = RADEON_INIT_CP; + + /* This is the struct passed to the kernel module for its initialization */ + drmInfo.sarea_priv_offset = sizeof(drm_sarea_t); + drmInfo.is_pci = ctx->isPCI; + drmInfo.cp_mode = RADEON_DEFAULT_CP_BM_MODE; + drmInfo.gart_size = info->gartSize*1024*1024; + drmInfo.ring_size = info->ringSize*1024*1024; + drmInfo.usec_timeout = 1000; + drmInfo.fb_bpp = ctx->bpp; + drmInfo.depth_bpp = ctx->bpp; + drmInfo.front_offset = info->frontOffset; + drmInfo.front_pitch = info->frontPitch * cpp; + drmInfo.back_offset = info->backOffset; + drmInfo.back_pitch = info->backPitch * cpp; + drmInfo.depth_offset = info->depthOffset; + drmInfo.depth_pitch = info->depthPitch * cpp; + drmInfo.fb_offset = info->LinearAddr; + drmInfo.mmio_offset = info->registerHandle; + drmInfo.ring_offset = info->ringHandle; + drmInfo.ring_rptr_offset = info->ringReadPtrHandle; + drmInfo.buffers_offset = info->bufHandle; + drmInfo.gart_textures_offset = info->gartTexHandle; + + ret = drmCommandWrite(ctx->drmFD, DRM_RADEON_CP_INIT, &drmInfo, + sizeof(drm_radeon_init_t)); + + return ret >= 0; +} + + +/** + * \brief Initialize the AGP heap. + * + * \param ctx display handle. + * \param info driver private data. + * + * This function is a wrapper around the DRM_RADEON_INIT_HEAP command, passing + * all the parameters in a drm_radeon_mem_init_heap structure. + */ +static void RADEONDRIAgpHeapInit(const DRIDriverContext *ctx, + RADEONInfoPtr info) +{ + drm_radeon_mem_init_heap_t drmHeap; + + /* Start up the simple memory manager for gart space */ + drmHeap.region = RADEON_MEM_REGION_GART; + drmHeap.start = 0; + drmHeap.size = info->gartTexMapSize; + + if (drmCommandWrite(ctx->drmFD, DRM_RADEON_INIT_HEAP, + &drmHeap, sizeof(drmHeap))) { + fprintf(stderr, + "[drm] Failed to initialized gart heap manager\n"); + } else { + fprintf(stderr, + "[drm] Initialized kernel gart heap manager, %d\n", + info->gartTexMapSize); + } +} + +/** + * \brief Add a map for the vertex buffers that will be accessed by any + * DRI-based clients. + * + * \param ctx display handle. + * \param info driver private data. + * + * \return one on success, or zero on failure. + * + * Calls drmAddBufs() with the previously allocated vertex buffers. + */ +static int RADEONDRIBufInit( const DRIDriverContext *ctx, RADEONInfoPtr info ) +{ + /* Initialize vertex buffers */ + info->bufNumBufs = drmAddBufs(ctx->drmFD, + info->bufMapSize / RADEON_BUFFER_SIZE, + RADEON_BUFFER_SIZE, + ctx->isPCI ? DRM_SG_BUFFER : DRM_AGP_BUFFER, + info->bufStart); + + if (info->bufNumBufs <= 0) { + fprintf(stderr, + "[drm] Could not create vertex/indirect buffers list\n"); + return 0; + } + fprintf(stderr, + "[drm] Added %d %d byte vertex/indirect buffers\n", + info->bufNumBufs, RADEON_BUFFER_SIZE); + + return 1; +} + +/** + * \brief Install an IRQ handler. + * + * \param ctx display handle. + * \param info driver private data. + * + * Attempts to install an IRQ handler via drmCtlInstHandler(), falling back to + * IRQ-free operation on failure. + */ +static void RADEONDRIIrqInit(const DRIDriverContext *ctx, + RADEONInfoPtr info) +{ + if (!info->irq) { + info->irq = drmGetInterruptFromBusID(ctx->drmFD, + ctx->pciBus, + ctx->pciDevice, + ctx->pciFunc); + + if ((drmCtlInstHandler(ctx->drmFD, info->irq)) != 0) { + fprintf(stderr, + "[drm] failure adding irq handler, " + "there is a device already using that irq\n" + "[drm] falling back to irq-free operation\n"); + info->irq = 0; + } + } + + if (info->irq) + fprintf(stderr, + "[drm] dma control initialized, using IRQ %d\n", + info->irq); +} + +static int RADEONCheckDRMVersion( const DRIDriverContext *ctx, + RADEONInfoPtr info ) +{ + drmVersionPtr version; + + version = drmGetVersion(ctx->drmFD); + if (version) { + int req_minor, req_patch; + + /* Need 1.8.x for proper cleanup-on-client-exit behaviour. + */ + req_minor = 8; + req_patch = 0; + + if (version->version_major != 1 || + version->version_minor < req_minor || + (version->version_minor == req_minor && + version->version_patchlevel < req_patch)) { + /* Incompatible drm version */ + fprintf(stderr, + "[dri] RADEONDRIScreenInit failed because of a version " + "mismatch.\n" + "[dri] radeon.o kernel module version is %d.%d.%d " + "but version 1.%d.%d or newer is needed.\n" + "[dri] Disabling DRI.\n", + version->version_major, + version->version_minor, + version->version_patchlevel, + req_minor, + req_patch); + drmFreeVersion(version); + return 0; + } + + info->drmMinor = version->version_minor; + drmFreeVersion(version); + } + + return 1; +} + +static int RADEONMemoryInit( const DRIDriverContext *ctx, RADEONInfoPtr info ) +{ + int width_bytes = ctx->shared.virtualWidth * ctx->cpp; + int cpp = ctx->cpp; + int bufferSize = ((((ctx->shared.virtualHeight+15) & ~15) * width_bytes + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN); + int depthSize = ((((ctx->shared.virtualHeight+15) & ~15) * width_bytes + + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN); + int l; + + info->frontOffset = 0; + info->frontPitch = ctx->shared.virtualWidth; + + fprintf(stderr, + "Using %d MB AGP aperture\n", info->gartSize); + fprintf(stderr, + "Using %d MB for the ring buffer\n", info->ringSize); + fprintf(stderr, + "Using %d MB for vertex/indirect buffers\n", info->bufSize); + fprintf(stderr, + "Using %d MB for AGP textures\n", info->gartTexSize); + + /* Front, back and depth buffers - everything else texture?? + */ + info->textureSize = ctx->shared.fbSize - 2 * bufferSize - depthSize; + + if (ctx->colorTiling==1) + { + info->textureSize = ctx->shared.fbSize - ((ctx->shared.fbSize - info->textureSize + width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes*16); + } + + if (info->textureSize < 0) + return 0; + + l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + + /* Round the texture size up to the nearest whole number of + * texture regions. Again, be greedy about this, don't + * round down. + */ + info->log2TexGran = l; + info->textureSize = (info->textureSize >> l) << l; + + /* Set a minimum usable local texture heap size. This will fit + * two 256x256x32bpp textures. + */ + if (info->textureSize < 512 * 1024) { + info->textureOffset = 0; + info->textureSize = 0; + } + + /* Reserve space for textures */ + if (ctx->colorTiling==1) + { + info->textureOffset = ((ctx->shared.fbSize - info->textureSize) / + (width_bytes * 16)) * (width_bytes*16); + } + else + { + info->textureOffset = ((ctx->shared.fbSize - info->textureSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + } + /* Reserve space for the shared depth + * buffer. + */ + info->depthOffset = ((info->textureOffset - depthSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + info->depthPitch = ctx->shared.virtualWidth; + + info->backOffset = ((info->depthOffset - bufferSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + info->backPitch = ctx->shared.virtualWidth; + + + fprintf(stderr, + "Will use back buffer at offset 0x%x\n", + info->backOffset); + fprintf(stderr, + "Will use depth buffer at offset 0x%x\n", + info->depthOffset); + fprintf(stderr, + "Will use %d kb for textures at offset 0x%x\n", + info->textureSize/1024, info->textureOffset); + + info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) | + (info->frontOffset >> 10)); + + info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) | + (info->backOffset >> 10)); + + info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) | + (info->depthOffset >> 10)); + + return 1; +} + +static int RADEONColorTilingInit( const DRIDriverContext *ctx, RADEONInfoPtr info ) +{ + int width_bytes = ctx->shared.virtualWidth * ctx->cpp; + int bufferSize = ((((ctx->shared.virtualHeight+15) & ~15) * width_bytes + RADEON_BUFFER_ALIGN) + & ~RADEON_BUFFER_ALIGN); + /* Setup color tiling */ + if (info->drmMinor<14) + info->colorTiling=0; + + if (info->colorTiling) + { + + int colorTilingFlag; + drm_radeon_surface_alloc_t front,back; + + RadeonSetParam(ctx, RADEON_SETPARAM_SWITCH_TILING, info->colorTiling ? 1 : 0); + + /* Setup the surfaces */ + if (info->ChipFamily < CHIP_FAMILY_R200) + colorTilingFlag=RADEON_SURF_TILE_COLOR_MACRO; + else + colorTilingFlag=R200_SURF_TILE_COLOR_MACRO; + + front.address = info->frontOffset; + front.size = bufferSize; + front.flags = (width_bytes) | colorTilingFlag; + drmCommandWrite(ctx->drmFD, DRM_RADEON_SURF_ALLOC, &front,sizeof(front)); + + back.address = info->backOffset; + back.size = bufferSize; + back.flags = (width_bytes) | colorTilingFlag; + drmCommandWrite(ctx->drmFD, DRM_RADEON_SURF_ALLOC, &back,sizeof(back)); + + } + return 1; +} + + + +/** + * Called at the start of each server generation. + * + * \param ctx display handle. + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * Performs static frame buffer allocation. Opens the DRM device and add maps + * to the SAREA, framebuffer and MMIO regions. Fills in \p info with more + * information. Creates a \e server context to grab the lock for the + * initialization ioctls and calls the other initilization functions in this + * file. Starts the CP engine via the DRM_RADEON_CP_START command. + * + * Setups a RADEONDRIRec structure to be passed to radeon_dri.so for its + * initialization. + */ +static int RADEONScreenInit( DRIDriverContext *ctx, RADEONInfoPtr info ) +{ + RADEONDRIPtr pRADEONDRI; + int err; + + usleep(100); + /*assert(!ctx->IsClient);*/ + + { + int width_bytes = (ctx->shared.virtualWidth * ctx->cpp); + int maxy = ctx->shared.fbSize / width_bytes; + + + if (maxy <= ctx->shared.virtualHeight * 3) { + fprintf(stderr, + "Static buffer allocation failed -- " + "need at least %d kB video memory (have %d kB)\n", + (ctx->shared.virtualWidth * ctx->shared.virtualHeight * + ctx->cpp * 3 + 1023) / 1024, + ctx->shared.fbSize / 1024); + return 0; + } + } + + + if (info->ChipFamily >= CHIP_FAMILY_R300) { + fprintf(stderr, + "Direct rendering not yet supported on " + "Radeon 9700 and newer cards\n"); + return 0; + } + + radeon_drm_page_size = getpagesize(); + + info->registerSize = ctx->MMIOSize; + ctx->shared.SAREASize = SAREA_MAX; + + /* Note that drmOpen will try to load the kernel module, if needed. */ + ctx->drmFD = drmOpen("radeon", NULL ); + if (ctx->drmFD < 0) { + fprintf(stderr, "[drm] drmOpen failed\n"); + return 0; + } + + if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) { + fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n", + ctx->drmFD, ctx->pciBusID, strerror(-err)); + return 0; + } + + if (drmAddMap( ctx->drmFD, + 0, + ctx->shared.SAREASize, + DRM_SHM, + DRM_CONTAINS_LOCK, + &ctx->shared.hSAREA) < 0) + { + fprintf(stderr, "[drm] drmAddMap failed\n"); + return 0; + } + fprintf(stderr, "[drm] added %d byte SAREA at 0x%08lx\n", + ctx->shared.SAREASize, ctx->shared.hSAREA); + + if (drmMap( ctx->drmFD, + ctx->shared.hSAREA, + ctx->shared.SAREASize, + (drmAddressPtr)(&ctx->pSAREA)) < 0) + { + fprintf(stderr, "[drm] drmMap failed\n"); + return 0; + } + memset(ctx->pSAREA, 0, ctx->shared.SAREASize); + fprintf(stderr, "[drm] mapped SAREA 0x%08lx to %p, size %d\n", + ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize); + + /* Need to AddMap the framebuffer and mmio regions here: + */ + if (drmAddMap( ctx->drmFD, + (drm_handle_t)ctx->FBStart, + ctx->FBSize, + DRM_FRAME_BUFFER, +#ifndef _EMBEDDED + 0, +#else + DRM_READ_ONLY, +#endif + &ctx->shared.hFrameBuffer) < 0) + { + fprintf(stderr, "[drm] drmAddMap framebuffer failed\n"); + return 0; + } + + fprintf(stderr, "[drm] framebuffer handle = 0x%08lx\n", + ctx->shared.hFrameBuffer); + + + + if (drmAddMap(ctx->drmFD, + ctx->MMIOStart, + ctx->MMIOSize, + DRM_REGISTERS, + DRM_READ_ONLY, + &info->registerHandle) < 0) { + fprintf(stderr, "[drm] drmAddMap mmio failed\n"); + return 0; + } + fprintf(stderr, + "[drm] register handle = 0x%08lx\n", info->registerHandle); + + /* Check the radeon DRM version */ + if (!RADEONCheckDRMVersion(ctx, info)) { + return 0; + } + + if (ctx->isPCI) { + /* Initialize PCI */ + if (!RADEONDRIPciInit(ctx, info)) + return 0; + } + else { + /* Initialize AGP */ + if (!RADEONDRIAgpInit(ctx, info)) + return 0; + } + + /* Memory manager setup */ + if (!RADEONMemoryInit(ctx, info)) { + return 0; + } + + /* Create a 'server' context so we can grab the lock for + * initialization ioctls. + */ + if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) { + fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err); + return 0; + } + + DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); + + /* Initialize the kernel data structures */ + if (!RADEONDRIKernelInit(ctx, info)) { + fprintf(stderr, "RADEONDRIKernelInit failed\n"); + DRM_UNLOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext); + return 0; + } + + /* Initialize the vertex buffers list */ + if (!RADEONDRIBufInit(ctx, info)) { + fprintf(stderr, "RADEONDRIBufInit failed\n"); + DRM_UNLOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext); + return 0; + } + + RADEONColorTilingInit(ctx, info); + + /* Initialize IRQ */ + RADEONDRIIrqInit(ctx, info); + + /* Initialize kernel gart memory manager */ + RADEONDRIAgpHeapInit(ctx, info); + + fprintf(stderr,"color tiling %sabled\n", info->colorTiling?"en":"dis"); + fprintf(stderr,"page flipping %sabled\n", info->page_flip_enable?"en":"dis"); + /* Initialize the SAREA private data structure */ + { + drm_radeon_sarea_t *pSAREAPriv; + pSAREAPriv = (drm_radeon_sarea_t *)(((char*)ctx->pSAREA) + + sizeof(drm_sarea_t)); + memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); + pSAREAPriv->pfState = info->page_flip_enable; + } + + + /* Quick hack to clear the front & back buffers. Could also use + * the clear ioctl to do this, but would need to setup hw state + * first. + */ + drimemsetio((char *)ctx->FBAddress + info->frontOffset, + 0, + info->frontPitch * ctx->cpp * ctx->shared.virtualHeight ); + + drimemsetio((char *)ctx->FBAddress + info->backOffset, + 0, + info->backPitch * ctx->cpp * ctx->shared.virtualHeight ); + + /* This is the struct passed to radeon_dri.so for its initialization */ + ctx->driverClientMsg = malloc(sizeof(RADEONDRIRec)); + ctx->driverClientMsgSize = sizeof(RADEONDRIRec); + pRADEONDRI = (RADEONDRIPtr)ctx->driverClientMsg; + pRADEONDRI->deviceID = info->Chipset; + pRADEONDRI->width = ctx->shared.virtualWidth; + pRADEONDRI->height = ctx->shared.virtualHeight; + pRADEONDRI->depth = ctx->bpp; /* XXX: depth */ + pRADEONDRI->bpp = ctx->bpp; + pRADEONDRI->IsPCI = ctx->isPCI; + pRADEONDRI->AGPMode = ctx->agpmode; + pRADEONDRI->frontOffset = info->frontOffset; + pRADEONDRI->frontPitch = info->frontPitch; + pRADEONDRI->backOffset = info->backOffset; + pRADEONDRI->backPitch = info->backPitch; + pRADEONDRI->depthOffset = info->depthOffset; + pRADEONDRI->depthPitch = info->depthPitch; + pRADEONDRI->textureOffset = info->textureOffset; + pRADEONDRI->textureSize = info->textureSize; + pRADEONDRI->log2TexGran = info->log2TexGran; + pRADEONDRI->registerHandle = info->registerHandle; + pRADEONDRI->registerSize = info->registerSize; + pRADEONDRI->statusHandle = info->ringReadPtrHandle; + pRADEONDRI->statusSize = info->ringReadMapSize; + pRADEONDRI->gartTexHandle = info->gartTexHandle; + pRADEONDRI->gartTexMapSize = info->gartTexMapSize; + pRADEONDRI->log2GARTTexGran = info->log2GARTTexGran; + pRADEONDRI->gartTexOffset = info->gartTexStart; + pRADEONDRI->sarea_priv_offset = sizeof(drm_sarea_t); + + /* Don't release the lock now - let the VT switch handler do it. */ + + return 1; +} + + +/** + * \brief Get Radeon chip family from chipset number. + * + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * Called by radeonInitFBDev() to set RADEONInfoRec::ChipFamily + * according to the value of RADEONInfoRec::Chipset. Fails if the + * chipset is unrecognized or not appropriate for this driver (i.e., not + * an r100 style radeon) + */ +static int get_chipfamily_from_chipset( RADEONInfoPtr info ) +{ + switch (info->Chipset) { + case PCI_CHIP_RADEON_LY: + case PCI_CHIP_RADEON_LZ: + info->ChipFamily = CHIP_FAMILY_M6; + break; + + case PCI_CHIP_RADEON_QY: + case PCI_CHIP_RADEON_QZ: + info->ChipFamily = CHIP_FAMILY_VE; + break; + + case PCI_CHIP_R200_QL: + case PCI_CHIP_R200_QN: + case PCI_CHIP_R200_QO: + case PCI_CHIP_R200_Ql: + case PCI_CHIP_R200_BB: + info->ChipFamily = CHIP_FAMILY_R200; + break; + + case PCI_CHIP_RV200_QW: /* RV200 desktop */ + case PCI_CHIP_RV200_QX: + info->ChipFamily = CHIP_FAMILY_RV200; + break; + + case PCI_CHIP_RADEON_LW: + case PCI_CHIP_RADEON_LX: + info->ChipFamily = CHIP_FAMILY_M7; + break; + + case PCI_CHIP_RV250_Id: + case PCI_CHIP_RV250_Ie: + case PCI_CHIP_RV250_If: + case PCI_CHIP_RV250_Ig: + info->ChipFamily = CHIP_FAMILY_RV250; + break; + + case PCI_CHIP_RV250_Ld: + case PCI_CHIP_RV250_Le: + case PCI_CHIP_RV250_Lf: + case PCI_CHIP_RV250_Lg: + info->ChipFamily = CHIP_FAMILY_M9; + break; + + case PCI_CHIP_RV280_Y_: + case PCI_CHIP_RV280_Ya: + case PCI_CHIP_RV280_Yb: + case PCI_CHIP_RV280_Yc: + info->ChipFamily = CHIP_FAMILY_RV280; + break; + + case PCI_CHIP_R300_ND: + case PCI_CHIP_R300_NE: + case PCI_CHIP_R300_NF: + case PCI_CHIP_R300_NG: + info->ChipFamily = CHIP_FAMILY_R300; + break; + + default: + /* Original Radeon/7200 */ + info->ChipFamily = CHIP_FAMILY_RADEON; + } + + return 1; +} + + +/** + * \brief Validate the fbdev mode. + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. + * + * Saves some registers and returns 1. + * + * \sa radeonValidateMode(). + */ +static int radeonValidateMode( const DRIDriverContext *ctx ) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + RADEONInfoPtr info = ctx->driverPrivate; + + info->gen_int_cntl = INREG(RADEON_GEN_INT_CNTL); + info->crtc_offset_cntl = INREG(RADEON_CRTC_OFFSET_CNTL); + + if (info->colorTiling) + info->crtc_offset_cntl |= RADEON_CRTC_TILE_EN; + return 1; +} + + +/** + * \brief Examine mode returned by fbdev. + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. + * + * Restores registers that fbdev has clobbered and returns 1. + * + * \sa radeonValidateMode(). + */ +static int radeonPostValidateMode( const DRIDriverContext *ctx ) +{ + unsigned char *RADEONMMIO = ctx->MMIOAddress; + RADEONInfoPtr info = ctx->driverPrivate; + + RADEONColorTilingInit( ctx, info); + OUTREG(RADEON_GEN_INT_CNTL, info->gen_int_cntl); + if (info->colorTiling) + info->crtc_offset_cntl |= RADEON_CRTC_TILE_EN; + OUTREG(RADEON_CRTC_OFFSET_CNTL, info->crtc_offset_cntl); + + return 1; +} + + +/** + * \brief Initialize the framebuffer device mode + * + * \param ctx display handle. + * + * \return one on success, or zero on failure. + * + * Fills in \p info with some default values and some information from \p ctx + * and then calls RADEONScreenInit() for the screen initialization. + * + * Before exiting clears the framebuffer memory accessing it directly. + */ +static int radeonInitFBDev( DRIDriverContext *ctx ) +{ + RADEONInfoPtr info = calloc(1, sizeof(*info)); + + { + int dummy = ctx->shared.virtualWidth; + + if (ctx->colorTiling==1) + { + switch (ctx->bpp / 8) { + case 1: dummy = (ctx->shared.virtualWidth + 255) & ~255; break; + case 2: dummy = (ctx->shared.virtualWidth + 127) & ~127; break; + case 3: + case 4: dummy = (ctx->shared.virtualWidth + 63) & ~63; break; + } + } else { + switch (ctx->bpp / 8) { + case 1: dummy = (ctx->shared.virtualWidth + 127) & ~127; break; + case 2: dummy = (ctx->shared.virtualWidth + 31) & ~31; break; + case 3: + case 4: dummy = (ctx->shared.virtualWidth + 15) & ~15; break; + } + } + + ctx->shared.virtualWidth = dummy; + ctx->shared.Width = dummy; + } + + fprintf(stderr,"shared virtual width is %d\n", ctx->shared.virtualWidth); + ctx->driverPrivate = (void *)info; + + info->gartFastWrite = RADEON_DEFAULT_AGP_FAST_WRITE; + info->gartSize = RADEON_DEFAULT_AGP_SIZE; + info->gartTexSize = RADEON_DEFAULT_AGP_TEX_SIZE; + info->bufSize = RADEON_DEFAULT_BUFFER_SIZE; + info->ringSize = RADEON_DEFAULT_RING_SIZE; + info->page_flip_enable = RADEON_DEFAULT_PAGE_FLIP; + info->colorTiling = ctx->colorTiling; + + info->Chipset = ctx->chipset; + + if (!get_chipfamily_from_chipset( info )) { + fprintf(stderr, "Unknown or non-radeon chipset -- cannot continue\n"); + fprintf(stderr, "==> Verify PCI BusID is correct in miniglx.conf\n"); + return 0; + } + + info->frontPitch = ctx->shared.virtualWidth; + info->LinearAddr = ctx->FBStart & 0xfc000000; + + + if (!RADEONScreenInit( ctx, info )) + return 0; + + + return 1; +} + + +/** + * \brief The screen is being closed, so clean up any state and free any + * resources used by the DRI. + * + * \param ctx display handle. + * + * Unmaps the SAREA, closes the DRM device file descriptor and frees the driver + * private data. + */ +static void radeonHaltFBDev( DRIDriverContext *ctx ) +{ + drmUnmap( ctx->pSAREA, ctx->shared.SAREASize ); + drmClose(ctx->drmFD); + + if (ctx->driverPrivate) { + free(ctx->driverPrivate); + ctx->driverPrivate = 0; + } +} + + +extern void radeonNotifyFocus( int ); + +/** + * \brief Exported driver interface for Mini GLX. + * + * \sa DRIDriverRec. + */ +const struct DRIDriverRec __driDriver = { + radeonValidateMode, + radeonPostValidateMode, + radeonInitFBDev, + radeonHaltFBDev, + RADEONEngineShutdown, + RADEONEngineRestore, +#ifndef _EMBEDDED + 0, +#else + radeonNotifyFocus, +#endif +}; diff --git a/radeon/server/radeon_dri.h b/radeon/server/radeon_dri.h new file mode 100644 index 0000000..ecd5323 --- /dev/null +++ b/radeon/server/radeon_dri.h @@ -0,0 +1,116 @@ +/** + * \file server/radeon_dri.h + * \brief Radeon server-side structures. + * + * \author Kevin E. Martin <martin@xfree86.org> + * \author Rickard E. Faith <faith@valinux.com> + */ + +/* + * Copyright 2000 ATI Technologies Inc., Markham, Ontario, + * VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation on the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR + * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.h,v 1.3 2002/04/24 16:20:40 martin Exp $ */ + +#ifndef _RADEON_DRI_ +#define _RADEON_DRI_ + +#include "xf86drm.h" +#include "drm.h" +#include "radeon_drm.h" + +/* DRI Driver defaults */ +#define RADEON_DEFAULT_CP_PIO_MODE RADEON_CSQ_PRIPIO_INDPIO +#define RADEON_DEFAULT_CP_BM_MODE RADEON_CSQ_PRIBM_INDBM +#define RADEON_DEFAULT_AGP_MODE 1 +#define RADEON_DEFAULT_AGP_FAST_WRITE 0 +#define RADEON_DEFAULT_AGP_SIZE 8 /* MB (must be 2^n and > 4MB) */ +#define RADEON_DEFAULT_RING_SIZE 1 /* MB (must be page aligned) */ +#define RADEON_DEFAULT_BUFFER_SIZE 2 /* MB (must be page aligned) */ +#define RADEON_DEFAULT_AGP_TEX_SIZE 1 /* MB (must be page aligned) */ +#define RADEON_DEFAULT_CP_TIMEOUT 10000 /* usecs */ +#define RADEON_DEFAULT_PAGE_FLIP 0 /* page flipping diabled */ +#define RADEON_BUFFER_ALIGN 0x00000fff + +/** + * \brief Radeon DRI driver private data. + */ +typedef struct { + /** + * \name DRI screen private data + */ + /*@{*/ + int deviceID; /**< \brief PCI device ID */ + int width; /**< \brief width in pixels of display */ + int height; /**< \brief height in scanlines of display */ + int depth; /**< \brief depth of display (8, 15, 16, 24) */ + int bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ + + int IsPCI; /**< \brief is current card a PCI card? */ + int AGPMode; /**< \brief AGP mode */ + + int frontOffset; /**< \brief front buffer offset */ + int frontPitch; /**< \brief front buffer pitch */ + int backOffset; /**< \brief shared back buffer offset */ + int backPitch; /**< \brief shared back buffer pitch */ + int depthOffset; /**< \brief shared depth buffer offset */ + int depthPitch; /**< \brief shared depth buffer pitch */ + int textureOffset; /**< \brief start of texture data in frame buffer */ + int textureSize; /**< \brief size of texture date */ + int log2TexGran; /**< \brief log2 texture granularity */ + /*@}*/ + + /** + * \name MMIO register data + */ + /*@{*/ + drm_handle_t registerHandle; /**< \brief MMIO register map size */ + drmSize registerSize; /**< \brief MMIO register map handle */ + /*@}*/ + + /** + * \name CP in-memory status information + */ + /*@{*/ + drm_handle_t statusHandle; /**< \brief status map handle */ + drmSize statusSize; /**< \brief status map size */ + /*@}*/ + + /** + * \name CP AGP Texture data + */ + /*@{*/ + drm_handle_t gartTexHandle; /**< \brief AGP texture area map handle */ + drmSize gartTexMapSize; /**< \brief AGP texture area map size */ + int log2GARTTexGran; /**< \brief AGP texture granularity in log base 2 */ + int gartTexOffset; /**< \brief AGP texture area offset in AGP space */ + /*@}*/ + + unsigned int sarea_priv_offset; /**< \brief offset of the private SAREA data*/ +} RADEONDRIRec, *RADEONDRIPtr; + +#endif diff --git a/radeon/server/radeon_egl.c b/radeon/server/radeon_egl.c new file mode 100644 index 0000000..2f6ea55 --- /dev/null +++ b/radeon/server/radeon_egl.c @@ -0,0 +1,1088 @@ +/* + * EGL driver for radeon_dri.so + */ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include "eglconfig.h" +#include "eglcontext.h" +#include "egldisplay.h" +#include "egldriver.h" +#include "eglglobals.h" +#include "egllog.h" +#include "eglmode.h" +#include "eglscreen.h" +#include "eglsurface.h" +#include "egldri.h" + +#include "mtypes.h" +#include "memops.h" +#include "drm.h" +#include "drm_sarea.h" +#include "radeon_drm.h" +#include "radeon_dri.h" +#include "radeon.h" + +static size_t radeon_drm_page_size; + +/** + * radeon driver-specific driver class derived from _EGLDriver + */ +typedef struct radeon_driver +{ + _EGLDriver Base; /* base class/object */ + GLuint radeonStuff; +} radeonDriver; + +static int +RADEONSetParam(driDisplay *disp, int param, int value) +{ + drm_radeon_setparam_t sp; + int ret; + + memset(&sp, 0, sizeof(sp)); + sp.param = param; + sp.value = value; + + if ((ret=drmCommandWrite(disp->drmFD, DRM_RADEON_SETPARAM, &sp, sizeof(sp)))) { + fprintf(stderr,"Set param failed\n", ret); + return -1; + } + + return 0; +} + +static int +RADEONCheckDRMVersion(driDisplay *disp, RADEONInfoPtr info) +{ + drmVersionPtr version; + + version = drmGetVersion(disp->drmFD); + if (version) { + int req_minor, req_patch; + + /* Need 1.21.x for card type detection getparam + */ + req_minor = 21; + req_patch = 0; + + if (version->version_major != 1 || + version->version_minor < req_minor || + (version->version_minor == req_minor && + version->version_patchlevel < req_patch)) { + /* Incompatible drm version */ + fprintf(stderr, + "[dri] RADEONDRIScreenInit failed because of a version " + "mismatch.\n" + "[dri] radeon.o kernel module version is %d.%d.%d " + "but version 1.%d.%d or newer is needed.\n" + "[dri] Disabling DRI.\n", + version->version_major, + version->version_minor, + version->version_patchlevel, + req_minor, + req_patch); + drmFreeVersion(version); + return 0; + } + + info->drmMinor = version->version_minor; + drmFreeVersion(version); + } + + return 1; +} + + +/** + * \brief Compute base 2 logarithm. + * + * \param val value. + * + * \return base 2 logarithm of \p val. + */ +static int RADEONMinBits(int val) +{ + int bits; + + if (!val) return 1; + for (bits = 0; val; val >>= 1, ++bits); + return bits; +} + + +/* Initialize the PCI GART state. Request memory for use in PCI space, + * and initialize the Radeon registers to point to that memory. + */ +static int RADEONDRIPciInit(driDisplay *disp, RADEONInfoPtr info) +{ + int ret; + int flags = DRM_READ_ONLY | DRM_LOCKED | DRM_KERNEL; + int s, l; + + ret = drmScatterGatherAlloc(disp->drmFD, info->gartSize*1024*1024, + &info->gartMemHandle); + if (ret < 0) { + fprintf(stderr, "[pci] Out of memory (%d)\n", ret); + return 0; + } + fprintf(stderr, + "[pci] %d kB allocated with handle 0x%04lx\n", + info->gartSize*1024, (long) info->gartMemHandle); + + info->gartOffset = 0; + + /* Initialize the CP ring buffer data */ + info->ringStart = info->gartOffset; + info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; + + info->ringReadOffset = info->ringStart + info->ringMapSize; + info->ringReadMapSize = radeon_drm_page_size; + + /* Reserve space for vertex/indirect buffers */ + info->bufStart = info->ringReadOffset + info->ringReadMapSize; + info->bufMapSize = info->bufSize*1024*1024; + + /* Reserve the rest for AGP textures */ + info->gartTexStart = info->bufStart + info->bufMapSize; + s = (info->gartSize*1024*1024 - info->gartTexStart); + l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + info->gartTexMapSize = (s >> l) << l; + info->log2GARTTexGran = l; + + if (drmAddMap(disp->drmFD, info->ringStart, info->ringMapSize, + DRM_SCATTER_GATHER, flags, &info->ringHandle) < 0) { + fprintf(stderr, + "[pci] Could not add ring mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] ring handle = 0x%08lx\n", info->ringHandle); + + if (drmAddMap(disp->drmFD, info->ringReadOffset, info->ringReadMapSize, + DRM_SCATTER_GATHER, flags, &info->ringReadPtrHandle) < 0) { + fprintf(stderr, + "[pci] Could not add ring read ptr mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] ring read ptr handle = 0x%08lx\n", + info->ringReadPtrHandle); + + if (drmAddMap(disp->drmFD, info->bufStart, info->bufMapSize, + DRM_SCATTER_GATHER, 0, &info->bufHandle) < 0) { + fprintf(stderr, + "[pci] Could not add vertex/indirect buffers mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] vertex/indirect buffers handle = 0x%08lx\n", + info->bufHandle); + + if (drmAddMap(disp->drmFD, info->gartTexStart, info->gartTexMapSize, + DRM_SCATTER_GATHER, 0, &info->gartTexHandle) < 0) { + fprintf(stderr, + "[pci] Could not add GART texture map mapping\n"); + return 0; + } + fprintf(stderr, + "[pci] GART texture map handle = 0x%08lx\n", + info->gartTexHandle); + + return 1; +} + + +/** + * \brief Initialize the AGP state + * + * \param ctx display handle. + * \param info driver private data. + * + * \return one on success, or zero on failure. + * + * Acquires and enables the AGP device. Reserves memory in the AGP space for + * the ring buffer, vertex buffers and textures. Initialize the Radeon + * registers to point to that memory and add client mappings. + */ +static int RADEONDRIAgpInit( driDisplay *disp, RADEONInfoPtr info) +{ + int mode, ret; + int s, l; + int agpmode = 1; + + if (drmAgpAcquire(disp->drmFD) < 0) { + fprintf(stderr, "[gart] AGP not available\n"); + return 0; + } + + mode = drmAgpGetMode(disp->drmFD); /* Default mode */ + /* Disable fast write entirely - too many lockups. + */ + mode &= ~RADEON_AGP_MODE_MASK; + switch (agpmode) { + case 4: mode |= RADEON_AGP_4X_MODE; + case 2: mode |= RADEON_AGP_2X_MODE; + case 1: default: mode |= RADEON_AGP_1X_MODE; + } + + if (drmAgpEnable(disp->drmFD, mode) < 0) { + fprintf(stderr, "[gart] AGP not enabled\n"); + drmAgpRelease(disp->drmFD); + return 0; + } + +#if 0 + /* Workaround for some hardware bugs */ + if (info->ChipFamily < CHIP_FAMILY_R200) + OUTREG(RADEON_AGP_CNTL, INREG(RADEON_AGP_CNTL) | 0x000e0000); +#endif + info->gartOffset = 0; + + if ((ret = drmAgpAlloc(disp->drmFD, info->gartSize*1024*1024, 0, NULL, + &info->gartMemHandle)) < 0) { + fprintf(stderr, "[gart] Out of memory (%d)\n", ret); + drmAgpRelease(disp->drmFD); + return 0; + } + fprintf(stderr, + "[gart] %d kB allocated with handle 0x%08x\n", + info->gartSize*1024, (unsigned)info->gartMemHandle); + + if (drmAgpBind(disp->drmFD, + info->gartMemHandle, info->gartOffset) < 0) { + fprintf(stderr, "[gart] Could not bind\n"); + drmAgpFree(disp->drmFD, info->gartMemHandle); + drmAgpRelease(disp->drmFD); + return 0; + } + + /* Initialize the CP ring buffer data */ + info->ringStart = info->gartOffset; + info->ringMapSize = info->ringSize*1024*1024 + radeon_drm_page_size; + + info->ringReadOffset = info->ringStart + info->ringMapSize; + info->ringReadMapSize = radeon_drm_page_size; + + /* Reserve space for vertex/indirect buffers */ + info->bufStart = info->ringReadOffset + info->ringReadMapSize; + info->bufMapSize = info->bufSize*1024*1024; + + /* Reserve the rest for AGP textures */ + info->gartTexStart = info->bufStart + info->bufMapSize; + s = (info->gartSize*1024*1024 - info->gartTexStart); + l = RADEONMinBits((s-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + info->gartTexMapSize = (s >> l) << l; + info->log2GARTTexGran = l; + + if (drmAddMap(disp->drmFD, info->ringStart, info->ringMapSize, + DRM_AGP, DRM_READ_ONLY, &info->ringHandle) < 0) { + fprintf(stderr, "[gart] Could not add ring mapping\n"); + return 0; + } + fprintf(stderr, "[gart] ring handle = 0x%08lx\n", info->ringHandle); + + + if (drmAddMap(disp->drmFD, info->ringReadOffset, info->ringReadMapSize, + DRM_AGP, DRM_READ_ONLY, &info->ringReadPtrHandle) < 0) { + fprintf(stderr, + "[gart] Could not add ring read ptr mapping\n"); + return 0; + } + + fprintf(stderr, + "[gart] ring read ptr handle = 0x%08lx\n", + info->ringReadPtrHandle); + + if (drmAddMap(disp->drmFD, info->bufStart, info->bufMapSize, + DRM_AGP, 0, &info->bufHandle) < 0) { + fprintf(stderr, + "[gart] Could not add vertex/indirect buffers mapping\n"); + return 0; + } + fprintf(stderr, + "[gart] vertex/indirect buffers handle = 0x%08lx\n", + info->bufHandle); + + if (drmAddMap(disp->drmFD, info->gartTexStart, info->gartTexMapSize, + DRM_AGP, 0, &info->gartTexHandle) < 0) { + fprintf(stderr, + "[gart] Could not add AGP texture map mapping\n"); + return 0; + } + fprintf(stderr, + "[gart] AGP texture map handle = 0x%08lx\n", + info->gartTexHandle); + + return 1; +} + + +/** + * Initialize all the memory-related fields of the RADEONInfo object. + * This includes the various 'offset' and 'size' fields. + */ +static int +RADEONMemoryInit(driDisplay *disp, RADEONInfoPtr info) +{ + int width_bytes = disp->virtualWidth * disp->cpp; + int cpp = disp->cpp; + int bufferSize = ((disp->virtualHeight * width_bytes + + RADEON_BUFFER_ALIGN) + & ~RADEON_BUFFER_ALIGN); + int depthSize = ((((disp->virtualHeight+15) & ~15) * width_bytes + + RADEON_BUFFER_ALIGN) + & ~RADEON_BUFFER_ALIGN); + int l; + int pcie_gart_table_size = 0; + + info->frontOffset = 0; + info->frontPitch = disp->virtualWidth; + + if (disp->card_type==RADEON_CARD_PCIE) + pcie_gart_table_size = RADEON_PCIGART_TABLE_SIZE; + + /* Front, back and depth buffers - everything else texture?? + */ + info->textureSize = disp->fbSize - pcie_gart_table_size - 2 * bufferSize - depthSize; + + if (info->textureSize < 0) + return 0; + + l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; + + /* Round the texture size up to the nearest whole number of + * texture regions. Again, be greedy about this, don't + * round down. + */ + info->log2TexGran = l; + info->textureSize = (info->textureSize >> l) << l; + + /* Set a minimum usable local texture heap size. This will fit + * two 256x256x32bpp textures. + */ + if (info->textureSize < 512 * 1024) { + info->textureOffset = 0; + info->textureSize = 0; + } + + /* Reserve space for textures */ + info->textureOffset = ((disp->fbSize - pcie_gart_table_size - info->textureSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + + /* Reserve space for the shared depth + * buffer. + */ + info->depthOffset = ((info->textureOffset - depthSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + info->depthPitch = disp->virtualWidth; + + info->backOffset = ((info->depthOffset - bufferSize + + RADEON_BUFFER_ALIGN) & + ~RADEON_BUFFER_ALIGN); + info->backPitch = disp->virtualWidth; + + if (pcie_gart_table_size) + info->pcieGartTableOffset = disp->fbSize - pcie_gart_table_size; + + fprintf(stderr, + "Will use back buffer at offset 0x%x, pitch %d\n", + info->backOffset, info->backPitch); + fprintf(stderr, + "Will use depth buffer at offset 0x%x, pitch %d\n", + info->depthOffset, info->depthPitch); + fprintf(stderr, + "Will use %d kb for textures at offset 0x%x\n", + info->textureSize/1024, info->textureOffset); + if (pcie_gart_table_size) + { + fprintf(stderr, + "Will use %d kb for PCIE GART Table at offset 0x%x\n", + pcie_gart_table_size/1024, info->pcieGartTableOffset); + } + + /* XXX I don't think these are needed. */ +#if 0 + info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) | + (info->frontOffset >> 10)); + + info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) | + (info->backOffset >> 10)); + + info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) | + (info->depthOffset >> 10)); +#endif + + if (pcie_gart_table_size) + RADEONSetParam(disp, RADEON_SETPARAM_PCIGART_LOCATION, info->pcieGartTableOffset); + + return 1; +} + + +/** + * \brief Initialize the kernel data structures and enable the CP engine. + * + * \param ctx display handle. + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * This function is a wrapper around the DRM_RADEON_CP_INIT command, passing + * all the parameters in a drm_radeon_init_t structure. + */ +static int RADEONDRIKernelInit( driDisplay *disp, + RADEONInfoPtr info) +{ + int cpp = disp->bpp / 8; + drm_radeon_init_t drmInfo; + int ret; + + memset(&drmInfo, 0, sizeof(drmInfo)); + + if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) + drmInfo.func = RADEON_INIT_R300_CP; + else if ( (info->ChipFamily == CHIP_FAMILY_R200) || + (info->ChipFamily == CHIP_FAMILY_RV250) || + (info->ChipFamily == CHIP_FAMILY_M9) || + (info->ChipFamily == CHIP_FAMILY_RV280) ) + drmInfo.func = RADEON_INIT_R200_CP; + else + drmInfo.func = RADEON_INIT_CP; + + /* This is the struct passed to the kernel module for its initialization */ + /* XXX problem here: + * The front/back/depth_offset/pitch fields may change depending upon + * which drawing surface we're using!!! They can't be set just once + * during initialization. + * Looks like we'll need a new ioctl to update these fields for drawing + * to other surfaces... + */ + drmInfo.sarea_priv_offset = sizeof(drm_sarea_t); + drmInfo.cp_mode = RADEON_DEFAULT_CP_BM_MODE; + drmInfo.gart_size = info->gartSize*1024*1024; + drmInfo.ring_size = info->ringSize*1024*1024; + drmInfo.usec_timeout = 1000; + drmInfo.fb_bpp = disp->bpp; + drmInfo.depth_bpp = disp->bpp; + drmInfo.front_offset = info->frontOffset; + drmInfo.front_pitch = info->frontPitch * cpp; + drmInfo.back_offset = info->backOffset; + drmInfo.back_pitch = info->backPitch * cpp; + drmInfo.depth_offset = info->depthOffset; + drmInfo.depth_pitch = info->depthPitch * cpp; + drmInfo.ring_offset = info->ringHandle; + drmInfo.ring_rptr_offset = info->ringReadPtrHandle; + drmInfo.buffers_offset = info->bufHandle; + drmInfo.gart_textures_offset = info->gartTexHandle; + + ret = drmCommandWrite(disp->drmFD, DRM_RADEON_CP_INIT, &drmInfo, + sizeof(drm_radeon_init_t)); + + return ret >= 0; +} + + +/** + * \brief Add a map for the vertex buffers that will be accessed by any + * DRI-based clients. + * + * \param ctx display handle. + * \param info driver private data. + * + * \return one on success, or zero on failure. + * + * Calls drmAddBufs() with the previously allocated vertex buffers. + */ +static int RADEONDRIBufInit( driDisplay *disp, RADEONInfoPtr info ) +{ + /* Initialize vertex buffers */ + info->bufNumBufs = drmAddBufs(disp->drmFD, + info->bufMapSize / RADEON_BUFFER_SIZE, + RADEON_BUFFER_SIZE, + (disp->card_type!=RADEON_CARD_AGP) ? DRM_SG_BUFFER : DRM_AGP_BUFFER, + info->bufStart); + + if (info->bufNumBufs <= 0) { + fprintf(stderr, + "[drm] Could not create vertex/indirect buffers list\n"); + return 0; + } + fprintf(stderr, + "[drm] Added %d %d byte vertex/indirect buffers\n", + info->bufNumBufs, RADEON_BUFFER_SIZE); + + return 1; +} + + +/** + * \brief Install an IRQ handler. + * + * \param disp display handle. + * \param info driver private data. + * + * Attempts to install an IRQ handler via drmCtlInstHandler(), falling back to + * IRQ-free operation on failure. + */ +static void RADEONDRIIrqInit(driDisplay *disp, RADEONInfoPtr info) +{ + if ((drmCtlInstHandler(disp->drmFD, 0)) != 0) + fprintf(stderr, "[drm] failure adding irq handler, " + "there is a device already using that irq\n" + "[drm] falling back to irq-free operation\n"); +} + + +/** + * \brief Initialize the AGP heap. + * + * \param disp display handle. + * \param info driver private data. + * + * This function is a wrapper around the DRM_RADEON_INIT_HEAP command, passing + * all the parameters in a drm_radeon_mem_init_heap structure. + */ +static void RADEONDRIAgpHeapInit(driDisplay *disp, + RADEONInfoPtr info) +{ + drm_radeon_mem_init_heap_t drmHeap; + + /* Start up the simple memory manager for gart space */ + drmHeap.region = RADEON_MEM_REGION_GART; + drmHeap.start = 0; + drmHeap.size = info->gartTexMapSize; + + if (drmCommandWrite(disp->drmFD, DRM_RADEON_INIT_HEAP, + &drmHeap, sizeof(drmHeap))) { + fprintf(stderr, + "[drm] Failed to initialized gart heap manager\n"); + } else { + fprintf(stderr, + "[drm] Initialized kernel gart heap manager, %d\n", + info->gartTexMapSize); + } +} + +static int RADEONGetCardType(driDisplay *disp, RADEONInfoPtr info) +{ + drm_radeon_getparam_t gp; + int ret; + + gp.param = RADEON_PARAM_CARD_TYPE; + gp.value = &disp->card_type; + + ret=drmCommandWriteRead(disp->drmFD, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_CARD_TYPE) : %d\n", ret); + return -1; + } + + return disp->card_type; +} + +/** + * Called at the start of each server generation. + * + * \param disp display handle. + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * Performs static frame buffer allocation. Opens the DRM device and add maps + * to the SAREA, framebuffer and MMIO regions. Fills in \p info with more + * information. Creates a \e server context to grab the lock for the + * initialization ioctls and calls the other initilization functions in this + * file. Starts the CP engine via the DRM_RADEON_CP_START command. + * + * Setups a RADEONDRIRec structure to be passed to radeon_dri.so for its + * initialization. + */ +static int +RADEONScreenInit( driDisplay *disp, RADEONInfoPtr info, + RADEONDRIPtr pRADEONDRI) +{ + int i, err; + + /* XXX this probably isn't needed here */ + { + int width_bytes = (disp->virtualWidth * disp->cpp); + int maxy = disp->fbSize / width_bytes; + + if (maxy <= disp->virtualHeight * 3) { + _eglLog(_EGL_WARNING, + "Static buffer allocation failed -- " + "need at least %d kB video memory (have %d kB)\n", + (disp->virtualWidth * disp->virtualHeight * + disp->cpp * 3 + 1023) / 1024, + disp->fbSize / 1024); + return 0; + } + } + + /* Memory manager setup */ + if (!RADEONMemoryInit(disp, info)) { + return 0; + } + + /* Create a 'server' context so we can grab the lock for + * initialization ioctls. + */ + if ((err = drmCreateContext(disp->drmFD, &disp->serverContext)) != 0) { + _eglLog(_EGL_WARNING, "%s: drmCreateContext failed %d\n", + __FUNCTION__, err); + return 0; + } + + DRM_LOCK(disp->drmFD, disp->pSAREA, disp->serverContext, 0); + + /* Initialize the kernel data structures */ + if (!RADEONDRIKernelInit(disp, info)) { + _eglLog(_EGL_WARNING, "RADEONDRIKernelInit failed\n"); + DRM_UNLOCK(disp->drmFD, disp->pSAREA, disp->serverContext); + return 0; + } + + /* Initialize the vertex buffers list */ + if (!RADEONDRIBufInit(disp, info)) { + fprintf(stderr, "RADEONDRIBufInit failed\n"); + DRM_UNLOCK(disp->drmFD, disp->pSAREA, disp->serverContext); + return 0; + } + + /* Initialize IRQ */ + RADEONDRIIrqInit(disp, info); + + /* Initialize kernel gart memory manager */ + RADEONDRIAgpHeapInit(disp, info); + + /* Initialize the SAREA private data structure */ + { + drm_radeon_sarea_t *pSAREAPriv; + pSAREAPriv = (drm_radeon_sarea_t *)(((char*)disp->pSAREA) + + sizeof(drm_sarea_t)); + memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); + pSAREAPriv->pfState = info->page_flip_enable; + } + + for ( i = 0;; i++ ) { + drmMapType type; + drmMapFlags flags; + drm_handle_t handle, offset; + drmSize size; + int rc, mtrr; + + if ( ( rc = drmGetMap( disp->drmFD, i, &offset, &size, &type, &flags, &handle, &mtrr ) ) != 0 ) + break; + if ( type == DRM_REGISTERS ) { + pRADEONDRI->registerHandle = offset; + pRADEONDRI->registerSize = size; + break; + } + } + /* Quick hack to clear the front & back buffers. Could also use + * the clear ioctl to do this, but would need to setup hw state + * first. + */ + drimemsetio((char *)disp->pFB + info->frontOffset, + 0xEE, + info->frontPitch * disp->cpp * disp->virtualHeight ); + + drimemsetio((char *)disp->pFB + info->backOffset, + 0x30, + info->backPitch * disp->cpp * disp->virtualHeight ); + + + /* This is the struct passed to radeon_dri.so for its initialization */ + pRADEONDRI->deviceID = info->Chipset; + pRADEONDRI->width = disp->virtualWidth; + pRADEONDRI->height = disp->virtualHeight; + pRADEONDRI->depth = disp->bpp; /* XXX: depth */ + pRADEONDRI->bpp = disp->bpp; + pRADEONDRI->IsPCI = (disp->card_type != RADEON_CARD_AGP);; + pRADEONDRI->frontOffset = info->frontOffset; + pRADEONDRI->frontPitch = info->frontPitch; + pRADEONDRI->backOffset = info->backOffset; + pRADEONDRI->backPitch = info->backPitch; + pRADEONDRI->depthOffset = info->depthOffset; + pRADEONDRI->depthPitch = info->depthPitch; + pRADEONDRI->textureOffset = info->textureOffset; + pRADEONDRI->textureSize = info->textureSize; + pRADEONDRI->log2TexGran = info->log2TexGran; + pRADEONDRI->statusHandle = info->ringReadPtrHandle; + pRADEONDRI->statusSize = info->ringReadMapSize; + pRADEONDRI->gartTexHandle = info->gartTexHandle; + pRADEONDRI->gartTexMapSize = info->gartTexMapSize; + pRADEONDRI->log2GARTTexGran = info->log2GARTTexGran; + pRADEONDRI->gartTexOffset = info->gartTexStart; + pRADEONDRI->sarea_priv_offset = sizeof(drm_sarea_t); + + /* Don't release the lock now - let the VT switch handler do it. */ + + return 1; +} + + +/** + * \brief Get Radeon chip family from chipset number. + * + * \param info driver private data. + * + * \return non-zero on success, or zero on failure. + * + * Called by radeonInitFBDev() to set RADEONInfoRec::ChipFamily + * according to the value of RADEONInfoRec::Chipset. Fails if the + * chipset is unrecognized or not appropriate for this driver (i.e., not + * an r100 style radeon) + */ +static int get_chipfamily_from_chipset( RADEONInfoPtr info ) +{ + switch (info->Chipset) { + case PCI_CHIP_RADEON_LY: + case PCI_CHIP_RADEON_LZ: + info->ChipFamily = CHIP_FAMILY_M6; + break; + + case PCI_CHIP_RADEON_QY: + case PCI_CHIP_RADEON_QZ: + info->ChipFamily = CHIP_FAMILY_VE; + break; + + case PCI_CHIP_R200_QL: + case PCI_CHIP_R200_QN: + case PCI_CHIP_R200_QO: + case PCI_CHIP_R200_Ql: + case PCI_CHIP_R200_BB: + info->ChipFamily = CHIP_FAMILY_R200; + break; + + case PCI_CHIP_RV200_QW: /* RV200 desktop */ + case PCI_CHIP_RV200_QX: + info->ChipFamily = CHIP_FAMILY_RV200; + break; + + case PCI_CHIP_RADEON_LW: + case PCI_CHIP_RADEON_LX: + info->ChipFamily = CHIP_FAMILY_M7; + break; + + case PCI_CHIP_RV250_Id: + case PCI_CHIP_RV250_Ie: + case PCI_CHIP_RV250_If: + case PCI_CHIP_RV250_Ig: + info->ChipFamily = CHIP_FAMILY_RV250; + break; + + case PCI_CHIP_RV250_Ld: + case PCI_CHIP_RV250_Le: + case PCI_CHIP_RV250_Lf: + case PCI_CHIP_RV250_Lg: + info->ChipFamily = CHIP_FAMILY_M9; + break; + + case PCI_CHIP_RV280_Y_: + case PCI_CHIP_RV280_Ya: + case PCI_CHIP_RV280_Yb: + case PCI_CHIP_RV280_Yc: + info->ChipFamily = CHIP_FAMILY_RV280; + break; + + case PCI_CHIP_R300_ND: + case PCI_CHIP_R300_NE: + case PCI_CHIP_R300_NF: + case PCI_CHIP_R300_NG: + info->ChipFamily = CHIP_FAMILY_R300; + break; + + case PCI_CHIP_RV370_5460: + info->ChipFamily = CHIP_FAMILY_RV380; + break; + + default: + /* Original Radeon/7200 */ + info->ChipFamily = CHIP_FAMILY_RADEON; + } + + return 1; +} + + +/** + * \brief Initialize the framebuffer device mode + * + * \param disp display handle. + * + * \return one on success, or zero on failure. + * + * Fills in \p info with some default values and some information from \p disp + * and then calls RADEONScreenInit() for the screen initialization. + * + * Before exiting clears the framebuffer memory accessing it directly. + */ +static int radeonInitFBDev( driDisplay *disp, RADEONDRIPtr pRADEONDRI ) +{ + int err; + RADEONInfoPtr info = calloc(1, sizeof(*info)); + + disp->driverPrivate = (void *)info; + + info->gartFastWrite = RADEON_DEFAULT_AGP_FAST_WRITE; + info->gartSize = RADEON_DEFAULT_AGP_SIZE; + info->gartTexSize = RADEON_DEFAULT_AGP_TEX_SIZE; + info->bufSize = RADEON_DEFAULT_BUFFER_SIZE; + info->ringSize = RADEON_DEFAULT_RING_SIZE; + info->page_flip_enable = RADEON_DEFAULT_PAGE_FLIP; + + fprintf(stderr, + "Using %d MB AGP aperture\n", info->gartSize); + fprintf(stderr, + "Using %d MB for the ring buffer\n", info->ringSize); + fprintf(stderr, + "Using %d MB for vertex/indirect buffers\n", info->bufSize); + fprintf(stderr, + "Using %d MB for AGP textures\n", info->gartTexSize); + fprintf(stderr, + "page flipping %sabled\n", info->page_flip_enable?"en":"dis"); + + info->Chipset = disp->chipset; + + if (!get_chipfamily_from_chipset( info )) { + fprintf(stderr, "Unknown or non-radeon chipset -- cannot continue\n"); + fprintf(stderr, "==> Verify PCI BusID is correct in miniglx.conf\n"); + return 0; + } +#if 0 + if (info->ChipFamily >= CHIP_FAMILY_R300) { + fprintf(stderr, + "Direct rendering not yet supported on " + "Radeon 9700 and newer cards\n"); + return 0; + } +#endif + +#if 00 + /* don't seem to need this here */ + info->frontPitch = disp->virtualWidth; +#endif + + /* Check the radeon DRM version */ + if (!RADEONCheckDRMVersion(disp, info)) { + return 0; + } + + if (RADEONGetCardType(disp, info)<0) + return 0; + + if (disp->card_type!=RADEON_CARD_AGP) { + /* Initialize PCI */ + if (!RADEONDRIPciInit(disp, info)) + return 0; + } + else { + /* Initialize AGP */ + if (!RADEONDRIAgpInit(disp, info)) + return 0; + } + + if (!RADEONScreenInit( disp, info, pRADEONDRI)) + return 0; + + /* Initialize and start the CP if required */ + if ((err = drmCommandNone(disp->drmFD, DRM_RADEON_CP_START)) != 0) { + fprintf(stderr, "%s: CP start %d\n", __FUNCTION__, err); + return 0; + } + + return 1; +} + + +/** + * Create list of all supported surface configs, attach list to the display. + */ +static EGLBoolean +radeonFillInConfigs(_EGLDisplay *disp, unsigned pixel_bits, + unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + _EGLConfig *configs; + _EGLConfig *c; + unsigned int i, num_configs; + unsigned int depth_buffer_factor; + unsigned int back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + + /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy + * enough to add support. Basically, if a context is created with an + * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping + * will never be used. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */ + }; + + u_int8_t depth_bits_array[2]; + u_int8_t stencil_bits_array[2]; + + depth_bits_array[0] = depth_bits; + depth_bits_array[1] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; + back_buffer_factor = (have_back_buffer) ? 2 : 1; + + num_configs = depth_buffer_factor * back_buffer_factor * 2; + + if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } else { + fb_format = GL_RGBA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = calloc(sizeof(*configs), num_configs); + c = configs; + if (!_eglFillInConfigs(c, fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + GLX_TRUE_COLOR)) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__); + return EGL_FALSE; + } + + /* Mark the visual as slow if there are "fake" stencil bits. + */ + for (i = 0, c = configs; i < num_configs; i++, c++) { + int stencil = GET_CONFIG_ATTRIB(c, EGL_STENCIL_SIZE); + if ((stencil != 0) && (stencil != stencil_bits)) { + SET_CONFIG_ATTRIB(c, EGL_CONFIG_CAVEAT, EGL_SLOW_CONFIG); + } + } + + for (i = 0, c = configs; i < num_configs; i++, c++) + _eglAddConfig(disp, c); + + free(configs); + + return EGL_TRUE; +} + + +/** + * Show the given surface on the named screen. + * If surface is EGL_NO_SURFACE, disable the screen's output. + */ +static EGLBoolean +radeonShowScreenSurfaceMESA(_EGLDriver *drv, EGLDisplay dpy, EGLScreenMESA screen, + EGLSurface surface, EGLModeMESA m) +{ + EGLBoolean b = _eglDRIShowScreenSurfaceMESA(drv, dpy, screen, surface, m); + return b; +} + + +/** + * Called via eglInitialize() by user. + */ +static EGLBoolean +radeonInitialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor) +{ + __DRIframebuffer framebuffer; + driDisplay *display; + + /* one-time init */ + radeon_drm_page_size = getpagesize(); + + if (!_eglDRIInitialize(drv, dpy, major, minor)) + return EGL_FALSE; + + display = Lookup_driDisplay(dpy); + + framebuffer.dev_priv_size = sizeof(RADEONDRIRec); + framebuffer.dev_priv = malloc(sizeof(RADEONDRIRec)); + + /* XXX we shouldn't hard-code values here! */ + /* we won't know the screen surface size until the user calls + * eglCreateScreenSurfaceMESA(). + */ +#if 0 + display->virtualWidth = 1024; + display->virtualHeight = 768; +#else + display->virtualWidth = 1280; + display->virtualHeight = 1024; +#endif + display->bpp = 32; + display->cpp = 4; + + if (!_eglDRIGetDisplayInfo(display)) + return EGL_FALSE; + + framebuffer.base = display->pFB; + framebuffer.width = display->virtualWidth; + framebuffer.height = display->virtualHeight; + framebuffer.stride = display->virtualWidth; + framebuffer.size = display->fbSize; + radeonInitFBDev( display, framebuffer.dev_priv ); + + if (!_eglDRICreateDisplay(display, &framebuffer)) + return EGL_FALSE; + + if (!_eglDRICreateScreens(display)) + return EGL_FALSE; + + /* create a variety of both 32 and 16-bit configurations */ + radeonFillInConfigs(&display->Base, 32, 24, 8, GL_TRUE); + radeonFillInConfigs(&display->Base, 16, 16, 0, GL_TRUE); + + drv->Initialized = EGL_TRUE; + return EGL_TRUE; +} + + +/** + * The bootstrap function. Return a new radeonDriver object and + * plug in API functions. + */ +_EGLDriver * +_eglMain(_EGLDisplay *dpy) +{ + radeonDriver *radeon; + + radeon = (radeonDriver *) calloc(1, sizeof(*radeon)); + if (!radeon) { + return NULL; + } + + /* First fill in the dispatch table with defaults */ + _eglDRIInitDriverFallbacks(&radeon->Base); + + /* then plug in our radeon-specific functions */ + radeon->Base.API.Initialize = radeonInitialize; + radeon->Base.API.ShowScreenSurfaceMESA = radeonShowScreenSurfaceMESA; + + return &radeon->Base; +} diff --git a/radeon/server/radeon_macros.h b/radeon/server/radeon_macros.h new file mode 100644 index 0000000..60f0fa2 --- /dev/null +++ b/radeon/server/radeon_macros.h @@ -0,0 +1,129 @@ +/** + * \file server/radeon_macros.h + * \brief Macros for Radeon MMIO operation. + * + * \authors Kevin E. Martin <martin@xfree86.org> + * \authors Rickard E. Faith <faith@valinux.com> + * \authors Alan Hourihane <alanh@fairlite.demon.co.uk> + */ + +/* + * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and + * VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation on the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR + * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h,v 1.20 2002/10/12 01:38:07 martin Exp $ */ + +#ifndef _RADEON_MACROS_H_ +#define _RADEON_MACROS_H_ + +#include <mmio.h> + +# define MMIO_IN8(base, offset) \ + *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) +# define MMIO_IN32(base, offset) \ + read_MMIO_LE32(base, offset) +# define MMIO_OUT8(base, offset, val) \ + *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) +# define MMIO_OUT32(base, offset, val) \ + *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) + + + /* Memory mapped register access macros */ +#define INREG8(addr) MMIO_IN8(RADEONMMIO, addr) +#define INREG(addr) MMIO_IN32(RADEONMMIO, addr) +#define OUTREG8(addr, val) MMIO_OUT8(RADEONMMIO, addr, val) +#define OUTREG(addr, val) MMIO_OUT32(RADEONMMIO, addr, val) + +#define ADDRREG(addr) ((volatile GLuint *)(pointer)(RADEONMMIO + (addr))) + + +#define OUTREGP(addr, val, mask) \ +do { \ + GLuint tmp = INREG(addr); \ + tmp &= (mask); \ + tmp |= (val); \ + OUTREG(addr, tmp); \ +} while (0) + +#define INPLL(dpy, addr) RADEONINPLL(dpy, addr) + +#define OUTPLL(addr, val) \ +do { \ + OUTREG8(RADEON_CLOCK_CNTL_INDEX, (((addr) & 0x3f) | \ + RADEON_PLL_WR_EN)); \ + OUTREG(RADEON_CLOCK_CNTL_DATA, val); \ +} while (0) + +#define OUTPLLP(dpy, addr, val, mask) \ +do { \ + GLuint tmp = INPLL(dpy, addr); \ + tmp &= (mask); \ + tmp |= (val); \ + OUTPLL(addr, tmp); \ +} while (0) + +#define OUTPAL_START(idx) \ +do { \ + OUTREG8(RADEON_PALETTE_INDEX, (idx)); \ +} while (0) + +#define OUTPAL_NEXT(r, g, b) \ +do { \ + OUTREG(RADEON_PALETTE_DATA, ((r) << 16) | ((g) << 8) | (b)); \ +} while (0) + +#define OUTPAL_NEXT_CARD32(v) \ +do { \ + OUTREG(RADEON_PALETTE_DATA, (v & 0x00ffffff)); \ +} while (0) + +#define OUTPAL(idx, r, g, b) \ +do { \ + OUTPAL_START((idx)); \ + OUTPAL_NEXT((r), (g), (b)); \ +} while (0) + +#define INPAL_START(idx) \ +do { \ + OUTREG(RADEON_PALETTE_INDEX, (idx) << 16); \ +} while (0) + +#define INPAL_NEXT() INREG(RADEON_PALETTE_DATA) + +#define PAL_SELECT(idx) \ +do { \ + if (!idx) { \ + OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) & \ + (GLuint)~RADEON_DAC2_PALETTE_ACC_CTL); \ + } else { \ + OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) | \ + RADEON_DAC2_PALETTE_ACC_CTL); \ + } \ +} while (0) + + +#endif diff --git a/radeon/server/radeon_reg.h b/radeon/server/radeon_reg.h new file mode 100644 index 0000000..4dcce63 --- /dev/null +++ b/radeon/server/radeon_reg.h @@ -0,0 +1,2144 @@ +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h,v 1.30 2003/10/07 22:47:12 martin Exp $ */ +/* + * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and + * VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation on the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR + * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Kevin E. Martin <martin@xfree86.org> + * Rickard E. Faith <faith@valinux.com> + * Alan Hourihane <alanh@fairlite.demon.co.uk> + * + * References: + * + * !!!! FIXME !!!! + * RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical + * Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April + * 1999. + * + * !!!! FIXME !!!! + * RAGE 128 Software Development Manual (Technical Reference Manual P/N + * SDK-G04000 Rev. 0.01), ATI Technologies: June 1999. + * + */ + +/* !!!! FIXME !!!! NOTE: THIS FILE HAS BEEN CONVERTED FROM r128_reg.h + * AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT + * ON THE RADEON. A FULL AUDIT OF THIS CODE IS NEEDED! */ + +#ifndef _RADEON_REG_H_ +#define _RADEON_REG_H_ + + /* Registers for 2D/Video/Overlay */ +#define RADEON_ADAPTER_ID 0x0f2c /* PCI */ +#define RADEON_AGP_BASE 0x0170 +#define RADEON_AGP_CNTL 0x0174 +# define RADEON_AGP_APER_SIZE_256MB (0x00 << 0) +# define RADEON_AGP_APER_SIZE_128MB (0x20 << 0) +# define RADEON_AGP_APER_SIZE_64MB (0x30 << 0) +# define RADEON_AGP_APER_SIZE_32MB (0x38 << 0) +# define RADEON_AGP_APER_SIZE_16MB (0x3c << 0) +# define RADEON_AGP_APER_SIZE_8MB (0x3e << 0) +# define RADEON_AGP_APER_SIZE_4MB (0x3f << 0) +# define RADEON_AGP_APER_SIZE_MASK (0x3f << 0) +#define RADEON_AGP_COMMAND 0x0f60 /* PCI */ +#define RADEON_AGP_COMMAND_PCI_CONFIG 0x0060 /* offset in PCI config*/ +# define RADEON_AGP_ENABLE (1<<8) +#define RADEON_AGP_PLL_CNTL 0x000b /* PLL */ +#define RADEON_AGP_STATUS 0x0f5c /* PCI */ +# define RADEON_AGP_1X_MODE 0x01 +# define RADEON_AGP_2X_MODE 0x02 +# define RADEON_AGP_4X_MODE 0x04 +# define RADEON_AGP_FW_MODE 0x10 +# define RADEON_AGP_MODE_MASK 0x17 +#define RADEON_ATTRDR 0x03c1 /* VGA */ +#define RADEON_ATTRDW 0x03c0 /* VGA */ +#define RADEON_ATTRX 0x03c0 /* VGA */ +#define RADEON_AUX_SC_CNTL 0x1660 +# define RADEON_AUX1_SC_EN (1 << 0) +# define RADEON_AUX1_SC_MODE_OR (0 << 1) +# define RADEON_AUX1_SC_MODE_NAND (1 << 1) +# define RADEON_AUX2_SC_EN (1 << 2) +# define RADEON_AUX2_SC_MODE_OR (0 << 3) +# define RADEON_AUX2_SC_MODE_NAND (1 << 3) +# define RADEON_AUX3_SC_EN (1 << 4) +# define RADEON_AUX3_SC_MODE_OR (0 << 5) +# define RADEON_AUX3_SC_MODE_NAND (1 << 5) +#define RADEON_AUX1_SC_BOTTOM 0x1670 +#define RADEON_AUX1_SC_LEFT 0x1664 +#define RADEON_AUX1_SC_RIGHT 0x1668 +#define RADEON_AUX1_SC_TOP 0x166c +#define RADEON_AUX2_SC_BOTTOM 0x1680 +#define RADEON_AUX2_SC_LEFT 0x1674 +#define RADEON_AUX2_SC_RIGHT 0x1678 +#define RADEON_AUX2_SC_TOP 0x167c +#define RADEON_AUX3_SC_BOTTOM 0x1690 +#define RADEON_AUX3_SC_LEFT 0x1684 +#define RADEON_AUX3_SC_RIGHT 0x1688 +#define RADEON_AUX3_SC_TOP 0x168c +#define RADEON_AUX_WINDOW_HORZ_CNTL 0x02d8 +#define RADEON_AUX_WINDOW_VERT_CNTL 0x02dc + +#define RADEON_BASE_CODE 0x0f0b +#define RADEON_BIOS_0_SCRATCH 0x0010 +#define RADEON_BIOS_1_SCRATCH 0x0014 +#define RADEON_BIOS_2_SCRATCH 0x0018 +#define RADEON_BIOS_3_SCRATCH 0x001c +#define RADEON_BIOS_4_SCRATCH 0x0020 +#define RADEON_BIOS_5_SCRATCH 0x0024 +#define RADEON_BIOS_6_SCRATCH 0x0028 +#define RADEON_BIOS_7_SCRATCH 0x002c +#define RADEON_BIOS_ROM 0x0f30 /* PCI */ +#define RADEON_BIST 0x0f0f /* PCI */ +#define RADEON_BRUSH_DATA0 0x1480 +#define RADEON_BRUSH_DATA1 0x1484 +#define RADEON_BRUSH_DATA10 0x14a8 +#define RADEON_BRUSH_DATA11 0x14ac +#define RADEON_BRUSH_DATA12 0x14b0 +#define RADEON_BRUSH_DATA13 0x14b4 +#define RADEON_BRUSH_DATA14 0x14b8 +#define RADEON_BRUSH_DATA15 0x14bc +#define RADEON_BRUSH_DATA16 0x14c0 +#define RADEON_BRUSH_DATA17 0x14c4 +#define RADEON_BRUSH_DATA18 0x14c8 +#define RADEON_BRUSH_DATA19 0x14cc +#define RADEON_BRUSH_DATA2 0x1488 +#define RADEON_BRUSH_DATA20 0x14d0 +#define RADEON_BRUSH_DATA21 0x14d4 +#define RADEON_BRUSH_DATA22 0x14d8 +#define RADEON_BRUSH_DATA23 0x14dc +#define RADEON_BRUSH_DATA24 0x14e0 +#define RADEON_BRUSH_DATA25 0x14e4 +#define RADEON_BRUSH_DATA26 0x14e8 +#define RADEON_BRUSH_DATA27 0x14ec +#define RADEON_BRUSH_DATA28 0x14f0 +#define RADEON_BRUSH_DATA29 0x14f4 +#define RADEON_BRUSH_DATA3 0x148c +#define RADEON_BRUSH_DATA30 0x14f8 +#define RADEON_BRUSH_DATA31 0x14fc +#define RADEON_BRUSH_DATA32 0x1500 +#define RADEON_BRUSH_DATA33 0x1504 +#define RADEON_BRUSH_DATA34 0x1508 +#define RADEON_BRUSH_DATA35 0x150c +#define RADEON_BRUSH_DATA36 0x1510 +#define RADEON_BRUSH_DATA37 0x1514 +#define RADEON_BRUSH_DATA38 0x1518 +#define RADEON_BRUSH_DATA39 0x151c +#define RADEON_BRUSH_DATA4 0x1490 +#define RADEON_BRUSH_DATA40 0x1520 +#define RADEON_BRUSH_DATA41 0x1524 +#define RADEON_BRUSH_DATA42 0x1528 +#define RADEON_BRUSH_DATA43 0x152c +#define RADEON_BRUSH_DATA44 0x1530 +#define RADEON_BRUSH_DATA45 0x1534 +#define RADEON_BRUSH_DATA46 0x1538 +#define RADEON_BRUSH_DATA47 0x153c +#define RADEON_BRUSH_DATA48 0x1540 +#define RADEON_BRUSH_DATA49 0x1544 +#define RADEON_BRUSH_DATA5 0x1494 +#define RADEON_BRUSH_DATA50 0x1548 +#define RADEON_BRUSH_DATA51 0x154c +#define RADEON_BRUSH_DATA52 0x1550 +#define RADEON_BRUSH_DATA53 0x1554 +#define RADEON_BRUSH_DATA54 0x1558 +#define RADEON_BRUSH_DATA55 0x155c +#define RADEON_BRUSH_DATA56 0x1560 +#define RADEON_BRUSH_DATA57 0x1564 +#define RADEON_BRUSH_DATA58 0x1568 +#define RADEON_BRUSH_DATA59 0x156c +#define RADEON_BRUSH_DATA6 0x1498 +#define RADEON_BRUSH_DATA60 0x1570 +#define RADEON_BRUSH_DATA61 0x1574 +#define RADEON_BRUSH_DATA62 0x1578 +#define RADEON_BRUSH_DATA63 0x157c +#define RADEON_BRUSH_DATA7 0x149c +#define RADEON_BRUSH_DATA8 0x14a0 +#define RADEON_BRUSH_DATA9 0x14a4 +#define RADEON_BRUSH_SCALE 0x1470 +#define RADEON_BRUSH_Y_X 0x1474 +#define RADEON_BUS_CNTL 0x0030 +# define RADEON_BUS_MASTER_DIS (1 << 6) +# define RADEON_BUS_RD_DISCARD_EN (1 << 24) +# define RADEON_BUS_RD_ABORT_EN (1 << 25) +# define RADEON_BUS_MSTR_DISCONNECT_EN (1 << 28) +# define RADEON_BUS_WRT_BURST (1 << 29) +# define RADEON_BUS_READ_BURST (1 << 30) +#define RADEON_BUS_CNTL1 0x0034 +# define RADEON_BUS_WAIT_ON_LOCK_EN (1 << 4) + +#define RADEON_CACHE_CNTL 0x1724 +#define RADEON_CACHE_LINE 0x0f0c /* PCI */ +#define RADEON_CAP0_TRIG_CNTL 0x0950 /* ? */ +#define RADEON_CAP1_TRIG_CNTL 0x09c0 /* ? */ +#define RADEON_CAPABILITIES_ID 0x0f50 /* PCI */ +#define RADEON_CAPABILITIES_PTR 0x0f34 /* PCI */ +#define RADEON_CLK_PIN_CNTL 0x0001 /* PLL */ +#define RADEON_CLOCK_CNTL_DATA 0x000c +#define RADEON_CLOCK_CNTL_INDEX 0x0008 +# define RADEON_PLL_WR_EN (1 << 7) +# define RADEON_PLL_DIV_SEL (3 << 8) +# define RADEON_PLL2_DIV_SEL_MASK ~(3 << 8) +#define RADEON_CLR_CMP_CLR_3D 0x1a24 +#define RADEON_CLR_CMP_CLR_DST 0x15c8 +#define RADEON_CLR_CMP_CLR_SRC 0x15c4 +#define RADEON_CLR_CMP_CNTL 0x15c0 +# define RADEON_SRC_CMP_EQ_COLOR (4 << 0) +# define RADEON_SRC_CMP_NEQ_COLOR (5 << 0) +# define RADEON_CLR_CMP_SRC_SOURCE (1 << 24) +#define RADEON_CLR_CMP_MASK 0x15cc +# define RADEON_CLR_CMP_MSK 0xffffffff +#define RADEON_CLR_CMP_MASK_3D 0x1A28 +#define RADEON_COMMAND 0x0f04 /* PCI */ +#define RADEON_COMPOSITE_SHADOW_ID 0x1a0c +#define RADEON_CONFIG_APER_0_BASE 0x0100 +#define RADEON_CONFIG_APER_1_BASE 0x0104 +#define RADEON_CONFIG_APER_SIZE 0x0108 +#define RADEON_CONFIG_BONDS 0x00e8 +#define RADEON_CONFIG_CNTL 0x00e0 +# define RADEON_CFG_ATI_REV_A11 (0 << 16) +# define RADEON_CFG_ATI_REV_A12 (1 << 16) +# define RADEON_CFG_ATI_REV_A13 (2 << 16) +# define RADEON_CFG_ATI_REV_ID_MASK (0xf << 16) +#define RADEON_CONFIG_MEMSIZE 0x00f8 +#define RADEON_CONFIG_MEMSIZE_EMBEDDED 0x0114 +#define RADEON_CONFIG_REG_1_BASE 0x010c +#define RADEON_CONFIG_REG_APER_SIZE 0x0110 +#define RADEON_CONFIG_XSTRAP 0x00e4 +#define RADEON_CONSTANT_COLOR_C 0x1d34 +# define RADEON_CONSTANT_COLOR_MASK 0x00ffffff +# define RADEON_CONSTANT_COLOR_ONE 0x00ffffff +# define RADEON_CONSTANT_COLOR_ZERO 0x00000000 +#define RADEON_CRC_CMDFIFO_ADDR 0x0740 +#define RADEON_CRC_CMDFIFO_DOUT 0x0744 +#define RADEON_GRPH_BUFFER_CNTL 0x02f0 +# define RADEON_GRPH_START_REQ_MASK (0x7f) +# define RADEON_GRPH_START_REQ_SHIFT 0 +# define RADEON_GRPH_STOP_REQ_MASK (0x7f<<8) +# define RADEON_GRPH_STOP_REQ_SHIFT 8 +# define RADEON_GRPH_CRITICAL_POINT_MASK (0x7f<<16) +# define RADEON_GRPH_CRITICAL_POINT_SHIFT 16 +# define RADEON_GRPH_CRITICAL_CNTL (1<<28) +# define RADEON_GRPH_BUFFER_SIZE (1<<29) +# define RADEON_GRPH_CRITICAL_AT_SOF (1<<30) +# define RADEON_GRPH_STOP_CNTL (1<<31) +#define RADEON_GRPH2_BUFFER_CNTL 0x03f0 +# define RADEON_GRPH2_START_REQ_MASK (0x7f) +# define RADEON_GRPH2_START_REQ_SHIFT 0 +# define RADEON_GRPH2_STOP_REQ_MASK (0x7f<<8) +# define RADEON_GRPH2_STOP_REQ_SHIFT 8 +# define RADEON_GRPH2_CRITICAL_POINT_MASK (0x7f<<16) +# define RADEON_GRPH2_CRITICAL_POINT_SHIFT 16 +# define RADEON_GRPH2_CRITICAL_CNTL (1<<28) +# define RADEON_GRPH2_BUFFER_SIZE (1<<29) +# define RADEON_GRPH2_CRITICAL_AT_SOF (1<<30) +# define RADEON_GRPH2_STOP_CNTL (1<<31) +#define RADEON_CRTC_CRNT_FRAME 0x0214 +#define RADEON_CRTC_EXT_CNTL 0x0054 +# define RADEON_CRTC_VGA_XOVERSCAN (1 << 0) +# define RADEON_VGA_ATI_LINEAR (1 << 3) +# define RADEON_XCRT_CNT_EN (1 << 6) +# define RADEON_CRTC_HSYNC_DIS (1 << 8) +# define RADEON_CRTC_VSYNC_DIS (1 << 9) +# define RADEON_CRTC_DISPLAY_DIS (1 << 10) +# define RADEON_CRTC_SYNC_TRISTAT (1 << 11) +# define RADEON_CRTC_CRT_ON (1 << 15) +#define RADEON_CRTC_EXT_CNTL_DPMS_BYTE 0x0055 +# define RADEON_CRTC_HSYNC_DIS_BYTE (1 << 0) +# define RADEON_CRTC_VSYNC_DIS_BYTE (1 << 1) +# define RADEON_CRTC_DISPLAY_DIS_BYTE (1 << 2) +#define RADEON_CRTC_GEN_CNTL 0x0050 +# define RADEON_CRTC_DBL_SCAN_EN (1 << 0) +# define RADEON_CRTC_INTERLACE_EN (1 << 1) +# define RADEON_CRTC_CSYNC_EN (1 << 4) +# define RADEON_CRTC_CUR_EN (1 << 16) +# define RADEON_CRTC_CUR_MODE_MASK (7 << 17) +# define RADEON_CRTC_ICON_EN (1 << 20) +# define RADEON_CRTC_EXT_DISP_EN (1 << 24) +# define RADEON_CRTC_EN (1 << 25) +# define RADEON_CRTC_DISP_REQ_EN_B (1 << 26) +#define RADEON_CRTC2_GEN_CNTL 0x03f8 +# define RADEON_CRTC2_DBL_SCAN_EN (1 << 0) +# define RADEON_CRTC2_INTERLACE_EN (1 << 1) +# define RADEON_CRTC2_SYNC_TRISTAT (1 << 4) +# define RADEON_CRTC2_HSYNC_TRISTAT (1 << 5) +# define RADEON_CRTC2_VSYNC_TRISTAT (1 << 6) +# define RADEON_CRTC2_CRT2_ON (1 << 7) +# define RADEON_CRTC2_ICON_EN (1 << 15) +# define RADEON_CRTC2_CUR_EN (1 << 16) +# define RADEON_CRTC2_CUR_MODE_MASK (7 << 20) +# define RADEON_CRTC2_DISP_DIS (1 << 23) +# define RADEON_CRTC2_EN (1 << 25) +# define RADEON_CRTC2_DISP_REQ_EN_B (1 << 26) +# define RADEON_CRTC2_CSYNC_EN (1 << 27) +# define RADEON_CRTC2_HSYNC_DIS (1 << 28) +# define RADEON_CRTC2_VSYNC_DIS (1 << 29) +#define RADEON_CRTC_MORE_CNTL 0x27c +# define RADEON_CRTC_H_CUTOFF_ACTIVE_EN (1<<4) +# define RADEON_CRTC_V_CUTOFF_ACTIVE_EN (1<<5) +#define RADEON_CRTC_GUI_TRIG_VLINE 0x0218 +#define RADEON_CRTC_H_SYNC_STRT_WID 0x0204 +# define RADEON_CRTC_H_SYNC_STRT_PIX (0x07 << 0) +# define RADEON_CRTC_H_SYNC_STRT_CHAR (0x3ff << 3) +# define RADEON_CRTC_H_SYNC_STRT_CHAR_SHIFT 3 +# define RADEON_CRTC_H_SYNC_WID (0x3f << 16) +# define RADEON_CRTC_H_SYNC_WID_SHIFT 16 +# define RADEON_CRTC_H_SYNC_POL (1 << 23) +#define RADEON_CRTC2_H_SYNC_STRT_WID 0x0304 +# define RADEON_CRTC2_H_SYNC_STRT_PIX (0x07 << 0) +# define RADEON_CRTC2_H_SYNC_STRT_CHAR (0x3ff << 3) +# define RADEON_CRTC2_H_SYNC_STRT_CHAR_SHIFT 3 +# define RADEON_CRTC2_H_SYNC_WID (0x3f << 16) +# define RADEON_CRTC2_H_SYNC_WID_SHIFT 16 +# define RADEON_CRTC2_H_SYNC_POL (1 << 23) +#define RADEON_CRTC_H_TOTAL_DISP 0x0200 +# define RADEON_CRTC_H_TOTAL (0x03ff << 0) +# define RADEON_CRTC_H_TOTAL_SHIFT 0 +# define RADEON_CRTC_H_DISP (0x01ff << 16) +# define RADEON_CRTC_H_DISP_SHIFT 16 +#define RADEON_CRTC2_H_TOTAL_DISP 0x0300 +# define RADEON_CRTC2_H_TOTAL (0x03ff << 0) +# define RADEON_CRTC2_H_TOTAL_SHIFT 0 +# define RADEON_CRTC2_H_DISP (0x01ff << 16) +# define RADEON_CRTC2_H_DISP_SHIFT 16 +#define RADEON_CRTC_OFFSET 0x0224 +#define RADEON_CRTC2_OFFSET 0x0324 +#define RADEON_CRTC_OFFSET_CNTL 0x0228 +# define RADEON_CRTC_TILE_EN (1 << 15) +#define RADEON_CRTC2_OFFSET_CNTL 0x0328 +# define RADEON_CRTC2_TILE_EN (1 << 15) +#define RADEON_CRTC_PITCH 0x022c +#define RADEON_CRTC2_PITCH 0x032c +#define RADEON_CRTC_STATUS 0x005c +# define RADEON_CRTC_VBLANK_SAVE (1 << 1) +# define RADEON_CRTC_VBLANK_SAVE_CLEAR (1 << 1) +#define RADEON_CRTC2_STATUS 0x03fc +# define RADEON_CRTC2_VBLANK_SAVE (1 << 1) +# define RADEON_CRTC2_VBLANK_SAVE_CLEAR (1 << 1) +#define RADEON_CRTC_V_SYNC_STRT_WID 0x020c +# define RADEON_CRTC_V_SYNC_STRT (0x7ff << 0) +# define RADEON_CRTC_V_SYNC_STRT_SHIFT 0 +# define RADEON_CRTC_V_SYNC_WID (0x1f << 16) +# define RADEON_CRTC_V_SYNC_WID_SHIFT 16 +# define RADEON_CRTC_V_SYNC_POL (1 << 23) +#define RADEON_CRTC2_V_SYNC_STRT_WID 0x030c +# define RADEON_CRTC2_V_SYNC_STRT (0x7ff << 0) +# define RADEON_CRTC2_V_SYNC_STRT_SHIFT 0 +# define RADEON_CRTC2_V_SYNC_WID (0x1f << 16) +# define RADEON_CRTC2_V_SYNC_WID_SHIFT 16 +# define RADEON_CRTC2_V_SYNC_POL (1 << 23) +#define RADEON_CRTC_V_TOTAL_DISP 0x0208 +# define RADEON_CRTC_V_TOTAL (0x07ff << 0) +# define RADEON_CRTC_V_TOTAL_SHIFT 0 +# define RADEON_CRTC_V_DISP (0x07ff << 16) +# define RADEON_CRTC_V_DISP_SHIFT 16 +#define RADEON_CRTC2_V_TOTAL_DISP 0x0308 +# define RADEON_CRTC2_V_TOTAL (0x07ff << 0) +# define RADEON_CRTC2_V_TOTAL_SHIFT 0 +# define RADEON_CRTC2_V_DISP (0x07ff << 16) +# define RADEON_CRTC2_V_DISP_SHIFT 16 +#define RADEON_CRTC_VLINE_CRNT_VLINE 0x0210 +# define RADEON_CRTC_CRNT_VLINE_MASK (0x7ff << 16) +#define RADEON_CRTC2_CRNT_FRAME 0x0314 +#define RADEON_CRTC2_GUI_TRIG_VLINE 0x0318 +#define RADEON_CRTC2_STATUS 0x03fc +#define RADEON_CRTC2_VLINE_CRNT_VLINE 0x0310 +#define RADEON_CRTC8_DATA 0x03d5 /* VGA, 0x3b5 */ +#define RADEON_CRTC8_IDX 0x03d4 /* VGA, 0x3b4 */ +#define RADEON_CUR_CLR0 0x026c +#define RADEON_CUR_CLR1 0x0270 +#define RADEON_CUR_HORZ_VERT_OFF 0x0268 +#define RADEON_CUR_HORZ_VERT_POSN 0x0264 +#define RADEON_CUR_OFFSET 0x0260 +# define RADEON_CUR_LOCK (1 << 31) +#define RADEON_CUR2_CLR0 0x036c +#define RADEON_CUR2_CLR1 0x0370 +#define RADEON_CUR2_HORZ_VERT_OFF 0x0368 +#define RADEON_CUR2_HORZ_VERT_POSN 0x0364 +#define RADEON_CUR2_OFFSET 0x0360 +# define RADEON_CUR2_LOCK (1 << 31) + +#define RADEON_DAC_CNTL 0x0058 +# define RADEON_DAC_RANGE_CNTL (3 << 0) +# define RADEON_DAC_RANGE_CNTL_MASK 0x03 +# define RADEON_DAC_BLANKING (1 << 2) +# define RADEON_DAC_CMP_EN (1 << 3) +# define RADEON_DAC_CMP_OUTPUT (1 << 7) +# define RADEON_DAC_8BIT_EN (1 << 8) +# define RADEON_DAC_VGA_ADR_EN (1 << 13) +# define RADEON_DAC_PDWN (1 << 15) +# define RADEON_DAC_MASK_ALL (0xff << 24) +#define RADEON_DAC_CNTL2 0x007c +# define RADEON_DAC2_DAC_CLK_SEL (1 << 0) +# define RADEON_DAC2_DAC2_CLK_SEL (1 << 1) +# define RADEON_DAC2_PALETTE_ACC_CTL (1 << 5) +#define RADEON_DAC_EXT_CNTL 0x0280 +# define RADEON_DAC_FORCE_BLANK_OFF_EN (1 << 4) +# define RADEON_DAC_FORCE_DATA_EN (1 << 5) +# define RADEON_DAC_FORCE_DATA_SEL_MASK (3 << 6) +# define RADEON_DAC_FORCE_DATA_MASK 0x0003ff00 +# define RADEON_DAC_FORCE_DATA_SHIFT 8 +#define RADEON_TV_DAC_CNTL 0x088c +# define RADEON_TV_DAC_STD_MASK 0x0300 +# define RADEON_TV_DAC_RDACPD (1 << 24) +# define RADEON_TV_DAC_GDACPD (1 << 25) +# define RADEON_TV_DAC_BDACPD (1 << 26) +#define RADEON_DISP_HW_DEBUG 0x0d14 +# define RADEON_CRT2_DISP1_SEL (1 << 5) +#define RADEON_DISP_OUTPUT_CNTL 0x0d64 +# define RADEON_DISP_DAC_SOURCE_MASK 0x03 +# define RADEON_DISP_DAC2_SOURCE_MASK 0x0c +# define RADEON_DISP_DAC_SOURCE_CRTC2 0x01 +# define RADEON_DISP_DAC2_SOURCE_CRTC2 0x04 +#define RADEON_DAC_CRC_SIG 0x02cc +#define RADEON_DAC_DATA 0x03c9 /* VGA */ +#define RADEON_DAC_MASK 0x03c6 /* VGA */ +#define RADEON_DAC_R_INDEX 0x03c7 /* VGA */ +#define RADEON_DAC_W_INDEX 0x03c8 /* VGA */ +#define RADEON_DDA_CONFIG 0x02e0 +#define RADEON_DDA_ON_OFF 0x02e4 +#define RADEON_DEFAULT_OFFSET 0x16e0 +#define RADEON_DEFAULT_PITCH 0x16e4 +#define RADEON_DEFAULT_SC_BOTTOM_RIGHT 0x16e8 +# define RADEON_DEFAULT_SC_RIGHT_MAX (0x1fff << 0) +# define RADEON_DEFAULT_SC_BOTTOM_MAX (0x1fff << 16) +#define RADEON_DESTINATION_3D_CLR_CMP_VAL 0x1820 +#define RADEON_DESTINATION_3D_CLR_CMP_MSK 0x1824 +#define RADEON_DEVICE_ID 0x0f02 /* PCI */ +#define RADEON_DISP_MISC_CNTL 0x0d00 +# define RADEON_SOFT_RESET_GRPH_PP (1 << 0) +#define RADEON_DISP_MERGE_CNTL 0x0d60 +# define RADEON_DISP_ALPHA_MODE_MASK 0x03 +# define RADEON_DISP_ALPHA_MODE_KEY 0 +# define RADEON_DISP_ALPHA_MODE_PER_PIXEL 1 +# define RADEON_DISP_ALPHA_MODE_GLOBAL 2 +# define RADEON_DISP_RGB_OFFSET_EN (1<<8) +# define RADEON_DISP_GRPH_ALPHA_MASK (0xff << 16) +# define RADEON_DISP_OV0_ALPHA_MASK (0xff << 24) +# define RADEON_DISP_LIN_TRANS_BYPASS (0x01 << 9) +#define RADEON_DISP2_MERGE_CNTL 0x0d68 +# define RADEON_DISP2_RGB_OFFSET_EN (1<<8) +#define RADEON_DISP_LIN_TRANS_GRPH_A 0x0d80 +#define RADEON_DISP_LIN_TRANS_GRPH_B 0x0d84 +#define RADEON_DISP_LIN_TRANS_GRPH_C 0x0d88 +#define RADEON_DISP_LIN_TRANS_GRPH_D 0x0d8c +#define RADEON_DISP_LIN_TRANS_GRPH_E 0x0d90 +#define RADEON_DISP_LIN_TRANS_GRPH_F 0x0d98 +#define RADEON_DP_BRUSH_BKGD_CLR 0x1478 +#define RADEON_DP_BRUSH_FRGD_CLR 0x147c +#define RADEON_DP_CNTL 0x16c0 +# define RADEON_DST_X_LEFT_TO_RIGHT (1 << 0) +# define RADEON_DST_Y_TOP_TO_BOTTOM (1 << 1) +#define RADEON_DP_CNTL_XDIR_YDIR_YMAJOR 0x16d0 +# define RADEON_DST_Y_MAJOR (1 << 2) +# define RADEON_DST_Y_DIR_TOP_TO_BOTTOM (1 << 15) +# define RADEON_DST_X_DIR_LEFT_TO_RIGHT (1 << 31) +#define RADEON_DP_DATATYPE 0x16c4 +# define RADEON_HOST_BIG_ENDIAN_EN (1 << 29) +#define RADEON_DP_GUI_MASTER_CNTL 0x146c +# define RADEON_GMC_SRC_PITCH_OFFSET_CNTL (1 << 0) +# define RADEON_GMC_DST_PITCH_OFFSET_CNTL (1 << 1) +# define RADEON_GMC_SRC_CLIPPING (1 << 2) +# define RADEON_GMC_DST_CLIPPING (1 << 3) +# define RADEON_GMC_BRUSH_DATATYPE_MASK (0x0f << 4) +# define RADEON_GMC_BRUSH_8X8_MONO_FG_BG (0 << 4) +# define RADEON_GMC_BRUSH_8X8_MONO_FG_LA (1 << 4) +# define RADEON_GMC_BRUSH_1X8_MONO_FG_BG (4 << 4) +# define RADEON_GMC_BRUSH_1X8_MONO_FG_LA (5 << 4) +# define RADEON_GMC_BRUSH_32x1_MONO_FG_BG (6 << 4) +# define RADEON_GMC_BRUSH_32x1_MONO_FG_LA (7 << 4) +# define RADEON_GMC_BRUSH_32x32_MONO_FG_BG (8 << 4) +# define RADEON_GMC_BRUSH_32x32_MONO_FG_LA (9 << 4) +# define RADEON_GMC_BRUSH_8x8_COLOR (10 << 4) +# define RADEON_GMC_BRUSH_1X8_COLOR (12 << 4) +# define RADEON_GMC_BRUSH_SOLID_COLOR (13 << 4) +# define RADEON_GMC_BRUSH_NONE (15 << 4) +# define RADEON_GMC_DST_8BPP_CI (2 << 8) +# define RADEON_GMC_DST_15BPP (3 << 8) +# define RADEON_GMC_DST_16BPP (4 << 8) +# define RADEON_GMC_DST_24BPP (5 << 8) +# define RADEON_GMC_DST_32BPP (6 << 8) +# define RADEON_GMC_DST_8BPP_RGB (7 << 8) +# define RADEON_GMC_DST_Y8 (8 << 8) +# define RADEON_GMC_DST_RGB8 (9 << 8) +# define RADEON_GMC_DST_VYUY (11 << 8) +# define RADEON_GMC_DST_YVYU (12 << 8) +# define RADEON_GMC_DST_AYUV444 (14 << 8) +# define RADEON_GMC_DST_ARGB4444 (15 << 8) +# define RADEON_GMC_DST_DATATYPE_MASK (0x0f << 8) +# define RADEON_GMC_DST_DATATYPE_SHIFT 8 +# define RADEON_GMC_SRC_DATATYPE_MASK (3 << 12) +# define RADEON_GMC_SRC_DATATYPE_MONO_FG_BG (0 << 12) +# define RADEON_GMC_SRC_DATATYPE_MONO_FG_LA (1 << 12) +# define RADEON_GMC_SRC_DATATYPE_COLOR (3 << 12) +# define RADEON_GMC_BYTE_PIX_ORDER (1 << 14) +# define RADEON_GMC_BYTE_MSB_TO_LSB (0 << 14) +# define RADEON_GMC_BYTE_LSB_TO_MSB (1 << 14) +# define RADEON_GMC_CONVERSION_TEMP (1 << 15) +# define RADEON_GMC_CONVERSION_TEMP_6500 (0 << 15) +# define RADEON_GMC_CONVERSION_TEMP_9300 (1 << 15) +# define RADEON_GMC_ROP3_MASK (0xff << 16) +# define RADEON_DP_SRC_SOURCE_MASK (7 << 24) +# define RADEON_DP_SRC_SOURCE_MEMORY (2 << 24) +# define RADEON_DP_SRC_SOURCE_HOST_DATA (3 << 24) +# define RADEON_GMC_3D_FCN_EN (1 << 27) +# define RADEON_GMC_CLR_CMP_CNTL_DIS (1 << 28) +# define RADEON_GMC_AUX_CLIP_DIS (1 << 29) +# define RADEON_GMC_WR_MSK_DIS (1 << 30) +# define RADEON_GMC_LD_BRUSH_Y_X (1 << 31) +# define RADEON_ROP3_ZERO 0x00000000 +# define RADEON_ROP3_DSa 0x00880000 +# define RADEON_ROP3_SDna 0x00440000 +# define RADEON_ROP3_S 0x00cc0000 +# define RADEON_ROP3_DSna 0x00220000 +# define RADEON_ROP3_D 0x00aa0000 +# define RADEON_ROP3_DSx 0x00660000 +# define RADEON_ROP3_DSo 0x00ee0000 +# define RADEON_ROP3_DSon 0x00110000 +# define RADEON_ROP3_DSxn 0x00990000 +# define RADEON_ROP3_Dn 0x00550000 +# define RADEON_ROP3_SDno 0x00dd0000 +# define RADEON_ROP3_Sn 0x00330000 +# define RADEON_ROP3_DSno 0x00bb0000 +# define RADEON_ROP3_DSan 0x00770000 +# define RADEON_ROP3_ONE 0x00ff0000 +# define RADEON_ROP3_DPa 0x00a00000 +# define RADEON_ROP3_PDna 0x00500000 +# define RADEON_ROP3_P 0x00f00000 +# define RADEON_ROP3_DPna 0x000a0000 +# define RADEON_ROP3_D 0x00aa0000 +# define RADEON_ROP3_DPx 0x005a0000 +# define RADEON_ROP3_DPo 0x00fa0000 +# define RADEON_ROP3_DPon 0x00050000 +# define RADEON_ROP3_PDxn 0x00a50000 +# define RADEON_ROP3_PDno 0x00f50000 +# define RADEON_ROP3_Pn 0x000f0000 +# define RADEON_ROP3_DPno 0x00af0000 +# define RADEON_ROP3_DPan 0x005f0000 +#define RADEON_DP_GUI_MASTER_CNTL_C 0x1c84 +#define RADEON_DP_MIX 0x16c8 +#define RADEON_DP_SRC_BKGD_CLR 0x15dc +#define RADEON_DP_SRC_FRGD_CLR 0x15d8 +#define RADEON_DP_WRITE_MASK 0x16cc +#define RADEON_DST_BRES_DEC 0x1630 +#define RADEON_DST_BRES_ERR 0x1628 +#define RADEON_DST_BRES_INC 0x162c +#define RADEON_DST_BRES_LNTH 0x1634 +#define RADEON_DST_BRES_LNTH_SUB 0x1638 +#define RADEON_DST_HEIGHT 0x1410 +#define RADEON_DST_HEIGHT_WIDTH 0x143c +#define RADEON_DST_HEIGHT_WIDTH_8 0x158c +#define RADEON_DST_HEIGHT_WIDTH_BW 0x15b4 +#define RADEON_DST_HEIGHT_Y 0x15a0 +#define RADEON_DST_LINE_START 0x1600 +#define RADEON_DST_LINE_END 0x1604 +#define RADEON_DST_LINE_PATCOUNT 0x1608 +# define RADEON_BRES_CNTL_SHIFT 8 +#define RADEON_DST_OFFSET 0x1404 +#define RADEON_DST_PITCH 0x1408 +#define RADEON_DST_PITCH_OFFSET 0x142c +#define RADEON_DST_PITCH_OFFSET_C 0x1c80 +# define RADEON_PITCH_SHIFT 21 +# define RADEON_DST_TILE_LINEAR (0 << 30) +# define RADEON_DST_TILE_MACRO (1 << 30) +# define RADEON_DST_TILE_MICRO (2 << 30) +# define RADEON_DST_TILE_BOTH (3 << 30) +#define RADEON_DST_WIDTH 0x140c +#define RADEON_DST_WIDTH_HEIGHT 0x1598 +#define RADEON_DST_WIDTH_X 0x1588 +#define RADEON_DST_WIDTH_X_INCY 0x159c +#define RADEON_DST_X 0x141c +#define RADEON_DST_X_SUB 0x15a4 +#define RADEON_DST_X_Y 0x1594 +#define RADEON_DST_Y 0x1420 +#define RADEON_DST_Y_SUB 0x15a8 +#define RADEON_DST_Y_X 0x1438 + +#define RADEON_FCP_CNTL 0x0910 +# define RADEON_FCP0_SRC_PCICLK 0 +# define RADEON_FCP0_SRC_PCLK 1 +# define RADEON_FCP0_SRC_PCLKb 2 +# define RADEON_FCP0_SRC_HREF 3 +# define RADEON_FCP0_SRC_GND 4 +# define RADEON_FCP0_SRC_HREFb 5 +#define RADEON_FLUSH_1 0x1704 +#define RADEON_FLUSH_2 0x1708 +#define RADEON_FLUSH_3 0x170c +#define RADEON_FLUSH_4 0x1710 +#define RADEON_FLUSH_5 0x1714 +#define RADEON_FLUSH_6 0x1718 +#define RADEON_FLUSH_7 0x171c +#define RADEON_FOG_3D_TABLE_START 0x1810 +#define RADEON_FOG_3D_TABLE_END 0x1814 +#define RADEON_FOG_3D_TABLE_DENSITY 0x181c +#define RADEON_FOG_TABLE_INDEX 0x1a14 +#define RADEON_FOG_TABLE_DATA 0x1a18 +#define RADEON_FP_CRTC_H_TOTAL_DISP 0x0250 +#define RADEON_FP_CRTC_V_TOTAL_DISP 0x0254 +#define RADEON_FP_CRTC2_H_TOTAL_DISP 0x0350 +#define RADEON_FP_CRTC2_V_TOTAL_DISP 0x0354 +# define RADEON_FP_CRTC_H_TOTAL_MASK 0x000003ff +# define RADEON_FP_CRTC_H_DISP_MASK 0x01ff0000 +# define RADEON_FP_CRTC_V_TOTAL_MASK 0x00000fff +# define RADEON_FP_CRTC_V_DISP_MASK 0x0fff0000 +# define RADEON_FP_H_SYNC_STRT_CHAR_MASK 0x00001ff8 +# define RADEON_FP_H_SYNC_WID_MASK 0x003f0000 +# define RADEON_FP_V_SYNC_STRT_MASK 0x00000fff +# define RADEON_FP_V_SYNC_WID_MASK 0x001f0000 +# define RADEON_FP_CRTC_H_TOTAL_SHIFT 0x00000000 +# define RADEON_FP_CRTC_H_DISP_SHIFT 0x00000010 +# define RADEON_FP_CRTC_V_TOTAL_SHIFT 0x00000000 +# define RADEON_FP_CRTC_V_DISP_SHIFT 0x00000010 +# define RADEON_FP_H_SYNC_STRT_CHAR_SHIFT 0x00000003 +# define RADEON_FP_H_SYNC_WID_SHIFT 0x00000010 +# define RADEON_FP_V_SYNC_STRT_SHIFT 0x00000000 +# define RADEON_FP_V_SYNC_WID_SHIFT 0x00000010 +#define RADEON_FP_GEN_CNTL 0x0284 +# define RADEON_FP_FPON (1 << 0) +# define RADEON_FP_TMDS_EN (1 << 2) +# define RADEON_FP_PANEL_FORMAT (1 << 3) +# define RADEON_FP_EN_TMDS (1 << 7) +# define RADEON_FP_DETECT_SENSE (1 << 8) +# define RADEON_FP_SEL_CRTC2 (1 << 13) +# define RADEON_FP_CRTC_DONT_SHADOW_HPAR (1 << 15) +# define RADEON_FP_CRTC_DONT_SHADOW_VPAR (1 << 16) +# define RADEON_FP_CRTC_DONT_SHADOW_HEND (1 << 17) +# define RADEON_FP_CRTC_USE_SHADOW_VEND (1 << 18) +# define RADEON_FP_RMX_HVSYNC_CONTROL_EN (1 << 20) +# define RADEON_FP_DFP_SYNC_SEL (1 << 21) +# define RADEON_FP_CRTC_LOCK_8DOT (1 << 22) +# define RADEON_FP_CRT_SYNC_SEL (1 << 23) +# define RADEON_FP_USE_SHADOW_EN (1 << 24) +# define RADEON_FP_CRT_SYNC_ALT (1 << 26) +#define RADEON_FP2_GEN_CNTL 0x0288 +# define RADEON_FP2_BLANK_EN (1 << 1) +# define RADEON_FP2_ON (1 << 2) +# define RADEON_FP2_PANEL_FORMAT (1 << 3) +# define RADEON_FP2_SOURCE_SEL_MASK (3 << 10) +# define RADEON_FP2_SOURCE_SEL_CRTC2 (1 << 10) +# define RADEON_FP2_SRC_SEL_MASK (3 << 13) +# define RADEON_FP2_SRC_SEL_CRTC2 (1 << 13) +# define RADEON_FP2_FP_POL (1 << 16) +# define RADEON_FP2_LP_POL (1 << 17) +# define RADEON_FP2_SCK_POL (1 << 18) +# define RADEON_FP2_LCD_CNTL_MASK (7 << 19) +# define RADEON_FP2_PAD_FLOP_EN (1 << 22) +# define RADEON_FP2_CRC_EN (1 << 23) +# define RADEON_FP2_CRC_READ_EN (1 << 24) +# define RADEON_FP2_DV0_EN (1 << 25) +# define RADEON_FP2_DV0_RATE_SEL_SDR (1 << 26) +#define RADEON_FP_H_SYNC_STRT_WID 0x02c4 +#define RADEON_FP_H2_SYNC_STRT_WID 0x03c4 +#define RADEON_FP_HORZ_STRETCH 0x028c +#define RADEON_FP_HORZ2_STRETCH 0x038c +# define RADEON_HORZ_STRETCH_RATIO_MASK 0xffff +# define RADEON_HORZ_STRETCH_RATIO_MAX 4096 +# define RADEON_HORZ_PANEL_SIZE (0x1ff << 16) +# define RADEON_HORZ_PANEL_SHIFT 16 +# define RADEON_HORZ_STRETCH_PIXREP (0 << 25) +# define RADEON_HORZ_STRETCH_BLEND (1 << 26) +# define RADEON_HORZ_STRETCH_ENABLE (1 << 25) +# define RADEON_HORZ_AUTO_RATIO (1 << 27) +# define RADEON_HORZ_FP_LOOP_STRETCH (0x7 << 28) +# define RADEON_HORZ_AUTO_RATIO_INC (1 << 31) +#define RADEON_FP_V_SYNC_STRT_WID 0x02c8 +#define RADEON_FP_VERT_STRETCH 0x0290 +#define RADEON_FP_V2_SYNC_STRT_WID 0x03c8 +#define RADEON_FP_VERT2_STRETCH 0x0390 +# define RADEON_VERT_PANEL_SIZE (0xfff << 12) +# define RADEON_VERT_PANEL_SHIFT 12 +# define RADEON_VERT_STRETCH_RATIO_MASK 0xfff +# define RADEON_VERT_STRETCH_RATIO_SHIFT 0 +# define RADEON_VERT_STRETCH_RATIO_MAX 4096 +# define RADEON_VERT_STRETCH_ENABLE (1 << 25) +# define RADEON_VERT_STRETCH_LINEREP (0 << 26) +# define RADEON_VERT_STRETCH_BLEND (1 << 26) +# define RADEON_VERT_AUTO_RATIO_EN (1 << 27) +# define RADEON_VERT_STRETCH_RESERVED 0xf1000000 + +#define RADEON_GEN_INT_CNTL 0x0040 +#define RADEON_GEN_INT_STATUS 0x0044 +# define RADEON_VSYNC_INT_AK (1 << 2) +# define RADEON_VSYNC_INT (1 << 2) +# define RADEON_VSYNC2_INT_AK (1 << 6) +# define RADEON_VSYNC2_INT (1 << 6) +#define RADEON_GENENB 0x03c3 /* VGA */ +#define RADEON_GENFC_RD 0x03ca /* VGA */ +#define RADEON_GENFC_WT 0x03da /* VGA, 0x03ba */ +#define RADEON_GENMO_RD 0x03cc /* VGA */ +#define RADEON_GENMO_WT 0x03c2 /* VGA */ +#define RADEON_GENS0 0x03c2 /* VGA */ +#define RADEON_GENS1 0x03da /* VGA, 0x03ba */ +#define RADEON_GPIO_MONID 0x0068 /* DDC interface via I2C */ +#define RADEON_GPIO_MONIDB 0x006c +#define RADEON_GPIO_CRT2_DDC 0x006c +#define RADEON_GPIO_DVI_DDC 0x0064 +#define RADEON_GPIO_VGA_DDC 0x0060 +# define RADEON_GPIO_A_0 (1 << 0) +# define RADEON_GPIO_A_1 (1 << 1) +# define RADEON_GPIO_Y_0 (1 << 8) +# define RADEON_GPIO_Y_1 (1 << 9) +# define RADEON_GPIO_Y_SHIFT_0 8 +# define RADEON_GPIO_Y_SHIFT_1 9 +# define RADEON_GPIO_EN_0 (1 << 16) +# define RADEON_GPIO_EN_1 (1 << 17) +# define RADEON_GPIO_MASK_0 (1 << 24) /*??*/ +# define RADEON_GPIO_MASK_1 (1 << 25) /*??*/ +#define RADEON_GRPH8_DATA 0x03cf /* VGA */ +#define RADEON_GRPH8_IDX 0x03ce /* VGA */ +#define RADEON_GUI_SCRATCH_REG0 0x15e0 +#define RADEON_GUI_SCRATCH_REG1 0x15e4 +#define RADEON_GUI_SCRATCH_REG2 0x15e8 +#define RADEON_GUI_SCRATCH_REG3 0x15ec +#define RADEON_GUI_SCRATCH_REG4 0x15f0 +#define RADEON_GUI_SCRATCH_REG5 0x15f4 + +#define RADEON_HEADER 0x0f0e /* PCI */ +#define RADEON_HOST_DATA0 0x17c0 +#define RADEON_HOST_DATA1 0x17c4 +#define RADEON_HOST_DATA2 0x17c8 +#define RADEON_HOST_DATA3 0x17cc +#define RADEON_HOST_DATA4 0x17d0 +#define RADEON_HOST_DATA5 0x17d4 +#define RADEON_HOST_DATA6 0x17d8 +#define RADEON_HOST_DATA7 0x17dc +#define RADEON_HOST_DATA_LAST 0x17e0 +#define RADEON_HOST_PATH_CNTL 0x0130 +# define RADEON_HDP_SOFT_RESET (1 << 26) +#define RADEON_HTOTAL_CNTL 0x0009 /* PLL */ +#define RADEON_HTOTAL2_CNTL 0x002e /* PLL */ + +#define RADEON_I2C_CNTL_1 0x0094 /* ? */ +#define RADEON_DVI_I2C_CNTL_1 0x02e4 /* ? */ +#define RADEON_INTERRUPT_LINE 0x0f3c /* PCI */ +#define RADEON_INTERRUPT_PIN 0x0f3d /* PCI */ +#define RADEON_IO_BASE 0x0f14 /* PCI */ + +#define RADEON_LATENCY 0x0f0d /* PCI */ +#define RADEON_LEAD_BRES_DEC 0x1608 +#define RADEON_LEAD_BRES_LNTH 0x161c +#define RADEON_LEAD_BRES_LNTH_SUB 0x1624 +#define RADEON_LVDS_GEN_CNTL 0x02d0 +# define RADEON_LVDS_ON (1 << 0) +# define RADEON_LVDS_DISPLAY_DIS (1 << 1) +# define RADEON_LVDS_PANEL_TYPE (1 << 2) +# define RADEON_LVDS_PANEL_FORMAT (1 << 3) +# define RADEON_LVDS_EN (1 << 7) +# define RADEON_LVDS_DIGON (1 << 18) +# define RADEON_LVDS_BLON (1 << 19) +# define RADEON_LVDS_SEL_CRTC2 (1 << 23) +#define RADEON_LVDS_PLL_CNTL 0x02d4 +# define RADEON_HSYNC_DELAY_SHIFT 28 +# define RADEON_HSYNC_DELAY_MASK (0xf << 28) + +#define RADEON_MAX_LATENCY 0x0f3f /* PCI */ +#define RADEON_MC_AGP_LOCATION 0x014c +#define RADEON_MC_FB_LOCATION 0x0148 +#define RADEON_DISPLAY_BASE_ADDR 0x23c +#define RADEON_DISPLAY2_BASE_ADDR 0x33c +#define RADEON_OV0_BASE_ADDR 0x43c +#define RADEON_NB_TOM 0x15c +#define RADEON_MCLK_CNTL 0x0012 /* PLL */ +# define RADEON_FORCEON_MCLKA (1 << 16) +# define RADEON_FORCEON_MCLKB (1 << 17) +# define RADEON_FORCEON_YCLKA (1 << 18) +# define RADEON_FORCEON_YCLKB (1 << 19) +# define RADEON_FORCEON_MC (1 << 20) +# define RADEON_FORCEON_AIC (1 << 21) +#define RADEON_MDGPIO_A_REG 0x01ac +#define RADEON_MDGPIO_EN_REG 0x01b0 +#define RADEON_MDGPIO_MASK 0x0198 +#define RADEON_MDGPIO_Y_REG 0x01b4 +#define RADEON_MEM_ADDR_CONFIG 0x0148 +#define RADEON_MEM_BASE 0x0f10 /* PCI */ +#define RADEON_MEM_CNTL 0x0140 +# define RADEON_MEM_NUM_CHANNELS_MASK 0x01 +# define RADEON_MEM_USE_B_CH_ONLY (1<<1) +# define RV100_HALF_MODE (1<<3) +# define R300_MEM_NUM_CHANNELS_MASK 0x03 +# define R300_MEM_USE_CD_CH_ONLY (1<<2) +#define RADEON_MEM_TIMING_CNTL 0x0144 /* EXT_MEM_CNTL */ +#define RADEON_MEM_INIT_LAT_TIMER 0x0154 +#define RADEON_MEM_INTF_CNTL 0x014c +#define RADEON_MEM_SDRAM_MODE_REG 0x0158 +#define RADEON_MEM_STR_CNTL 0x0150 +#define RADEON_MEM_VGA_RP_SEL 0x003c +#define RADEON_MEM_VGA_WP_SEL 0x0038 +#define RADEON_MIN_GRANT 0x0f3e /* PCI */ +#define RADEON_MM_DATA 0x0004 +#define RADEON_MM_INDEX 0x0000 +#define RADEON_MPLL_CNTL 0x000e /* PLL */ +#define RADEON_MPP_TB_CONFIG 0x01c0 /* ? */ +#define RADEON_MPP_GP_CONFIG 0x01c8 /* ? */ +#define R300_MC_IND_INDEX 0x01f8 +# define R300_MC_IND_ADDR_MASK 0x3f +#define R300_MC_IND_DATA 0x01fc +#define R300_MC_READ_CNTL_AB 0x017c +# define R300_MEM_RBS_POSITION_A_MASK 0x03 +#define R300_MC_READ_CNTL_CD_mcind 0x24 +# define R300_MEM_RBS_POSITION_C_MASK 0x03 + +#define RADEON_N_VIF_COUNT 0x0248 + +#define RADEON_OV0_AUTO_FLIP_CNTL 0x0470 +#define RADEON_OV0_COLOUR_CNTL 0x04E0 +#define RADEON_OV0_DEINTERLACE_PATTERN 0x0474 +#define RADEON_OV0_EXCLUSIVE_HORZ 0x0408 +# define RADEON_EXCL_HORZ_START_MASK 0x000000ff +# define RADEON_EXCL_HORZ_END_MASK 0x0000ff00 +# define RADEON_EXCL_HORZ_BACK_PORCH_MASK 0x00ff0000 +# define RADEON_EXCL_HORZ_EXCLUSIVE_EN 0x80000000 +#define RADEON_OV0_EXCLUSIVE_VERT 0x040C +# define RADEON_EXCL_VERT_START_MASK 0x000003ff +# define RADEON_EXCL_VERT_END_MASK 0x03ff0000 +#define RADEON_OV0_FILTER_CNTL 0x04A0 +#define RADEON_OV0_FOUR_TAP_COEF_0 0x04B0 +#define RADEON_OV0_FOUR_TAP_COEF_1 0x04B4 +#define RADEON_OV0_FOUR_TAP_COEF_2 0x04B8 +#define RADEON_OV0_FOUR_TAP_COEF_3 0x04BC +#define RADEON_OV0_FOUR_TAP_COEF_4 0x04C0 +#define RADEON_OV0_GAMMA_000_00F 0x0d40 +#define RADEON_OV0_GAMMA_010_01F 0x0d44 +#define RADEON_OV0_GAMMA_020_03F 0x0d48 +#define RADEON_OV0_GAMMA_040_07F 0x0d4c +#define RADEON_OV0_GAMMA_080_0BF 0x0e00 +#define RADEON_OV0_GAMMA_0C0_0FF 0x0e04 +#define RADEON_OV0_GAMMA_100_13F 0x0e08 +#define RADEON_OV0_GAMMA_140_17F 0x0e0c +#define RADEON_OV0_GAMMA_180_1BF 0x0e10 +#define RADEON_OV0_GAMMA_1C0_1FF 0x0e14 +#define RADEON_OV0_GAMMA_200_23F 0x0e18 +#define RADEON_OV0_GAMMA_240_27F 0x0e1c +#define RADEON_OV0_GAMMA_280_2BF 0x0e20 +#define RADEON_OV0_GAMMA_2C0_2FF 0x0e24 +#define RADEON_OV0_GAMMA_300_33F 0x0e28 +#define RADEON_OV0_GAMMA_340_37F 0x0e2c +#define RADEON_OV0_GAMMA_380_3BF 0x0d50 +#define RADEON_OV0_GAMMA_3C0_3FF 0x0d54 +#define RADEON_OV0_GRAPHICS_KEY_CLR_LOW 0x04EC +#define RADEON_OV0_GRAPHICS_KEY_CLR_HIGH 0x04F0 +#define RADEON_OV0_H_INC 0x0480 +#define RADEON_OV0_KEY_CNTL 0x04F4 +# define RADEON_VIDEO_KEY_FN_MASK 0x00000003L +# define RADEON_VIDEO_KEY_FN_FALSE 0x00000000L +# define RADEON_VIDEO_KEY_FN_TRUE 0x00000001L +# define RADEON_VIDEO_KEY_FN_EQ 0x00000002L +# define RADEON_VIDEO_KEY_FN_NE 0x00000003L +# define RADEON_GRAPHIC_KEY_FN_MASK 0x00000030L +# define RADEON_GRAPHIC_KEY_FN_FALSE 0x00000000L +# define RADEON_GRAPHIC_KEY_FN_TRUE 0x00000010L +# define RADEON_GRAPHIC_KEY_FN_EQ 0x00000020L +# define RADEON_GRAPHIC_KEY_FN_NE 0x00000030L +# define RADEON_CMP_MIX_MASK 0x00000100L +# define RADEON_CMP_MIX_OR 0x00000000L +# define RADEON_CMP_MIX_AND 0x00000100L +#define RADEON_OV0_LIN_TRANS_A 0x0d20 +#define RADEON_OV0_LIN_TRANS_B 0x0d24 +#define RADEON_OV0_LIN_TRANS_C 0x0d28 +#define RADEON_OV0_LIN_TRANS_D 0x0d2c +#define RADEON_OV0_LIN_TRANS_E 0x0d30 +#define RADEON_OV0_LIN_TRANS_F 0x0d34 +#define RADEON_OV0_P1_BLANK_LINES_AT_TOP 0x0430 +# define RADEON_P1_BLNK_LN_AT_TOP_M1_MASK 0x00000fffL +# define RADEON_P1_ACTIVE_LINES_M1 0x0fff0000L +#define RADEON_OV0_P1_H_ACCUM_INIT 0x0488 +#define RADEON_OV0_P1_V_ACCUM_INIT 0x0428 +# define RADEON_OV0_P1_MAX_LN_IN_PER_LN_OUT 0x00000003L +# define RADEON_OV0_P1_V_ACCUM_INIT_MASK 0x01ff8000L +#define RADEON_OV0_P1_X_START_END 0x0494 +#define RADEON_OV0_P2_X_START_END 0x0498 +#define RADEON_OV0_P23_BLANK_LINES_AT_TOP 0x0434 +# define RADEON_P23_BLNK_LN_AT_TOP_M1_MASK 0x000007ffL +# define RADEON_P23_ACTIVE_LINES_M1 0x07ff0000L +#define RADEON_OV0_P23_H_ACCUM_INIT 0x048C +#define RADEON_OV0_P23_V_ACCUM_INIT 0x042C +#define RADEON_OV0_P3_X_START_END 0x049C +#define RADEON_OV0_REG_LOAD_CNTL 0x0410 +# define RADEON_REG_LD_CTL_LOCK 0x00000001L +# define RADEON_REG_LD_CTL_VBLANK_DURING_LOCK 0x00000002L +# define RADEON_REG_LD_CTL_STALL_GUI_UNTIL_FLIP 0x00000004L +# define RADEON_REG_LD_CTL_LOCK_READBACK 0x00000008L +#define RADEON_OV0_SCALE_CNTL 0x0420 +# define RADEON_SCALER_HORZ_PICK_NEAREST 0x00000004L +# define RADEON_SCALER_VERT_PICK_NEAREST 0x00000008L +# define RADEON_SCALER_SIGNED_UV 0x00000010L +# define RADEON_SCALER_GAMMA_SEL_MASK 0x00000060L +# define RADEON_SCALER_GAMMA_SEL_BRIGHT 0x00000000L +# define RADEON_SCALER_GAMMA_SEL_G22 0x00000020L +# define RADEON_SCALER_GAMMA_SEL_G18 0x00000040L +# define RADEON_SCALER_GAMMA_SEL_G14 0x00000060L +# define RADEON_SCALER_COMCORE_SHIFT_UP_ONE 0x00000080L +# define RADEON_SCALER_SURFAC_FORMAT 0x00000f00L +# define RADEON_SCALER_SOURCE_15BPP 0x00000300L +# define RADEON_SCALER_SOURCE_16BPP 0x00000400L +# define RADEON_SCALER_SOURCE_32BPP 0x00000600L +# define RADEON_SCALER_SOURCE_YUV9 0x00000900L +# define RADEON_SCALER_SOURCE_YUV12 0x00000A00L +# define RADEON_SCALER_SOURCE_VYUY422 0x00000B00L +# define RADEON_SCALER_SOURCE_YVYU422 0x00000C00L +# define RADEON_SCALER_ADAPTIVE_DEINT 0x00001000L +# define RADEON_SCALER_TEMPORAL_DEINT 0x00002000L +# define RADEON_SCALER_SMART_SWITCH 0x00008000L +# define RADEON_SCALER_BURST_PER_PLANE 0x007F0000L +# define RADEON_SCALER_DOUBLE_BUFFER 0x01000000L +# define RADEON_SCALER_DIS_LIMIT 0x08000000L +# define RADEON_SCALER_INT_EMU 0x20000000L +# define RADEON_SCALER_ENABLE 0x40000000L +# define RADEON_SCALER_SOFT_RESET 0x80000000L +# define RADEON_SCALER_ADAPTIVE_DEINT 0x00001000L +#define RADEON_OV0_STEP_BY 0x0484 +#define RADEON_OV0_TEST 0x04F8 +#define RADEON_OV0_V_INC 0x0424 +#define RADEON_OV0_VID_BUF_PITCH0_VALUE 0x0460 +#define RADEON_OV0_VID_BUF_PITCH1_VALUE 0x0464 +#define RADEON_OV0_VID_BUF0_BASE_ADRS 0x0440 +# define RADEON_VIF_BUF0_PITCH_SEL 0x00000001L +# define RADEON_VIF_BUF0_TILE_ADRS 0x00000002L +# define RADEON_VIF_BUF0_BASE_ADRS_MASK 0x03fffff0L +# define RADEON_VIF_BUF0_1ST_LINE_LSBS_MASK 0x48000000L +#define RADEON_OV0_VID_BUF1_BASE_ADRS 0x0444 +# define RADEON_VIF_BUF1_PITCH_SEL 0x00000001L +# define RADEON_VIF_BUF1_TILE_ADRS 0x00000002L +# define RADEON_VIF_BUF1_BASE_ADRS_MASK 0x03fffff0L +# define RADEON_VIF_BUF1_1ST_LINE_LSBS_MASK 0x48000000L +#define RADEON_OV0_VID_BUF2_BASE_ADRS 0x0448 +# define RADEON_VIF_BUF2_PITCH_SEL 0x00000001L +# define RADEON_VIF_BUF2_TILE_ADRS 0x00000002L +# define RADEON_VIF_BUF2_BASE_ADRS_MASK 0x03fffff0L +# define RADEON_VIF_BUF2_1ST_LINE_LSBS_MASK 0x48000000L +#define RADEON_OV0_VID_BUF3_BASE_ADRS 0x044C +#define RADEON_OV0_VID_BUF4_BASE_ADRS 0x0450 +#define RADEON_OV0_VID_BUF5_BASE_ADRS 0x0454 +#define RADEON_OV0_VIDEO_KEY_CLR_HIGH 0x04E8 +#define RADEON_OV0_VIDEO_KEY_CLR_LOW 0x04E4 +#define RADEON_OV0_Y_X_START 0x0400 +#define RADEON_OV0_Y_X_END 0x0404 +#define RADEON_OV1_Y_X_START 0x0600 +#define RADEON_OV1_Y_X_END 0x0604 +#define RADEON_OVR_CLR 0x0230 +#define RADEON_OVR_WID_LEFT_RIGHT 0x0234 +#define RADEON_OVR_WID_TOP_BOTTOM 0x0238 + +#define RADEON_P2PLL_CNTL 0x002a /* P2PLL */ +# define RADEON_P2PLL_RESET (1 << 0) +# define RADEON_P2PLL_SLEEP (1 << 1) +# define RADEON_P2PLL_ATOMIC_UPDATE_EN (1 << 16) +# define RADEON_P2PLL_VGA_ATOMIC_UPDATE_EN (1 << 17) +# define RADEON_P2PLL_ATOMIC_UPDATE_VSYNC (1 << 18) +#define RADEON_P2PLL_DIV_0 0x002c +# define RADEON_P2PLL_FB0_DIV_MASK 0x07ff +# define RADEON_P2PLL_POST0_DIV_MASK 0x00070000 +#define RADEON_P2PLL_REF_DIV 0x002B /* PLL */ +# define RADEON_P2PLL_REF_DIV_MASK 0x03ff +# define RADEON_P2PLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */ +# define RADEON_P2PLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */ +# define R300_PPLL_REF_DIV_ACC_MASK (0x3ff << 18) +# define R300_PPLL_REF_DIV_ACC_SHIFT 18 +#define RADEON_PALETTE_DATA 0x00b4 +#define RADEON_PALETTE_30_DATA 0x00b8 +#define RADEON_PALETTE_INDEX 0x00b0 +#define RADEON_PCI_GART_PAGE 0x017c +#define RADEON_PIXCLKS_CNTL 0x002d +# define RADEON_PIX2CLK_SRC_SEL_MASK 0x03 +# define RADEON_PIX2CLK_SRC_SEL_CPUCLK 0x00 +# define RADEON_PIX2CLK_SRC_SEL_PSCANCLK 0x01 +# define RADEON_PIX2CLK_SRC_SEL_BYTECLK 0x02 +# define RADEON_PIX2CLK_SRC_SEL_P2PLLCLK 0x03 +# define RADEON_PIX2CLK_ALWAYS_ONb (1<<6) +# define RADEON_PIX2CLK_DAC_ALWAYS_ONb (1<<7) +# define RADEON_PIXCLK_TV_SRC_SEL (1 << 8) +# define RADEON_PIXCLK_LVDS_ALWAYS_ONb (1 << 14) +# define RADEON_PIXCLK_TMDS_ALWAYS_ONb (1 << 15) +#define RADEON_PLANE_3D_MASK_C 0x1d44 +#define RADEON_PLL_TEST_CNTL 0x0013 /* PLL */ +#define RADEON_PMI_CAP_ID 0x0f5c /* PCI */ +#define RADEON_PMI_DATA 0x0f63 /* PCI */ +#define RADEON_PMI_NXT_CAP_PTR 0x0f5d /* PCI */ +#define RADEON_PMI_PMC_REG 0x0f5e /* PCI */ +#define RADEON_PMI_PMCSR_REG 0x0f60 /* PCI */ +#define RADEON_PMI_REGISTER 0x0f5c /* PCI */ +#define RADEON_PPLL_CNTL 0x0002 /* PLL */ +# define RADEON_PPLL_RESET (1 << 0) +# define RADEON_PPLL_SLEEP (1 << 1) +# define RADEON_PPLL_ATOMIC_UPDATE_EN (1 << 16) +# define RADEON_PPLL_VGA_ATOMIC_UPDATE_EN (1 << 17) +# define RADEON_PPLL_ATOMIC_UPDATE_VSYNC (1 << 18) +#define RADEON_PPLL_DIV_0 0x0004 /* PLL */ +#define RADEON_PPLL_DIV_1 0x0005 /* PLL */ +#define RADEON_PPLL_DIV_2 0x0006 /* PLL */ +#define RADEON_PPLL_DIV_3 0x0007 /* PLL */ +# define RADEON_PPLL_FB3_DIV_MASK 0x07ff +# define RADEON_PPLL_POST3_DIV_MASK 0x00070000 +#define RADEON_PPLL_REF_DIV 0x0003 /* PLL */ +# define RADEON_PPLL_REF_DIV_MASK 0x03ff +# define RADEON_PPLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */ +# define RADEON_PPLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */ +#define RADEON_PWR_MNGMT_CNTL_STATUS 0x0f60 /* PCI */ + +#define RADEON_RBBM_GUICNTL 0x172c +# define RADEON_HOST_DATA_SWAP_NONE (0 << 0) +# define RADEON_HOST_DATA_SWAP_16BIT (1 << 0) +# define RADEON_HOST_DATA_SWAP_32BIT (2 << 0) +# define RADEON_HOST_DATA_SWAP_HDW (3 << 0) +#define RADEON_RBBM_SOFT_RESET 0x00f0 +# define RADEON_SOFT_RESET_CP (1 << 0) +# define RADEON_SOFT_RESET_HI (1 << 1) +# define RADEON_SOFT_RESET_SE (1 << 2) +# define RADEON_SOFT_RESET_RE (1 << 3) +# define RADEON_SOFT_RESET_PP (1 << 4) +# define RADEON_SOFT_RESET_E2 (1 << 5) +# define RADEON_SOFT_RESET_RB (1 << 6) +# define RADEON_SOFT_RESET_HDP (1 << 7) +#define RADEON_RBBM_STATUS 0x0e40 +# define RADEON_RBBM_FIFOCNT_MASK 0x007f +# define RADEON_RBBM_ACTIVE (1 << 31) +#define RADEON_RB2D_DSTCACHE_CTLSTAT 0x342c +# define RADEON_RB2D_DC_FLUSH (3 << 0) +# define RADEON_RB2D_DC_FREE (3 << 2) +# define RADEON_RB2D_DC_FLUSH_ALL 0xf +# define RADEON_RB2D_DC_BUSY (1 << 31) +#define RADEON_RB2D_DSTCACHE_MODE 0x3428 +#define RADEON_REG_BASE 0x0f18 /* PCI */ +#define RADEON_REGPROG_INF 0x0f09 /* PCI */ +#define RADEON_REVISION_ID 0x0f08 /* PCI */ + +#define RADEON_SC_BOTTOM 0x164c +#define RADEON_SC_BOTTOM_RIGHT 0x16f0 +#define RADEON_SC_BOTTOM_RIGHT_C 0x1c8c +#define RADEON_SC_LEFT 0x1640 +#define RADEON_SC_RIGHT 0x1644 +#define RADEON_SC_TOP 0x1648 +#define RADEON_SC_TOP_LEFT 0x16ec +#define RADEON_SC_TOP_LEFT_C 0x1c88 +# define RADEON_SC_SIGN_MASK_LO 0x8000 +# define RADEON_SC_SIGN_MASK_HI 0x80000000 +#define RADEON_SCLK_CNTL 0x000d /* PLL */ +# define RADEON_DYN_STOP_LAT_MASK 0x00007ff8 +# define RADEON_CP_MAX_DYN_STOP_LAT 0x0008 +# define RADEON_SCLK_FORCEON_MASK 0xffff8000 +#define RADEON_SCLK_MORE_CNTL 0x0035 /* PLL */ +# define RADEON_SCLK_MORE_FORCEON 0x0700 +#define RADEON_SDRAM_MODE_REG 0x0158 +#define RADEON_SEQ8_DATA 0x03c5 /* VGA */ +#define RADEON_SEQ8_IDX 0x03c4 /* VGA */ +#define RADEON_SNAPSHOT_F_COUNT 0x0244 +#define RADEON_SNAPSHOT_VH_COUNTS 0x0240 +#define RADEON_SNAPSHOT_VIF_COUNT 0x024c +#define RADEON_SRC_OFFSET 0x15ac +#define RADEON_SRC_PITCH 0x15b0 +#define RADEON_SRC_PITCH_OFFSET 0x1428 +#define RADEON_SRC_SC_BOTTOM 0x165c +#define RADEON_SRC_SC_BOTTOM_RIGHT 0x16f4 +#define RADEON_SRC_SC_RIGHT 0x1654 +#define RADEON_SRC_X 0x1414 +#define RADEON_SRC_X_Y 0x1590 +#define RADEON_SRC_Y 0x1418 +#define RADEON_SRC_Y_X 0x1434 +#define RADEON_STATUS 0x0f06 /* PCI */ +#define RADEON_SUBPIC_CNTL 0x0540 /* ? */ +#define RADEON_SUB_CLASS 0x0f0a /* PCI */ +#define RADEON_SURFACE_CNTL 0x0b00 +# define RADEON_SURF_TRANSLATION_DIS (1 << 8) +# define RADEON_NONSURF_AP0_SWP_16BPP (1 << 20) +# define RADEON_NONSURF_AP0_SWP_32BPP (1 << 21) +#define RADEON_SURFACE0_INFO 0x0b0c +# define RADEON_SURF_TILE_COLOR_MACRO (0 << 16) +# define RADEON_SURF_TILE_COLOR_BOTH (1 << 16) +# define RADEON_SURF_TILE_DEPTH_32BPP (2 << 16) +# define RADEON_SURF_TILE_DEPTH_16BPP (3 << 16) +# define R200_SURF_TILE_NONE (0 << 16) +# define R200_SURF_TILE_COLOR_MACRO (1 << 16) +# define R200_SURF_TILE_COLOR_MICRO (2 << 16) +# define R200_SURF_TILE_COLOR_BOTH (3 << 16) +# define R200_SURF_TILE_DEPTH_32BPP (4 << 16) +# define R200_SURF_TILE_DEPTH_16BPP (5 << 16) +# define RADEON_SURF_AP0_SWP_16BPP (1 << 20) +# define RADEON_SURF_AP0_SWP_32BPP (1 << 21) +# define RADEON_SURF_AP1_SWP_16BPP (1 << 22) +# define RADEON_SURF_AP1_SWP_32BPP (1 << 23) +#define RADEON_SURFACE0_LOWER_BOUND 0x0b04 +#define RADEON_SURFACE0_UPPER_BOUND 0x0b08 +#define RADEON_SURFACE1_INFO 0x0b1c +#define RADEON_SURFACE1_LOWER_BOUND 0x0b14 +#define RADEON_SURFACE1_UPPER_BOUND 0x0b18 +#define RADEON_SURFACE2_INFO 0x0b2c +#define RADEON_SURFACE2_LOWER_BOUND 0x0b24 +#define RADEON_SURFACE2_UPPER_BOUND 0x0b28 +#define RADEON_SURFACE3_INFO 0x0b3c +#define RADEON_SURFACE3_LOWER_BOUND 0x0b34 +#define RADEON_SURFACE3_UPPER_BOUND 0x0b38 +#define RADEON_SURFACE4_INFO 0x0b4c +#define RADEON_SURFACE4_LOWER_BOUND 0x0b44 +#define RADEON_SURFACE4_UPPER_BOUND 0x0b48 +#define RADEON_SURFACE5_INFO 0x0b5c +#define RADEON_SURFACE5_LOWER_BOUND 0x0b54 +#define RADEON_SURFACE5_UPPER_BOUND 0x0b58 +#define RADEON_SURFACE6_INFO 0x0b6c +#define RADEON_SURFACE6_LOWER_BOUND 0x0b64 +#define RADEON_SURFACE6_UPPER_BOUND 0x0b68 +#define RADEON_SURFACE7_INFO 0x0b7c +#define RADEON_SURFACE7_LOWER_BOUND 0x0b74 +#define RADEON_SURFACE7_UPPER_BOUND 0x0b78 +#define RADEON_SW_SEMAPHORE 0x013c + +#define RADEON_TEST_DEBUG_CNTL 0x0120 +#define RADEON_TEST_DEBUG_MUX 0x0124 +#define RADEON_TEST_DEBUG_OUT 0x012c +#define RADEON_TMDS_PLL_CNTL 0x02a8 +#define RADEON_TMDS_TRANSMITTER_CNTL 0x02a4 +# define RADEON_TMDS_TRANSMITTER_PLLEN 1 +# define RADEON_TMDS_TRANSMITTER_PLLRST 2 +#define RADEON_TRAIL_BRES_DEC 0x1614 +#define RADEON_TRAIL_BRES_ERR 0x160c +#define RADEON_TRAIL_BRES_INC 0x1610 +#define RADEON_TRAIL_X 0x1618 +#define RADEON_TRAIL_X_SUB 0x1620 + +#define RADEON_VCLK_ECP_CNTL 0x0008 /* PLL */ +# define RADEON_VCLK_SRC_SEL_MASK 0x03 +# define RADEON_VCLK_SRC_SEL_CPUCLK 0x00 +# define RADEON_VCLK_SRC_SEL_PSCANCLK 0x01 +# define RADEON_VCLK_SRC_SEL_BYTECLK 0x02 +# define RADEON_VCLK_SRC_SEL_PPLLCLK 0x03 +# define RADEON_PIXCLK_ALWAYS_ONb (1<<6) +# define RADEON_PIXCLK_DAC_ALWAYS_ONb (1<<7) + +#define RADEON_VENDOR_ID 0x0f00 /* PCI */ +#define RADEON_VGA_DDA_CONFIG 0x02e8 +#define RADEON_VGA_DDA_ON_OFF 0x02ec +#define RADEON_VID_BUFFER_CONTROL 0x0900 +#define RADEON_VIDEOMUX_CNTL 0x0190 +#define RADEON_VIPH_CONTROL 0x0c40 /* ? */ + +#define RADEON_WAIT_UNTIL 0x1720 +# define RADEON_WAIT_CRTC_PFLIP (1 << 0) +# define RADEON_WAIT_2D_IDLECLEAN (1 << 16) +# define RADEON_WAIT_3D_IDLECLEAN (1 << 17) +# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) + +#define RADEON_X_MPLL_REF_FB_DIV 0x000a /* PLL */ +#define RADEON_XCLK_CNTL 0x000d /* PLL */ +#define RADEON_XDLL_CNTL 0x000c /* PLL */ +#define RADEON_XPLL_CNTL 0x000b /* PLL */ + + + + /* Registers for 3D/TCL */ +#define RADEON_PP_BORDER_COLOR_0 0x1d40 +#define RADEON_PP_BORDER_COLOR_1 0x1d44 +#define RADEON_PP_BORDER_COLOR_2 0x1d48 +#define RADEON_PP_CNTL 0x1c38 +# define RADEON_STIPPLE_ENABLE (1 << 0) +# define RADEON_SCISSOR_ENABLE (1 << 1) +# define RADEON_PATTERN_ENABLE (1 << 2) +# define RADEON_SHADOW_ENABLE (1 << 3) +# define RADEON_TEX_ENABLE_MASK (0xf << 4) +# define RADEON_TEX_0_ENABLE (1 << 4) +# define RADEON_TEX_1_ENABLE (1 << 5) +# define RADEON_TEX_2_ENABLE (1 << 6) +# define RADEON_TEX_3_ENABLE (1 << 7) +# define RADEON_TEX_BLEND_ENABLE_MASK (0xf << 12) +# define RADEON_TEX_BLEND_0_ENABLE (1 << 12) +# define RADEON_TEX_BLEND_1_ENABLE (1 << 13) +# define RADEON_TEX_BLEND_2_ENABLE (1 << 14) +# define RADEON_TEX_BLEND_3_ENABLE (1 << 15) +# define RADEON_PLANAR_YUV_ENABLE (1 << 20) +# define RADEON_SPECULAR_ENABLE (1 << 21) +# define RADEON_FOG_ENABLE (1 << 22) +# define RADEON_ALPHA_TEST_ENABLE (1 << 23) +# define RADEON_ANTI_ALIAS_NONE (0 << 24) +# define RADEON_ANTI_ALIAS_LINE (1 << 24) +# define RADEON_ANTI_ALIAS_POLY (2 << 24) +# define RADEON_ANTI_ALIAS_LINE_POLY (3 << 24) +# define RADEON_BUMP_MAP_ENABLE (1 << 26) +# define RADEON_BUMPED_MAP_T0 (0 << 27) +# define RADEON_BUMPED_MAP_T1 (1 << 27) +# define RADEON_BUMPED_MAP_T2 (2 << 27) +# define RADEON_TEX_3D_ENABLE_0 (1 << 29) +# define RADEON_TEX_3D_ENABLE_1 (1 << 30) +# define RADEON_MC_ENABLE (1 << 31) +#define RADEON_PP_FOG_COLOR 0x1c18 +# define RADEON_FOG_COLOR_MASK 0x00ffffff +# define RADEON_FOG_VERTEX (0 << 24) +# define RADEON_FOG_TABLE (1 << 24) +# define RADEON_FOG_USE_DEPTH (0 << 25) +# define RADEON_FOG_USE_DIFFUSE_ALPHA (2 << 25) +# define RADEON_FOG_USE_SPEC_ALPHA (3 << 25) +#define RADEON_PP_LUM_MATRIX 0x1d00 +#define RADEON_PP_MISC 0x1c14 +# define RADEON_REF_ALPHA_MASK 0x000000ff +# define RADEON_ALPHA_TEST_FAIL (0 << 8) +# define RADEON_ALPHA_TEST_LESS (1 << 8) +# define RADEON_ALPHA_TEST_LEQUAL (2 << 8) +# define RADEON_ALPHA_TEST_EQUAL (3 << 8) +# define RADEON_ALPHA_TEST_GEQUAL (4 << 8) +# define RADEON_ALPHA_TEST_GREATER (5 << 8) +# define RADEON_ALPHA_TEST_NEQUAL (6 << 8) +# define RADEON_ALPHA_TEST_PASS (7 << 8) +# define RADEON_ALPHA_TEST_OP_MASK (7 << 8) +# define RADEON_CHROMA_FUNC_FAIL (0 << 16) +# define RADEON_CHROMA_FUNC_PASS (1 << 16) +# define RADEON_CHROMA_FUNC_NEQUAL (2 << 16) +# define RADEON_CHROMA_FUNC_EQUAL (3 << 16) +# define RADEON_CHROMA_KEY_NEAREST (0 << 18) +# define RADEON_CHROMA_KEY_ZERO (1 << 18) +# define RADEON_SHADOW_ID_AUTO_INC (1 << 20) +# define RADEON_SHADOW_FUNC_EQUAL (0 << 21) +# define RADEON_SHADOW_FUNC_NEQUAL (1 << 21) +# define RADEON_SHADOW_PASS_1 (0 << 22) +# define RADEON_SHADOW_PASS_2 (1 << 22) +# define RADEON_RIGHT_HAND_CUBE_D3D (0 << 24) +# define RADEON_RIGHT_HAND_CUBE_OGL (1 << 24) +#define RADEON_PP_ROT_MATRIX_0 0x1d58 +#define RADEON_PP_ROT_MATRIX_1 0x1d5c +#define RADEON_PP_TXFILTER_0 0x1c54 +#define RADEON_PP_TXFILTER_1 0x1c6c +#define RADEON_PP_TXFILTER_2 0x1c84 +# define RADEON_MAG_FILTER_NEAREST (0 << 0) +# define RADEON_MAG_FILTER_LINEAR (1 << 0) +# define RADEON_MAG_FILTER_MASK (1 << 0) +# define RADEON_MIN_FILTER_NEAREST (0 << 1) +# define RADEON_MIN_FILTER_LINEAR (1 << 1) +# define RADEON_MIN_FILTER_NEAREST_MIP_NEAREST (2 << 1) +# define RADEON_MIN_FILTER_NEAREST_MIP_LINEAR (3 << 1) +# define RADEON_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 1) +# define RADEON_MIN_FILTER_LINEAR_MIP_LINEAR (7 << 1) +# define RADEON_MIN_FILTER_ANISO_NEAREST (8 << 1) +# define RADEON_MIN_FILTER_ANISO_LINEAR (9 << 1) +# define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 << 1) +# define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (11 << 1) +# define RADEON_MIN_FILTER_MASK (15 << 1) +# define RADEON_MAX_ANISO_1_TO_1 (0 << 5) +# define RADEON_MAX_ANISO_2_TO_1 (1 << 5) +# define RADEON_MAX_ANISO_4_TO_1 (2 << 5) +# define RADEON_MAX_ANISO_8_TO_1 (3 << 5) +# define RADEON_MAX_ANISO_16_TO_1 (4 << 5) +# define RADEON_MAX_ANISO_MASK (7 << 5) +# define RADEON_LOD_BIAS_MASK (0xff << 8) +# define RADEON_LOD_BIAS_SHIFT 8 +# define RADEON_MAX_MIP_LEVEL_MASK (0x0f << 16) +# define RADEON_MAX_MIP_LEVEL_SHIFT 16 +# define RADEON_YUV_TO_RGB (1 << 20) +# define RADEON_YUV_TEMPERATURE_COOL (0 << 21) +# define RADEON_YUV_TEMPERATURE_HOT (1 << 21) +# define RADEON_YUV_TEMPERATURE_MASK (1 << 21) +# define RADEON_WRAPEN_S (1 << 22) +# define RADEON_CLAMP_S_WRAP (0 << 23) +# define RADEON_CLAMP_S_MIRROR (1 << 23) +# define RADEON_CLAMP_S_CLAMP_LAST (2 << 23) +# define RADEON_CLAMP_S_MIRROR_CLAMP_LAST (3 << 23) +# define RADEON_CLAMP_S_CLAMP_BORDER (4 << 23) +# define RADEON_CLAMP_S_MIRROR_CLAMP_BORDER (5 << 23) +# define RADEON_CLAMP_S_CLAMP_GL (6 << 23) +# define RADEON_CLAMP_S_MIRROR_CLAMP_GL (7 << 23) +# define RADEON_CLAMP_S_MASK (7 << 23) +# define RADEON_WRAPEN_T (1 << 26) +# define RADEON_CLAMP_T_WRAP (0 << 27) +# define RADEON_CLAMP_T_MIRROR (1 << 27) +# define RADEON_CLAMP_T_CLAMP_LAST (2 << 27) +# define RADEON_CLAMP_T_MIRROR_CLAMP_LAST (3 << 27) +# define RADEON_CLAMP_T_CLAMP_BORDER (4 << 27) +# define RADEON_CLAMP_T_MIRROR_CLAMP_BORDER (5 << 27) +# define RADEON_CLAMP_T_CLAMP_GL (6 << 27) +# define RADEON_CLAMP_T_MIRROR_CLAMP_GL (7 << 27) +# define RADEON_CLAMP_T_MASK (7 << 27) +# define RADEON_BORDER_MODE_OGL (0 << 31) +# define RADEON_BORDER_MODE_D3D (1 << 31) +#define RADEON_PP_TXFORMAT_0 0x1c58 +#define RADEON_PP_TXFORMAT_1 0x1c70 +#define RADEON_PP_TXFORMAT_2 0x1c88 +# define RADEON_TXFORMAT_I8 (0 << 0) +# define RADEON_TXFORMAT_AI88 (1 << 0) +# define RADEON_TXFORMAT_RGB332 (2 << 0) +# define RADEON_TXFORMAT_ARGB1555 (3 << 0) +# define RADEON_TXFORMAT_RGB565 (4 << 0) +# define RADEON_TXFORMAT_ARGB4444 (5 << 0) +# define RADEON_TXFORMAT_ARGB8888 (6 << 0) +# define RADEON_TXFORMAT_RGBA8888 (7 << 0) +# define RADEON_TXFORMAT_Y8 (8 << 0) +# define RADEON_TXFORMAT_VYUY422 (10 << 0) +# define RADEON_TXFORMAT_YVYU422 (11 << 0) +# define RADEON_TXFORMAT_DXT1 (12 << 0) +# define RADEON_TXFORMAT_DXT23 (14 << 0) +# define RADEON_TXFORMAT_DXT45 (15 << 0) +# define RADEON_TXFORMAT_SHADOW16 (16 << 0) +# define RADEON_TXFORMAT_SHADOW32 (17 << 0) +# define RADEON_TXFORMAT_DUDV88 (18 << 0) +# define RADEON_TXFORMAT_LDUDV655 (19 << 0) +# define RADEON_TXFORMAT_LDUDUV8888 (20 << 0) +# define RADEON_TXFORMAT_FORMAT_MASK (31 << 0) +# define RADEON_TXFORMAT_FORMAT_SHIFT 0 +# define RADEON_TXFORMAT_APPLE_YUV_MODE (1 << 5) +# define RADEON_TXFORMAT_ALPHA_IN_MAP (1 << 6) +# define RADEON_TXFORMAT_NON_POWER2 (1 << 7) +# define RADEON_TXFORMAT_WIDTH_MASK (15 << 8) +# define RADEON_TXFORMAT_WIDTH_SHIFT 8 +# define RADEON_TXFORMAT_HEIGHT_MASK (15 << 12) +# define RADEON_TXFORMAT_HEIGHT_SHIFT 12 +# define RADEON_TXFORMAT_F5_WIDTH_MASK (15 << 16) +# define RADEON_TXFORMAT_F5_WIDTH_SHIFT 16 +# define RADEON_TXFORMAT_F5_HEIGHT_MASK (15 << 20) +# define RADEON_TXFORMAT_F5_HEIGHT_SHIFT 20 +# define RADEON_TXFORMAT_ST_ROUTE_STQ0 (0 << 24) +# define RADEON_TXFORMAT_ST_ROUTE_MASK (3 << 24) +# define RADEON_TXFORMAT_ST_ROUTE_STQ1 (1 << 24) +# define RADEON_TXFORMAT_ST_ROUTE_STQ2 (2 << 24) +# define RADEON_TXFORMAT_ENDIAN_NO_SWAP (0 << 26) +# define RADEON_TXFORMAT_ENDIAN_16BPP_SWAP (1 << 26) +# define RADEON_TXFORMAT_ENDIAN_32BPP_SWAP (2 << 26) +# define RADEON_TXFORMAT_ENDIAN_HALFDW_SWAP (3 << 26) +# define RADEON_TXFORMAT_ALPHA_MASK_ENABLE (1 << 28) +# define RADEON_TXFORMAT_CHROMA_KEY_ENABLE (1 << 29) +# define RADEON_TXFORMAT_CUBIC_MAP_ENABLE (1 << 30) +# define RADEON_TXFORMAT_PERSPECTIVE_ENABLE (1 << 31) +#define RADEON_PP_CUBIC_FACES_0 0x1d24 +#define RADEON_PP_CUBIC_FACES_1 0x1d28 +#define RADEON_PP_CUBIC_FACES_2 0x1d2c +# define RADEON_FACE_WIDTH_1_SHIFT 0 +# define RADEON_FACE_HEIGHT_1_SHIFT 4 +# define RADEON_FACE_WIDTH_1_MASK (0xf << 0) +# define RADEON_FACE_HEIGHT_1_MASK (0xf << 4) +# define RADEON_FACE_WIDTH_2_SHIFT 8 +# define RADEON_FACE_HEIGHT_2_SHIFT 12 +# define RADEON_FACE_WIDTH_2_MASK (0xf << 8) +# define RADEON_FACE_HEIGHT_2_MASK (0xf << 12) +# define RADEON_FACE_WIDTH_3_SHIFT 16 +# define RADEON_FACE_HEIGHT_3_SHIFT 20 +# define RADEON_FACE_WIDTH_3_MASK (0xf << 16) +# define RADEON_FACE_HEIGHT_3_MASK (0xf << 20) +# define RADEON_FACE_WIDTH_4_SHIFT 24 +# define RADEON_FACE_HEIGHT_4_SHIFT 28 +# define RADEON_FACE_WIDTH_4_MASK (0xf << 24) +# define RADEON_FACE_HEIGHT_4_MASK (0xf << 28) + +#define RADEON_PP_TXOFFSET_0 0x1c5c +#define RADEON_PP_TXOFFSET_1 0x1c74 +#define RADEON_PP_TXOFFSET_2 0x1c8c +# define RADEON_TXO_ENDIAN_NO_SWAP (0 << 0) +# define RADEON_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define RADEON_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define RADEON_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define RADEON_TXO_MACRO_LINEAR (0 << 2) +# define RADEON_TXO_MACRO_TILE (1 << 2) +# define RADEON_TXO_MICRO_LINEAR (0 << 3) +# define RADEON_TXO_MICRO_TILE_X2 (1 << 3) +# define RADEON_TXO_MICRO_TILE_OPT (2 << 3) +# define RADEON_TXO_OFFSET_MASK 0xffffffe0 +# define RADEON_TXO_OFFSET_SHIFT 5 + +#define RADEON_PP_CUBIC_OFFSET_T0_0 0x1dd0 /* bits [31:5] */ +#define RADEON_PP_CUBIC_OFFSET_T0_1 0x1dd4 +#define RADEON_PP_CUBIC_OFFSET_T0_2 0x1dd8 +#define RADEON_PP_CUBIC_OFFSET_T0_3 0x1ddc +#define RADEON_PP_CUBIC_OFFSET_T0_4 0x1de0 +#define RADEON_PP_CUBIC_OFFSET_T1_0 0x1e00 +#define RADEON_PP_CUBIC_OFFSET_T1_1 0x1e04 +#define RADEON_PP_CUBIC_OFFSET_T1_2 0x1e08 +#define RADEON_PP_CUBIC_OFFSET_T1_3 0x1e0c +#define RADEON_PP_CUBIC_OFFSET_T1_4 0x1e10 +#define RADEON_PP_CUBIC_OFFSET_T2_0 0x1e14 +#define RADEON_PP_CUBIC_OFFSET_T2_1 0x1e18 +#define RADEON_PP_CUBIC_OFFSET_T2_2 0x1e1c +#define RADEON_PP_CUBIC_OFFSET_T2_3 0x1e20 +#define RADEON_PP_CUBIC_OFFSET_T2_4 0x1e24 + +#define RADEON_PP_TEX_SIZE_0 0x1d04 /* NPOT */ +#define RADEON_PP_TEX_SIZE_1 0x1d0c +#define RADEON_PP_TEX_SIZE_2 0x1d14 +# define RADEON_TEX_USIZE_MASK (0x7ff << 0) +# define RADEON_TEX_USIZE_SHIFT 0 +# define RADEON_TEX_VSIZE_MASK (0x7ff << 16) +# define RADEON_TEX_VSIZE_SHIFT 16 +# define RADEON_SIGNED_RGB_MASK (1 << 30) +# define RADEON_SIGNED_RGB_SHIFT 30 +# define RADEON_SIGNED_ALPHA_MASK (1 << 31) +# define RADEON_SIGNED_ALPHA_SHIFT 31 +#define RADEON_PP_TEX_PITCH_0 0x1d08 /* NPOT */ +#define RADEON_PP_TEX_PITCH_1 0x1d10 /* NPOT */ +#define RADEON_PP_TEX_PITCH_2 0x1d18 /* NPOT */ +/* note: bits 13-5: 32 byte aligned stride of texture map */ + +#define RADEON_PP_TXCBLEND_0 0x1c60 +#define RADEON_PP_TXCBLEND_1 0x1c78 +#define RADEON_PP_TXCBLEND_2 0x1c90 +# define RADEON_COLOR_ARG_A_SHIFT 0 +# define RADEON_COLOR_ARG_A_MASK (0x1f << 0) +# define RADEON_COLOR_ARG_A_ZERO (0 << 0) +# define RADEON_COLOR_ARG_A_CURRENT_COLOR (2 << 0) +# define RADEON_COLOR_ARG_A_CURRENT_ALPHA (3 << 0) +# define RADEON_COLOR_ARG_A_DIFFUSE_COLOR (4 << 0) +# define RADEON_COLOR_ARG_A_DIFFUSE_ALPHA (5 << 0) +# define RADEON_COLOR_ARG_A_SPECULAR_COLOR (6 << 0) +# define RADEON_COLOR_ARG_A_SPECULAR_ALPHA (7 << 0) +# define RADEON_COLOR_ARG_A_TFACTOR_COLOR (8 << 0) +# define RADEON_COLOR_ARG_A_TFACTOR_ALPHA (9 << 0) +# define RADEON_COLOR_ARG_A_T0_COLOR (10 << 0) +# define RADEON_COLOR_ARG_A_T0_ALPHA (11 << 0) +# define RADEON_COLOR_ARG_A_T1_COLOR (12 << 0) +# define RADEON_COLOR_ARG_A_T1_ALPHA (13 << 0) +# define RADEON_COLOR_ARG_A_T2_COLOR (14 << 0) +# define RADEON_COLOR_ARG_A_T2_ALPHA (15 << 0) +# define RADEON_COLOR_ARG_A_T3_COLOR (16 << 0) +# define RADEON_COLOR_ARG_A_T3_ALPHA (17 << 0) +# define RADEON_COLOR_ARG_B_SHIFT 5 +# define RADEON_COLOR_ARG_B_MASK (0x1f << 5) +# define RADEON_COLOR_ARG_B_ZERO (0 << 5) +# define RADEON_COLOR_ARG_B_CURRENT_COLOR (2 << 5) +# define RADEON_COLOR_ARG_B_CURRENT_ALPHA (3 << 5) +# define RADEON_COLOR_ARG_B_DIFFUSE_COLOR (4 << 5) +# define RADEON_COLOR_ARG_B_DIFFUSE_ALPHA (5 << 5) +# define RADEON_COLOR_ARG_B_SPECULAR_COLOR (6 << 5) +# define RADEON_COLOR_ARG_B_SPECULAR_ALPHA (7 << 5) +# define RADEON_COLOR_ARG_B_TFACTOR_COLOR (8 << 5) +# define RADEON_COLOR_ARG_B_TFACTOR_ALPHA (9 << 5) +# define RADEON_COLOR_ARG_B_T0_COLOR (10 << 5) +# define RADEON_COLOR_ARG_B_T0_ALPHA (11 << 5) +# define RADEON_COLOR_ARG_B_T1_COLOR (12 << 5) +# define RADEON_COLOR_ARG_B_T1_ALPHA (13 << 5) +# define RADEON_COLOR_ARG_B_T2_COLOR (14 << 5) +# define RADEON_COLOR_ARG_B_T2_ALPHA (15 << 5) +# define RADEON_COLOR_ARG_B_T3_COLOR (16 << 5) +# define RADEON_COLOR_ARG_B_T3_ALPHA (17 << 5) +# define RADEON_COLOR_ARG_C_SHIFT 10 +# define RADEON_COLOR_ARG_C_MASK (0x1f << 10) +# define RADEON_COLOR_ARG_C_ZERO (0 << 10) +# define RADEON_COLOR_ARG_C_CURRENT_COLOR (2 << 10) +# define RADEON_COLOR_ARG_C_CURRENT_ALPHA (3 << 10) +# define RADEON_COLOR_ARG_C_DIFFUSE_COLOR (4 << 10) +# define RADEON_COLOR_ARG_C_DIFFUSE_ALPHA (5 << 10) +# define RADEON_COLOR_ARG_C_SPECULAR_COLOR (6 << 10) +# define RADEON_COLOR_ARG_C_SPECULAR_ALPHA (7 << 10) +# define RADEON_COLOR_ARG_C_TFACTOR_COLOR (8 << 10) +# define RADEON_COLOR_ARG_C_TFACTOR_ALPHA (9 << 10) +# define RADEON_COLOR_ARG_C_T0_COLOR (10 << 10) +# define RADEON_COLOR_ARG_C_T0_ALPHA (11 << 10) +# define RADEON_COLOR_ARG_C_T1_COLOR (12 << 10) +# define RADEON_COLOR_ARG_C_T1_ALPHA (13 << 10) +# define RADEON_COLOR_ARG_C_T2_COLOR (14 << 10) +# define RADEON_COLOR_ARG_C_T2_ALPHA (15 << 10) +# define RADEON_COLOR_ARG_C_T3_COLOR (16 << 10) +# define RADEON_COLOR_ARG_C_T3_ALPHA (17 << 10) +# define RADEON_COMP_ARG_A (1 << 15) +# define RADEON_COMP_ARG_A_SHIFT 15 +# define RADEON_COMP_ARG_B (1 << 16) +# define RADEON_COMP_ARG_B_SHIFT 16 +# define RADEON_COMP_ARG_C (1 << 17) +# define RADEON_COMP_ARG_C_SHIFT 17 +# define RADEON_BLEND_CTL_MASK (7 << 18) +# define RADEON_BLEND_CTL_ADD (0 << 18) +# define RADEON_BLEND_CTL_SUBTRACT (1 << 18) +# define RADEON_BLEND_CTL_ADDSIGNED (2 << 18) +# define RADEON_BLEND_CTL_BLEND (3 << 18) +# define RADEON_BLEND_CTL_DOT3 (4 << 18) +# define RADEON_SCALE_SHIFT 21 +# define RADEON_SCALE_MASK (3 << 21) +# define RADEON_SCALE_1X (0 << 21) +# define RADEON_SCALE_2X (1 << 21) +# define RADEON_SCALE_4X (2 << 21) +# define RADEON_CLAMP_TX (1 << 23) +# define RADEON_T0_EQ_TCUR (1 << 24) +# define RADEON_T1_EQ_TCUR (1 << 25) +# define RADEON_T2_EQ_TCUR (1 << 26) +# define RADEON_T3_EQ_TCUR (1 << 27) +# define RADEON_COLOR_ARG_MASK 0x1f +# define RADEON_COMP_ARG_SHIFT 15 +#define RADEON_PP_TXABLEND_0 0x1c64 +#define RADEON_PP_TXABLEND_1 0x1c7c +#define RADEON_PP_TXABLEND_2 0x1c94 +# define RADEON_ALPHA_ARG_A_SHIFT 0 +# define RADEON_ALPHA_ARG_A_MASK (0xf << 0) +# define RADEON_ALPHA_ARG_A_ZERO (0 << 0) +# define RADEON_ALPHA_ARG_A_CURRENT_ALPHA (1 << 0) +# define RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA (2 << 0) +# define RADEON_ALPHA_ARG_A_SPECULAR_ALPHA (3 << 0) +# define RADEON_ALPHA_ARG_A_TFACTOR_ALPHA (4 << 0) +# define RADEON_ALPHA_ARG_A_T0_ALPHA (5 << 0) +# define RADEON_ALPHA_ARG_A_T1_ALPHA (6 << 0) +# define RADEON_ALPHA_ARG_A_T2_ALPHA (7 << 0) +# define RADEON_ALPHA_ARG_A_T3_ALPHA (8 << 0) +# define RADEON_ALPHA_ARG_B_SHIFT 4 +# define RADEON_ALPHA_ARG_B_MASK (0xf << 4) +# define RADEON_ALPHA_ARG_B_ZERO (0 << 4) +# define RADEON_ALPHA_ARG_B_CURRENT_ALPHA (1 << 4) +# define RADEON_ALPHA_ARG_B_DIFFUSE_ALPHA (2 << 4) +# define RADEON_ALPHA_ARG_B_SPECULAR_ALPHA (3 << 4) +# define RADEON_ALPHA_ARG_B_TFACTOR_ALPHA (4 << 4) +# define RADEON_ALPHA_ARG_B_T0_ALPHA (5 << 4) +# define RADEON_ALPHA_ARG_B_T1_ALPHA (6 << 4) +# define RADEON_ALPHA_ARG_B_T2_ALPHA (7 << 4) +# define RADEON_ALPHA_ARG_B_T3_ALPHA (8 << 4) +# define RADEON_ALPHA_ARG_C_SHIFT 8 +# define RADEON_ALPHA_ARG_C_MASK (0xf << 8) +# define RADEON_ALPHA_ARG_C_ZERO (0 << 8) +# define RADEON_ALPHA_ARG_C_CURRENT_ALPHA (1 << 8) +# define RADEON_ALPHA_ARG_C_DIFFUSE_ALPHA (2 << 8) +# define RADEON_ALPHA_ARG_C_SPECULAR_ALPHA (3 << 8) +# define RADEON_ALPHA_ARG_C_TFACTOR_ALPHA (4 << 8) +# define RADEON_ALPHA_ARG_C_T0_ALPHA (5 << 8) +# define RADEON_ALPHA_ARG_C_T1_ALPHA (6 << 8) +# define RADEON_ALPHA_ARG_C_T2_ALPHA (7 << 8) +# define RADEON_ALPHA_ARG_C_T3_ALPHA (8 << 8) +# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 9) +# define RADEON_ALPHA_ARG_MASK 0xf + +#define RADEON_PP_TFACTOR_0 0x1c68 +#define RADEON_PP_TFACTOR_1 0x1c80 +#define RADEON_PP_TFACTOR_2 0x1c98 + +#define RADEON_RB3D_BLENDCNTL 0x1c20 +# define RADEON_COMB_FCN_MASK (3 << 12) +# define RADEON_COMB_FCN_ADD_CLAMP (0 << 12) +# define RADEON_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define RADEON_COMB_FCN_SUB_CLAMP (2 << 12) +# define RADEON_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define RADEON_SRC_BLEND_GL_ZERO (32 << 16) +# define RADEON_SRC_BLEND_GL_ONE (33 << 16) +# define RADEON_SRC_BLEND_GL_SRC_COLOR (34 << 16) +# define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16) +# define RADEON_SRC_BLEND_GL_DST_COLOR (36 << 16) +# define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16) +# define RADEON_SRC_BLEND_GL_SRC_ALPHA (38 << 16) +# define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16) +# define RADEON_SRC_BLEND_GL_DST_ALPHA (40 << 16) +# define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16) +# define RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE (42 << 16) +# define RADEON_SRC_BLEND_MASK (63 << 16) +# define RADEON_DST_BLEND_GL_ZERO (32 << 24) +# define RADEON_DST_BLEND_GL_ONE (33 << 24) +# define RADEON_DST_BLEND_GL_SRC_COLOR (34 << 24) +# define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24) +# define RADEON_DST_BLEND_GL_DST_COLOR (36 << 24) +# define RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24) +# define RADEON_DST_BLEND_GL_SRC_ALPHA (38 << 24) +# define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24) +# define RADEON_DST_BLEND_GL_DST_ALPHA (40 << 24) +# define RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24) +# define RADEON_DST_BLEND_MASK (63 << 24) +#define RADEON_RB3D_CNTL 0x1c3c +# define RADEON_ALPHA_BLEND_ENABLE (1 << 0) +# define RADEON_PLANE_MASK_ENABLE (1 << 1) +# define RADEON_DITHER_ENABLE (1 << 2) +# define RADEON_ROUND_ENABLE (1 << 3) +# define RADEON_SCALE_DITHER_ENABLE (1 << 4) +# define RADEON_DITHER_INIT (1 << 5) +# define RADEON_ROP_ENABLE (1 << 6) +# define RADEON_STENCIL_ENABLE (1 << 7) +# define RADEON_Z_ENABLE (1 << 8) +# define RADEON_DEPTH_XZ_OFFEST_ENABLE (1 << 9) +# define RADEON_COLOR_FORMAT_ARGB1555 (3 << 10) +# define RADEON_COLOR_FORMAT_RGB565 (4 << 10) +# define RADEON_COLOR_FORMAT_ARGB8888 (6 << 10) +# define RADEON_COLOR_FORMAT_RGB332 (7 << 10) +# define RADEON_COLOR_FORMAT_Y8 (8 << 10) +# define RADEON_COLOR_FORMAT_RGB8 (9 << 10) +# define RADEON_COLOR_FORMAT_YUV422_VYUY (11 << 10) +# define RADEON_COLOR_FORMAT_YUV422_YVYU (12 << 10) +# define RADEON_COLOR_FORMAT_aYUV444 (14 << 10) +# define RADEON_COLOR_FORMAT_ARGB4444 (15 << 10) +# define RADEON_CLRCMP_FLIP_ENABLE (1 << 14) +# define RADEON_ZBLOCK16 (1 << 15) +#define RADEON_RB3D_COLOROFFSET 0x1c40 +# define RADEON_COLOROFFSET_MASK 0xfffffff0 +#define RADEON_RB3D_COLORPITCH 0x1c48 +# define RADEON_COLORPITCH_MASK 0x000001ff8 +# define RADEON_COLOR_TILE_ENABLE (1 << 16) +# define RADEON_COLOR_MICROTILE_ENABLE (1 << 17) +# define RADEON_COLOR_ENDIAN_NO_SWAP (0 << 18) +# define RADEON_COLOR_ENDIAN_WORD_SWAP (1 << 18) +# define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18) +#define RADEON_RB3D_DEPTHOFFSET 0x1c24 +#define RADEON_RB3D_DEPTHPITCH 0x1c28 +# define RADEON_DEPTHPITCH_MASK 0x00001ff8 +# define RADEON_DEPTH_HYPERZ (3 << 16) +# define RADEON_DEPTH_ENDIAN_NO_SWAP (0 << 18) +# define RADEON_DEPTH_ENDIAN_WORD_SWAP (1 << 18) +# define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18) +#define RADEON_RB3D_PLANEMASK 0x1d84 +#define RADEON_RB3D_ROPCNTL 0x1d80 +# define RADEON_ROP_MASK (15 << 8) +# define RADEON_ROP_CLEAR (0 << 8) +# define RADEON_ROP_NOR (1 << 8) +# define RADEON_ROP_AND_INVERTED (2 << 8) +# define RADEON_ROP_COPY_INVERTED (3 << 8) +# define RADEON_ROP_AND_REVERSE (4 << 8) +# define RADEON_ROP_INVERT (5 << 8) +# define RADEON_ROP_XOR (6 << 8) +# define RADEON_ROP_NAND (7 << 8) +# define RADEON_ROP_AND (8 << 8) +# define RADEON_ROP_EQUIV (9 << 8) +# define RADEON_ROP_NOOP (10 << 8) +# define RADEON_ROP_OR_INVERTED (11 << 8) +# define RADEON_ROP_COPY (12 << 8) +# define RADEON_ROP_OR_REVERSE (13 << 8) +# define RADEON_ROP_OR (14 << 8) +# define RADEON_ROP_SET (15 << 8) +#define RADEON_RB3D_STENCILREFMASK 0x1d7c +# define RADEON_STENCIL_REF_SHIFT 0 +# define RADEON_STENCIL_REF_MASK (0xff << 0) +# define RADEON_STENCIL_MASK_SHIFT 16 +# define RADEON_STENCIL_VALUE_MASK (0xff << 16) +# define RADEON_STENCIL_WRITEMASK_SHIFT 24 +# define RADEON_STENCIL_WRITE_MASK (0xff << 24) +#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c +# define RADEON_DEPTH_FORMAT_MASK (0xf << 0) +# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0) +# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0) +# define RADEON_DEPTH_FORMAT_24BIT_FLOAT_Z (3 << 0) +# define RADEON_DEPTH_FORMAT_32BIT_INT_Z (4 << 0) +# define RADEON_DEPTH_FORMAT_32BIT_FLOAT_Z (5 << 0) +# define RADEON_DEPTH_FORMAT_16BIT_FLOAT_W (7 << 0) +# define RADEON_DEPTH_FORMAT_24BIT_FLOAT_W (9 << 0) +# define RADEON_DEPTH_FORMAT_32BIT_FLOAT_W (11 << 0) +# define RADEON_Z_TEST_NEVER (0 << 4) +# define RADEON_Z_TEST_LESS (1 << 4) +# define RADEON_Z_TEST_LEQUAL (2 << 4) +# define RADEON_Z_TEST_EQUAL (3 << 4) +# define RADEON_Z_TEST_GEQUAL (4 << 4) +# define RADEON_Z_TEST_GREATER (5 << 4) +# define RADEON_Z_TEST_NEQUAL (6 << 4) +# define RADEON_Z_TEST_ALWAYS (7 << 4) +# define RADEON_Z_TEST_MASK (7 << 4) +# define RADEON_Z_HIERARCHY_ENABLE (1 << 8) +# define RADEON_STENCIL_TEST_NEVER (0 << 12) +# define RADEON_STENCIL_TEST_LESS (1 << 12) +# define RADEON_STENCIL_TEST_LEQUAL (2 << 12) +# define RADEON_STENCIL_TEST_EQUAL (3 << 12) +# define RADEON_STENCIL_TEST_GEQUAL (4 << 12) +# define RADEON_STENCIL_TEST_GREATER (5 << 12) +# define RADEON_STENCIL_TEST_NEQUAL (6 << 12) +# define RADEON_STENCIL_TEST_ALWAYS (7 << 12) +# define RADEON_STENCIL_TEST_MASK (0x7 << 12) +# define RADEON_STENCIL_FAIL_KEEP (0 << 16) +# define RADEON_STENCIL_FAIL_ZERO (1 << 16) +# define RADEON_STENCIL_FAIL_REPLACE (2 << 16) +# define RADEON_STENCIL_FAIL_INC (3 << 16) +# define RADEON_STENCIL_FAIL_DEC (4 << 16) +# define RADEON_STENCIL_FAIL_INVERT (5 << 16) +# define RADEON_STENCIL_FAIL_INC_WRAP (6 << 16) +# define RADEON_STENCIL_FAIL_DEC_WRAP (7 << 16) +# define RADEON_STENCIL_FAIL_MASK (0x7 << 16) +# define RADEON_STENCIL_ZPASS_KEEP (0 << 20) +# define RADEON_STENCIL_ZPASS_ZERO (1 << 20) +# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20) +# define RADEON_STENCIL_ZPASS_INC (3 << 20) +# define RADEON_STENCIL_ZPASS_DEC (4 << 20) +# define RADEON_STENCIL_ZPASS_INVERT (5 << 20) +# define RADEON_STENCIL_ZPASS_INC_WRAP (6 << 20) +# define RADEON_STENCIL_ZPASS_DEC_WRAP (7 << 20) +# define RADEON_STENCIL_ZPASS_MASK (0x7 << 20) +# define RADEON_STENCIL_ZFAIL_KEEP (0 << 24) +# define RADEON_STENCIL_ZFAIL_ZERO (1 << 24) +# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24) +# define RADEON_STENCIL_ZFAIL_INC (3 << 24) +# define RADEON_STENCIL_ZFAIL_DEC (4 << 24) +# define RADEON_STENCIL_ZFAIL_INVERT (5 << 24) +# define RADEON_STENCIL_ZFAIL_INC_WRAP (6 << 24) +# define RADEON_STENCIL_ZFAIL_DEC_WRAP (7 << 24) +# define RADEON_STENCIL_ZFAIL_MASK (0x7 << 24) +# define RADEON_Z_COMPRESSION_ENABLE (1 << 28) +# define RADEON_FORCE_Z_DIRTY (1 << 29) +# define RADEON_Z_WRITE_ENABLE (1 << 30) +# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31) +#define RADEON_RE_LINE_PATTERN 0x1cd0 +# define RADEON_LINE_PATTERN_MASK 0x0000ffff +# define RADEON_LINE_REPEAT_COUNT_SHIFT 16 +# define RADEON_LINE_PATTERN_START_SHIFT 24 +# define RADEON_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28) +# define RADEON_LINE_PATTERN_BIG_BIT_ORDER (1 << 28) +# define RADEON_LINE_PATTERN_AUTO_RESET (1 << 29) +#define RADEON_RE_LINE_STATE 0x1cd4 +# define RADEON_LINE_CURRENT_PTR_SHIFT 0 +# define RADEON_LINE_CURRENT_COUNT_SHIFT 8 +#define RADEON_RE_MISC 0x26c4 +# define RADEON_STIPPLE_COORD_MASK 0x1f +# define RADEON_STIPPLE_X_OFFSET_SHIFT 0 +# define RADEON_STIPPLE_X_OFFSET_MASK (0x1f << 0) +# define RADEON_STIPPLE_Y_OFFSET_SHIFT 8 +# define RADEON_STIPPLE_Y_OFFSET_MASK (0x1f << 8) +# define RADEON_STIPPLE_LITTLE_BIT_ORDER (0 << 16) +# define RADEON_STIPPLE_BIG_BIT_ORDER (1 << 16) +#define RADEON_RE_SOLID_COLOR 0x1c1c +#define RADEON_RE_TOP_LEFT 0x26c0 +# define RADEON_RE_LEFT_SHIFT 0 +# define RADEON_RE_TOP_SHIFT 16 +#define RADEON_RE_WIDTH_HEIGHT 0x1c44 +# define RADEON_RE_WIDTH_SHIFT 0 +# define RADEON_RE_HEIGHT_SHIFT 16 + +#define RADEON_SE_CNTL 0x1c4c +# define RADEON_FFACE_CULL_CW (0 << 0) +# define RADEON_FFACE_CULL_CCW (1 << 0) +# define RADEON_FFACE_CULL_DIR_MASK (1 << 0) +# define RADEON_BFACE_CULL (0 << 1) +# define RADEON_BFACE_SOLID (3 << 1) +# define RADEON_FFACE_CULL (0 << 3) +# define RADEON_FFACE_SOLID (3 << 3) +# define RADEON_FFACE_CULL_MASK (3 << 3) +# define RADEON_BADVTX_CULL_DISABLE (1 << 5) +# define RADEON_FLAT_SHADE_VTX_0 (0 << 6) +# define RADEON_FLAT_SHADE_VTX_1 (1 << 6) +# define RADEON_FLAT_SHADE_VTX_2 (2 << 6) +# define RADEON_FLAT_SHADE_VTX_LAST (3 << 6) +# define RADEON_DIFFUSE_SHADE_SOLID (0 << 8) +# define RADEON_DIFFUSE_SHADE_FLAT (1 << 8) +# define RADEON_DIFFUSE_SHADE_GOURAUD (2 << 8) +# define RADEON_DIFFUSE_SHADE_MASK (3 << 8) +# define RADEON_ALPHA_SHADE_SOLID (0 << 10) +# define RADEON_ALPHA_SHADE_FLAT (1 << 10) +# define RADEON_ALPHA_SHADE_GOURAUD (2 << 10) +# define RADEON_ALPHA_SHADE_MASK (3 << 10) +# define RADEON_SPECULAR_SHADE_SOLID (0 << 12) +# define RADEON_SPECULAR_SHADE_FLAT (1 << 12) +# define RADEON_SPECULAR_SHADE_GOURAUD (2 << 12) +# define RADEON_SPECULAR_SHADE_MASK (3 << 12) +# define RADEON_FOG_SHADE_SOLID (0 << 14) +# define RADEON_FOG_SHADE_FLAT (1 << 14) +# define RADEON_FOG_SHADE_GOURAUD (2 << 14) +# define RADEON_FOG_SHADE_MASK (3 << 14) +# define RADEON_ZBIAS_ENABLE_POINT (1 << 16) +# define RADEON_ZBIAS_ENABLE_LINE (1 << 17) +# define RADEON_ZBIAS_ENABLE_TRI (1 << 18) +# define RADEON_WIDELINE_ENABLE (1 << 20) +# define RADEON_VPORT_XY_XFORM_ENABLE (1 << 24) +# define RADEON_VPORT_Z_XFORM_ENABLE (1 << 25) +# define RADEON_VTX_PIX_CENTER_D3D (0 << 27) +# define RADEON_VTX_PIX_CENTER_OGL (1 << 27) +# define RADEON_ROUND_MODE_TRUNC (0 << 28) +# define RADEON_ROUND_MODE_ROUND (1 << 28) +# define RADEON_ROUND_MODE_ROUND_EVEN (2 << 28) +# define RADEON_ROUND_MODE_ROUND_ODD (3 << 28) +# define RADEON_ROUND_PREC_16TH_PIX (0 << 30) +# define RADEON_ROUND_PREC_8TH_PIX (1 << 30) +# define RADEON_ROUND_PREC_4TH_PIX (2 << 30) +# define RADEON_ROUND_PREC_HALF_PIX (3 << 30) +#define RADEON_SE_CNTL_STATUS 0x2140 +# define RADEON_VC_NO_SWAP (0 << 0) +# define RADEON_VC_16BIT_SWAP (1 << 0) +# define RADEON_VC_32BIT_SWAP (2 << 0) +# define RADEON_VC_HALF_DWORD_SWAP (3 << 0) +# define RADEON_TCL_BYPASS (1 << 8) +#define RADEON_SE_COORD_FMT 0x1c50 +# define RADEON_VTX_XY_PRE_MULT_1_OVER_W0 (1 << 0) +# define RADEON_VTX_Z_PRE_MULT_1_OVER_W0 (1 << 1) +# define RADEON_VTX_ST0_NONPARAMETRIC (1 << 8) +# define RADEON_VTX_ST1_NONPARAMETRIC (1 << 9) +# define RADEON_VTX_ST2_NONPARAMETRIC (1 << 10) +# define RADEON_VTX_ST3_NONPARAMETRIC (1 << 11) +# define RADEON_VTX_W0_NORMALIZE (1 << 12) +# define RADEON_VTX_W0_IS_NOT_1_OVER_W0 (1 << 16) +# define RADEON_VTX_ST0_PRE_MULT_1_OVER_W0 (1 << 17) +# define RADEON_VTX_ST1_PRE_MULT_1_OVER_W0 (1 << 19) +# define RADEON_VTX_ST2_PRE_MULT_1_OVER_W0 (1 << 21) +# define RADEON_VTX_ST3_PRE_MULT_1_OVER_W0 (1 << 23) +# define RADEON_TEX1_W_ROUTING_USE_W0 (0 << 26) +# define RADEON_TEX1_W_ROUTING_USE_Q1 (1 << 26) +#define RADEON_SE_LINE_WIDTH 0x1db8 +#define RADEON_SE_TCL_LIGHT_MODEL_CTL 0x226c +# define RADEON_LIGHTING_ENABLE (1 << 0) +# define RADEON_LIGHT_IN_MODELSPACE (1 << 1) +# define RADEON_LOCAL_VIEWER (1 << 2) +# define RADEON_NORMALIZE_NORMALS (1 << 3) +# define RADEON_RESCALE_NORMALS (1 << 4) +# define RADEON_SPECULAR_LIGHTS (1 << 5) +# define RADEON_DIFFUSE_SPECULAR_COMBINE (1 << 6) +# define RADEON_LIGHT_ALPHA (1 << 7) +# define RADEON_LOCAL_LIGHT_VEC_GL (1 << 8) +# define RADEON_LIGHT_NO_NORMAL_AMBIENT_ONLY (1 << 9) +# define RADEON_LM_SOURCE_STATE_PREMULT 0 +# define RADEON_LM_SOURCE_STATE_MULT 1 +# define RADEON_LM_SOURCE_VERTEX_DIFFUSE 2 +# define RADEON_LM_SOURCE_VERTEX_SPECULAR 3 +# define RADEON_EMISSIVE_SOURCE_SHIFT 16 +# define RADEON_AMBIENT_SOURCE_SHIFT 18 +# define RADEON_DIFFUSE_SOURCE_SHIFT 20 +# define RADEON_SPECULAR_SOURCE_SHIFT 22 +#define RADEON_SE_TCL_MATERIAL_AMBIENT_RED 0x2220 +#define RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN 0x2224 +#define RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE 0x2228 +#define RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA 0x222c +#define RADEON_SE_TCL_MATERIAL_DIFFUSE_RED 0x2230 +#define RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN 0x2234 +#define RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE 0x2238 +#define RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA 0x223c +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED 0x2210 +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN 0x2214 +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE 0x2218 +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA 0x221c +#define RADEON_SE_TCL_MATERIAL_SPECULAR_RED 0x2240 +#define RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN 0x2244 +#define RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE 0x2248 +#define RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA 0x224c +#define RADEON_SE_TCL_MATRIX_SELECT_0 0x225c +# define RADEON_MODELVIEW_0_SHIFT 0 +# define RADEON_MODELVIEW_1_SHIFT 4 +# define RADEON_MODELVIEW_2_SHIFT 8 +# define RADEON_MODELVIEW_3_SHIFT 12 +# define RADEON_IT_MODELVIEW_0_SHIFT 16 +# define RADEON_IT_MODELVIEW_1_SHIFT 20 +# define RADEON_IT_MODELVIEW_2_SHIFT 24 +# define RADEON_IT_MODELVIEW_3_SHIFT 28 +#define RADEON_SE_TCL_MATRIX_SELECT_1 0x2260 +# define RADEON_MODELPROJECT_0_SHIFT 0 +# define RADEON_MODELPROJECT_1_SHIFT 4 +# define RADEON_MODELPROJECT_2_SHIFT 8 +# define RADEON_MODELPROJECT_3_SHIFT 12 +# define RADEON_TEXMAT_0_SHIFT 16 +# define RADEON_TEXMAT_1_SHIFT 20 +# define RADEON_TEXMAT_2_SHIFT 24 +# define RADEON_TEXMAT_3_SHIFT 28 + + +#define RADEON_SE_TCL_OUTPUT_VTX_FMT 0x2254 +# define RADEON_TCL_VTX_W0 (1 << 0) +# define RADEON_TCL_VTX_FP_DIFFUSE (1 << 1) +# define RADEON_TCL_VTX_FP_ALPHA (1 << 2) +# define RADEON_TCL_VTX_PK_DIFFUSE (1 << 3) +# define RADEON_TCL_VTX_FP_SPEC (1 << 4) +# define RADEON_TCL_VTX_FP_FOG (1 << 5) +# define RADEON_TCL_VTX_PK_SPEC (1 << 6) +# define RADEON_TCL_VTX_ST0 (1 << 7) +# define RADEON_TCL_VTX_ST1 (1 << 8) +# define RADEON_TCL_VTX_Q1 (1 << 9) +# define RADEON_TCL_VTX_ST2 (1 << 10) +# define RADEON_TCL_VTX_Q2 (1 << 11) +# define RADEON_TCL_VTX_ST3 (1 << 12) +# define RADEON_TCL_VTX_Q3 (1 << 13) +# define RADEON_TCL_VTX_Q0 (1 << 14) +# define RADEON_TCL_VTX_WEIGHT_COUNT_SHIFT 15 +# define RADEON_TCL_VTX_NORM0 (1 << 18) +# define RADEON_TCL_VTX_XY1 (1 << 27) +# define RADEON_TCL_VTX_Z1 (1 << 28) +# define RADEON_TCL_VTX_W1 (1 << 29) +# define RADEON_TCL_VTX_NORM1 (1 << 30) +# define RADEON_TCL_VTX_Z0 (1 << 31) + +#define RADEON_SE_TCL_OUTPUT_VTX_SEL 0x2258 +# define RADEON_TCL_COMPUTE_XYZW (1 << 0) +# define RADEON_TCL_COMPUTE_DIFFUSE (1 << 1) +# define RADEON_TCL_COMPUTE_SPECULAR (1 << 2) +# define RADEON_TCL_FORCE_NAN_IF_COLOR_NAN (1 << 3) +# define RADEON_TCL_FORCE_INORDER_PROC (1 << 4) +# define RADEON_TCL_TEX_INPUT_TEX_0 0 +# define RADEON_TCL_TEX_INPUT_TEX_1 1 +# define RADEON_TCL_TEX_INPUT_TEX_2 2 +# define RADEON_TCL_TEX_INPUT_TEX_3 3 +# define RADEON_TCL_TEX_COMPUTED_TEX_0 8 +# define RADEON_TCL_TEX_COMPUTED_TEX_1 9 +# define RADEON_TCL_TEX_COMPUTED_TEX_2 10 +# define RADEON_TCL_TEX_COMPUTED_TEX_3 11 +# define RADEON_TCL_TEX_0_OUTPUT_SHIFT 16 +# define RADEON_TCL_TEX_1_OUTPUT_SHIFT 20 +# define RADEON_TCL_TEX_2_OUTPUT_SHIFT 24 +# define RADEON_TCL_TEX_3_OUTPUT_SHIFT 28 + +#define RADEON_SE_TCL_PER_LIGHT_CTL_0 0x2270 +# define RADEON_LIGHT_0_ENABLE (1 << 0) +# define RADEON_LIGHT_0_ENABLE_AMBIENT (1 << 1) +# define RADEON_LIGHT_0_ENABLE_SPECULAR (1 << 2) +# define RADEON_LIGHT_0_IS_LOCAL (1 << 3) +# define RADEON_LIGHT_0_IS_SPOT (1 << 4) +# define RADEON_LIGHT_0_DUAL_CONE (1 << 5) +# define RADEON_LIGHT_0_ENABLE_RANGE_ATTEN (1 << 6) +# define RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN (1 << 7) +# define RADEON_LIGHT_0_SHIFT 0 +# define RADEON_LIGHT_1_ENABLE (1 << 16) +# define RADEON_LIGHT_1_ENABLE_AMBIENT (1 << 17) +# define RADEON_LIGHT_1_ENABLE_SPECULAR (1 << 18) +# define RADEON_LIGHT_1_IS_LOCAL (1 << 19) +# define RADEON_LIGHT_1_IS_SPOT (1 << 20) +# define RADEON_LIGHT_1_DUAL_CONE (1 << 21) +# define RADEON_LIGHT_1_ENABLE_RANGE_ATTEN (1 << 22) +# define RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN (1 << 23) +# define RADEON_LIGHT_1_SHIFT 16 +#define RADEON_SE_TCL_PER_LIGHT_CTL_1 0x2274 +# define RADEON_LIGHT_2_SHIFT 0 +# define RADEON_LIGHT_3_SHIFT 16 +#define RADEON_SE_TCL_PER_LIGHT_CTL_2 0x2278 +# define RADEON_LIGHT_4_SHIFT 0 +# define RADEON_LIGHT_5_SHIFT 16 +#define RADEON_SE_TCL_PER_LIGHT_CTL_3 0x227c +# define RADEON_LIGHT_6_SHIFT 0 +# define RADEON_LIGHT_7_SHIFT 16 + +#define RADEON_SE_TCL_STATE_FLUSH 0x2284 + +#define RADEON_SE_TCL_SHININESS 0x2250 + +#define RADEON_SE_TCL_TEXTURE_PROC_CTL 0x2268 +# define RADEON_TEXGEN_TEXMAT_0_ENABLE (1 << 0) +# define RADEON_TEXGEN_TEXMAT_1_ENABLE (1 << 1) +# define RADEON_TEXGEN_TEXMAT_2_ENABLE (1 << 2) +# define RADEON_TEXGEN_TEXMAT_3_ENABLE (1 << 3) +# define RADEON_TEXMAT_0_ENABLE (1 << 4) +# define RADEON_TEXMAT_1_ENABLE (1 << 5) +# define RADEON_TEXMAT_2_ENABLE (1 << 6) +# define RADEON_TEXMAT_3_ENABLE (1 << 7) +# define RADEON_TEXGEN_INPUT_MASK 0xf +# define RADEON_TEXGEN_INPUT_TEXCOORD_0 0 +# define RADEON_TEXGEN_INPUT_TEXCOORD_1 1 +# define RADEON_TEXGEN_INPUT_TEXCOORD_2 2 +# define RADEON_TEXGEN_INPUT_TEXCOORD_3 3 +# define RADEON_TEXGEN_INPUT_OBJ 4 +# define RADEON_TEXGEN_INPUT_EYE 5 +# define RADEON_TEXGEN_INPUT_EYE_NORMAL 6 +# define RADEON_TEXGEN_INPUT_EYE_REFLECT 7 +# define RADEON_TEXGEN_INPUT_EYE_NORMALIZED 8 +# define RADEON_TEXGEN_0_INPUT_SHIFT 16 +# define RADEON_TEXGEN_1_INPUT_SHIFT 20 +# define RADEON_TEXGEN_2_INPUT_SHIFT 24 +# define RADEON_TEXGEN_3_INPUT_SHIFT 28 + +#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL 0x2264 +# define RADEON_UCP_IN_CLIP_SPACE (1 << 0) +# define RADEON_UCP_IN_MODEL_SPACE (1 << 1) +# define RADEON_UCP_ENABLE_0 (1 << 2) +# define RADEON_UCP_ENABLE_1 (1 << 3) +# define RADEON_UCP_ENABLE_2 (1 << 4) +# define RADEON_UCP_ENABLE_3 (1 << 5) +# define RADEON_UCP_ENABLE_4 (1 << 6) +# define RADEON_UCP_ENABLE_5 (1 << 7) +# define RADEON_TCL_FOG_MASK (3 << 8) +# define RADEON_TCL_FOG_DISABLE (0 << 8) +# define RADEON_TCL_FOG_EXP (1 << 8) +# define RADEON_TCL_FOG_EXP2 (2 << 8) +# define RADEON_TCL_FOG_LINEAR (3 << 8) +# define RADEON_RNG_BASED_FOG (1 << 10) +# define RADEON_LIGHT_TWOSIDE (1 << 11) +# define RADEON_BLEND_OP_COUNT_MASK (7 << 12) +# define RADEON_BLEND_OP_COUNT_SHIFT 12 +# define RADEON_POSITION_BLEND_OP_ENABLE (1 << 16) +# define RADEON_NORMAL_BLEND_OP_ENABLE (1 << 17) +# define RADEON_VERTEX_BLEND_SRC_0_PRIMARY (0 << 18) +# define RADEON_VERTEX_BLEND_SRC_0_SECONDARY (1 << 18) +# define RADEON_VERTEX_BLEND_SRC_1_PRIMARY (0 << 19) +# define RADEON_VERTEX_BLEND_SRC_1_SECONDARY (1 << 19) +# define RADEON_VERTEX_BLEND_SRC_2_PRIMARY (0 << 20) +# define RADEON_VERTEX_BLEND_SRC_2_SECONDARY (1 << 20) +# define RADEON_VERTEX_BLEND_SRC_3_PRIMARY (0 << 21) +# define RADEON_VERTEX_BLEND_SRC_3_SECONDARY (1 << 21) +# define RADEON_VERTEX_BLEND_WGT_MINUS_ONE (1 << 22) +# define RADEON_CULL_FRONT_IS_CW (0 << 28) +# define RADEON_CULL_FRONT_IS_CCW (1 << 28) +# define RADEON_CULL_FRONT (1 << 29) +# define RADEON_CULL_BACK (1 << 30) +# define RADEON_FORCE_W_TO_ONE (1 << 31) + +#define RADEON_SE_VPORT_XSCALE 0x1d98 +#define RADEON_SE_VPORT_XOFFSET 0x1d9c +#define RADEON_SE_VPORT_YSCALE 0x1da0 +#define RADEON_SE_VPORT_YOFFSET 0x1da4 +#define RADEON_SE_VPORT_ZSCALE 0x1da8 +#define RADEON_SE_VPORT_ZOFFSET 0x1dac +#define RADEON_SE_ZBIAS_FACTOR 0x1db0 +#define RADEON_SE_ZBIAS_CONSTANT 0x1db4 + + + + /* Registers for CP and Microcode Engine */ +#define RADEON_CP_ME_RAM_ADDR 0x07d4 +#define RADEON_CP_ME_RAM_RADDR 0x07d8 +#define RADEON_CP_ME_RAM_DATAH 0x07dc +#define RADEON_CP_ME_RAM_DATAL 0x07e0 + +#define RADEON_CP_RB_BASE 0x0700 +#define RADEON_CP_RB_CNTL 0x0704 +#define RADEON_CP_RB_RPTR_ADDR 0x070c +#define RADEON_CP_RB_RPTR 0x0710 +#define RADEON_CP_RB_WPTR 0x0714 + +#define RADEON_CP_IB_BASE 0x0738 +#define RADEON_CP_IB_BUFSZ 0x073c + +#define RADEON_CP_CSQ_CNTL 0x0740 +# define RADEON_CSQ_CNT_PRIMARY_MASK (0xff << 0) +# define RADEON_CSQ_PRIDIS_INDDIS (0 << 28) +# define RADEON_CSQ_PRIPIO_INDDIS (1 << 28) +# define RADEON_CSQ_PRIBM_INDDIS (2 << 28) +# define RADEON_CSQ_PRIPIO_INDBM (3 << 28) +# define RADEON_CSQ_PRIBM_INDBM (4 << 28) +# define RADEON_CSQ_PRIPIO_INDPIO (15 << 28) +#define RADEON_CP_CSQ_STAT 0x07f8 +# define RADEON_CSQ_RPTR_PRIMARY_MASK (0xff << 0) +# define RADEON_CSQ_WPTR_PRIMARY_MASK (0xff << 8) +# define RADEON_CSQ_RPTR_INDIRECT_MASK (0xff << 16) +# define RADEON_CSQ_WPTR_INDIRECT_MASK (0xff << 24) +#define RADEON_CP_CSQ_ADDR 0x07f0 +#define RADEON_CP_CSQ_DATA 0x07f4 +#define RADEON_CP_CSQ_APER_PRIMARY 0x1000 +#define RADEON_CP_CSQ_APER_INDIRECT 0x1300 + +#define RADEON_CP_RB_WPTR_DELAY 0x0718 +# define RADEON_PRE_WRITE_TIMER_SHIFT 0 +# define RADEON_PRE_WRITE_LIMIT_SHIFT 23 + +#define RADEON_AIC_CNTL 0x01d0 +# define RADEON_PCIGART_TRANSLATE_EN (1 << 0) +#define RADEON_AIC_LO_ADDR 0x01dc + + + + /* Constants */ +#define RADEON_LAST_FRAME_REG RADEON_GUI_SCRATCH_REG0 +#define RADEON_LAST_CLEAR_REG RADEON_GUI_SCRATCH_REG2 + + + + /* CP packet types */ +#define RADEON_CP_PACKET0 0x00000000 +#define RADEON_CP_PACKET1 0x40000000 +#define RADEON_CP_PACKET2 0x80000000 +#define RADEON_CP_PACKET3 0xC0000000 +# define RADEON_CP_PACKET_MASK 0xC0000000 +# define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000 +# define RADEON_CP_PACKET_MAX_DWORDS (1 << 12) +# define RADEON_CP_PACKET0_REG_MASK 0x000007ff +# define RADEON_CP_PACKET1_REG0_MASK 0x000007ff +# define RADEON_CP_PACKET1_REG1_MASK 0x003ff800 + +#define RADEON_CP_PACKET0_ONE_REG_WR 0x00008000 + +#define RADEON_CP_PACKET3_NOP 0xC0001000 +#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900 +#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00 +#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00 +#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300 +#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400 +#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600 +#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800 +#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900 +#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 +#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 +#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 +#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 +#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 +#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400 +#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500 +#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800 +#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00 +#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00 +#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 0xC0009C00 + + +#define RADEON_CP_VC_FRMT_XY 0x00000000 +#define RADEON_CP_VC_FRMT_W0 0x00000001 +#define RADEON_CP_VC_FRMT_FPCOLOR 0x00000002 +#define RADEON_CP_VC_FRMT_FPALPHA 0x00000004 +#define RADEON_CP_VC_FRMT_PKCOLOR 0x00000008 +#define RADEON_CP_VC_FRMT_FPSPEC 0x00000010 +#define RADEON_CP_VC_FRMT_FPFOG 0x00000020 +#define RADEON_CP_VC_FRMT_PKSPEC 0x00000040 +#define RADEON_CP_VC_FRMT_ST0 0x00000080 +#define RADEON_CP_VC_FRMT_ST1 0x00000100 +#define RADEON_CP_VC_FRMT_Q1 0x00000200 +#define RADEON_CP_VC_FRMT_ST2 0x00000400 +#define RADEON_CP_VC_FRMT_Q2 0x00000800 +#define RADEON_CP_VC_FRMT_ST3 0x00001000 +#define RADEON_CP_VC_FRMT_Q3 0x00002000 +#define RADEON_CP_VC_FRMT_Q0 0x00004000 +#define RADEON_CP_VC_FRMT_BLND_WEIGHT_CNT_MASK 0x00038000 +#define RADEON_CP_VC_FRMT_N0 0x00040000 +#define RADEON_CP_VC_FRMT_XY1 0x08000000 +#define RADEON_CP_VC_FRMT_Z1 0x10000000 +#define RADEON_CP_VC_FRMT_W1 0x20000000 +#define RADEON_CP_VC_FRMT_N1 0x40000000 +#define RADEON_CP_VC_FRMT_Z 0x80000000 + +#define RADEON_CP_VC_CNTL_PRIM_TYPE_NONE 0x00000000 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_POINT 0x00000001 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE 0x00000002 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP 0x00000003 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST 0x00000004 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN 0x00000005 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP 0x00000006 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_TYPE_2 0x00000007 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST 0x00000008 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST 0x00000009 +#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST 0x0000000a +#define RADEON_CP_VC_CNTL_PRIM_WALK_IND 0x00000010 +#define RADEON_CP_VC_CNTL_PRIM_WALK_LIST 0x00000020 +#define RADEON_CP_VC_CNTL_PRIM_WALK_RING 0x00000030 +#define RADEON_CP_VC_CNTL_COLOR_ORDER_BGRA 0x00000000 +#define RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA 0x00000040 +#define RADEON_CP_VC_CNTL_MAOS_ENABLE 0x00000080 +#define RADEON_CP_VC_CNTL_VTX_FMT_NON_RADEON_MODE 0x00000000 +#define RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE 0x00000100 +#define RADEON_CP_VC_CNTL_TCL_DISABLE 0x00000000 +#define RADEON_CP_VC_CNTL_TCL_ENABLE 0x00000200 +#define RADEON_CP_VC_CNTL_NUM_SHIFT 16 + +#define RADEON_VS_MATRIX_0_ADDR 0 +#define RADEON_VS_MATRIX_1_ADDR 4 +#define RADEON_VS_MATRIX_2_ADDR 8 +#define RADEON_VS_MATRIX_3_ADDR 12 +#define RADEON_VS_MATRIX_4_ADDR 16 +#define RADEON_VS_MATRIX_5_ADDR 20 +#define RADEON_VS_MATRIX_6_ADDR 24 +#define RADEON_VS_MATRIX_7_ADDR 28 +#define RADEON_VS_MATRIX_8_ADDR 32 +#define RADEON_VS_MATRIX_9_ADDR 36 +#define RADEON_VS_MATRIX_10_ADDR 40 +#define RADEON_VS_MATRIX_11_ADDR 44 +#define RADEON_VS_MATRIX_12_ADDR 48 +#define RADEON_VS_MATRIX_13_ADDR 52 +#define RADEON_VS_MATRIX_14_ADDR 56 +#define RADEON_VS_MATRIX_15_ADDR 60 +#define RADEON_VS_LIGHT_AMBIENT_ADDR 64 +#define RADEON_VS_LIGHT_DIFFUSE_ADDR 72 +#define RADEON_VS_LIGHT_SPECULAR_ADDR 80 +#define RADEON_VS_LIGHT_DIRPOS_ADDR 88 +#define RADEON_VS_LIGHT_HWVSPOT_ADDR 96 +#define RADEON_VS_LIGHT_ATTENUATION_ADDR 104 +#define RADEON_VS_MATRIX_EYE2CLIP_ADDR 112 +#define RADEON_VS_UCP_ADDR 116 +#define RADEON_VS_GLOBAL_AMBIENT_ADDR 122 +#define RADEON_VS_FOG_PARAM_ADDR 123 +#define RADEON_VS_EYE_VECTOR_ADDR 124 + +#define RADEON_SS_LIGHT_DCD_ADDR 0 +#define RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR 8 +#define RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR 16 +#define RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR 24 +#define RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR 32 +#define RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR 48 +#define RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR 49 +#define RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR 50 +#define RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 51 +#define RADEON_SS_SHININESS 60 + +#define RADEON_TV_MASTER_CNTL 0x0800 +# define RADEON_TVCLK_ALWAYS_ONb (1 << 30) +#define RADEON_TV_DAC_CNTL 0x088c +# define RADEON_TV_DAC_CMPOUT (1 << 5) +#define RADEON_TV_PRE_DAC_MUX_CNTL 0x0888 +# define RADEON_Y_RED_EN (1 << 0) +# define RADEON_C_GRN_EN (1 << 1) +# define RADEON_CMP_BLU_EN (1 << 2) +# define RADEON_RED_MX_FORCE_DAC_DATA (6 << 4) +# define RADEON_GRN_MX_FORCE_DAC_DATA (6 << 8) +# define RADEON_BLU_MX_FORCE_DAC_DATA (6 << 12) +# define RADEON_TV_FORCE_DAC_DATA_SHIFT 16 +#endif diff --git a/src/Makefile.am b/src/Makefile.am deleted file mode 100644 index aa854c5..0000000 --- a/src/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING - -xxx_dri_la_LTLIBRARIES = xxx_dri.la -xxx_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver -xxx_dri_la_LDFLAGS = -module -noprefix -lm -ldl $(DRM_LIBS) $(DRI_LIBS) -xxx_dri_ladir = @libdir@/dri -xxx_dri_la_SOURCES = \ |